Skip to content

Commit 77827fa

Browse files
authored
Change Names in database
1 parent a7ddda5 commit 77827fa

1 file changed

Lines changed: 25 additions & 12 deletions

File tree

foppaInit.py

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
import time
1212
from os import walk
1313
from datetime import date
14-
from blazingsql import BlazingContext
14+
#from blazingsql import BlazingContext
1515
from rapidfuzz import fuzz
1616
from rapidfuzz import process
1717
import logging
@@ -37,7 +37,6 @@ def downloadFiles():
3737
os.mkdir("data/geolocate") #For localisation of SIRENE entities
3838

3939
#### Contract award
40-
4140
urls = ["https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2010.zip",
4241
"https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2011.zip",
4342
"https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2012.zip",
@@ -93,7 +92,7 @@ def downloadFiles():
9392
compt=-1
9493
with pd.read_csv("data/opening/StockEtablissementHistorique_utf8.csv", chunksize=chunksize,dtype="str") as reader:
9594
for chunk in reader:
96-
if compt<1:
95+
if compt>-1:
9796
compt=compt+1
9897
chunk = chunk[["siret","dateDebut","dateFin"]]
9998
name = "data/opening/HistoPart"+str(compteur)+".csv"
@@ -128,7 +127,7 @@ def downloadFiles():
128127
chunksize = 10 ** 6
129128
compt=-1
130129
for chunk in pd.read_csv(filename, chunksize=chunksize,dtype = str):
131-
if compt<1:
130+
if compt>-2:
132131
compt=compt+1
133132
for l in range(len(chunk)):
134133
if not(str(chunk["denominationUniteLegale"][chunksize*compt+l]) == "nan"):
@@ -213,11 +212,15 @@ def downloadFiles():
213212
with zipfile.ZipFile("Sirene.zip", 'r') as zip_ref:
214213
zip_ref.extractall("data/geolocate")
215214

216-
215+
217216
def databaseCreation(nameDatabase):
218217
"""Creation of the tables of the database"""
219218
database = sqlite3.connect(nameDatabase)
220219
cursor = database.cursor()
220+
request = "DROP TABLE IF EXISTS Lots"
221+
sql = cursor.execute(request)
222+
request = "CREATE TABLE Lots(lotId INTEGER,tedCanId INTEGER,correctionsNB INTEGER,cancelled INTEGER,awardDate TEXT,awardEstimatedPrice NUMERIC,awardPrice NUMERIC,cpv TEXT,tenderNumber INTEGER,onBehalf TINYINT,jointProcurement TINYINT,fraAgreement TINYINT,fraEstimated INTEGER,lotsNumber INTEGER,accelerated TINYINT,outOfDirectives TINYINT,contractorSme TINYINT,numberTendersSme INTEGER,subContracted TINYINT,gpa TINYINT,multipleCae TINYINT,typeOfContract TEXT,topType TEXT,renewal TINYINT, contractDuration INTEGER, publicityDuration INTEGER,PRIMARY KEY(lotId))"
223+
sql = cursor.execute(request)
221224
request = "DROP TABLE IF EXISTS AgentsBase"
222225
sql = cursor.execute(request)
223226
request = "CREATE TABLE AgentsBase(agentId INTEGER,name TEXT,siret TEXT,address TEXT,city TEXT,zipcode TEXT,country TEXT, date TEXT,type TEXT,PRIMARY KEY(agentId))"
@@ -234,10 +237,6 @@ def databaseCreation(nameDatabase):
234237
sql = cursor.execute(request)
235238
request = "CREATE TABLE CriteriaTemp (lotId INTEGER,CRIT_PRICE_WEIGHT TEXT,CRIT_WEIGHTS TEXT, CRIT_CRITERIA TEXT)"
236239
sql = cursor.execute(request)
237-
request = "DROP TABLE IF EXISTS Lots"
238-
sql = cursor.execute(request)
239-
request = "CREATE TABLE Lots(lotId INTEGER,tedCanId INTEGER,correctionsNB INTEGER,cancelled INTEGER,awardDate TEXT,awardEstimatedPrice NUMERIC,awardPrice NUMERIC,cpv TEXT,tenderNumber INTEGER,onBehalf TINYINT,jointProcurement TINYINT,fraAgreement TINYINT,fraEstimated INTEGER,lotsNumber INTEGER,accelerated TINYINT,outOfDirectives TINYINT,contractorSme TINYINT,numberTendersSme INTEGER,subContracted TINYINT,gpa TINYINT,multipleCae TINYINT,typeOfContract TEXT,topType TEXT,renewal TINYINT, contractDuration INTEGER, publicityDuration INTEGER,PRIMARY KEY(lotId))"
240-
sql = cursor.execute(request)
241240
request = "DROP TABLE IF EXISTS LotClients"
242241
sql = cursor.execute(request)
243242
request = "CREATE TABLE LotClients(lotId INTEGER,agentId INTEGER,FOREIGN KEY(agentId) REFERENCES Agents(agentId) ON UPDATE CASCADE,FOREIGN KEY(lotId) REFERENCES Lots(lotId) ON UPDATE CASCADE)"
@@ -707,6 +706,8 @@ def fineTuningAgents(database):
707706
sql = cursor.execute(request)
708707
request = "UPDATE AgentsSiretiser SET zipcode = NULLIF(zipcode,'None')"
709708
sql = cursor.execute(request)
709+
request = "DELETE FROM AgentsSiretiser WHERE country='INFRUCTUEUX'"
710+
sql = cursor.execute(request)
710711
database.commit()
711712
return database
712713

@@ -779,7 +780,7 @@ def criteriaProcessing(database):
779780
cursor = database.cursor()
780781
request = "DROP TABLE IF EXISTS Criteria"
781782
sql = cursor.execute(request)
782-
request = "CREATE TABLE Criteria (criterionId INTEGER,lotId INTEGER,name TEXT,weight INTEGER,type TEXT,PRIMARY KEY(criterionId),FOREIGN KEY(lotId) REFERENCES Lots(lotId) ON UPDATE CASCADE)"
783+
request = "CREATE TABLE Criteria (criterionId INTEGER,lotId INTEGER,name TEXT,weight INTEGER,type TEXT,PRIMARY KEY(criterionId))"
783784
sql = cursor.execute(request)
784785
datas = pd.read_sql_query("SELECT * FROM CriteriaTemp", database,dtype=str)
785786
datas["CRIT_PRICE_WEIGHT"] =datas["CRIT_PRICE_WEIGHT"].str.replace("-","",regex=True)
@@ -1337,6 +1338,17 @@ def finalTableAgent(database):
13371338
val = (dico[int(namesID[i])],namesAgent[i])
13381339
cursor.execute(sql,val)
13391340

1341+
1342+
request = "UPDATE Agents SET name = NULLIF(name,'None')"
1343+
sql = cursor.execute(request)
1344+
request = "UPDATE Agents SET siret = NULLIF(siret,'None')"
1345+
sql = cursor.execute(request)
1346+
request = "UPDATE Agents SET address = NULLIF(address,'None')"
1347+
sql = cursor.execute(request)
1348+
request = "UPDATE Agents SET city = NULLIF(city,'None')"
1349+
sql = cursor.execute(request)
1350+
request = "UPDATE Agents SET zipcode = NULLIF(zipcode,'None')"
1351+
sql = cursor.execute(request)
13401352
database.commit()
13411353
return database
13421354

@@ -1438,7 +1450,7 @@ def cleaningDatabase(database):
14381450
database.commit()
14391451
os.remove("ADeduper.csv")
14401452
os.remove("ResDedupe.csv")
1441-
#os.remove("data/geolocate/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv")
1453+
os.remove("data/geolocate/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv")
14421454
return database
14431455

14441456

@@ -1519,6 +1531,7 @@ def exportDatabase(database):
15191531

15201532
file = open("FOPPA/FOPPA.sql","w")
15211533
for line in database.iterdump():
1534+
print(line)
15221535
file.write(line)
15231536
file.write("\n")
15241537

@@ -1584,6 +1597,6 @@ def exportDatabase(database):
15841597
print("---Export---")
15851598
exportDatabase(db)
15861599
db.close()
1587-
os.remove("Foppa.db")
1600+
#os.remove("Foppa.db")
15881601
del db
15891602

0 commit comments

Comments
 (0)