1111import time
1212from os import walk
1313from datetime import date
14- from blazingsql import BlazingContext
14+ # from blazingsql import BlazingContext
1515from rapidfuzz import fuzz
1616from rapidfuzz import process
1717import logging
@@ -37,7 +37,6 @@ def downloadFiles():
3737 os .mkdir ("data/geolocate" ) #For localisation of SIRENE entities
3838
3939 #### Contract award
40-
4140 urls = ["https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2010.zip" ,
4241 "https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2011.zip" ,
4342 "https://data.europa.eu/api/hub/store/data/ted-contract-award-notices-2012.zip" ,
@@ -93,7 +92,7 @@ def downloadFiles():
9392 compt = - 1
9493 with pd .read_csv ("data/opening/StockEtablissementHistorique_utf8.csv" , chunksize = chunksize ,dtype = "str" ) as reader :
9594 for chunk in reader :
96- if compt < 1 :
95+ if compt > - 1 :
9796 compt = compt + 1
9897 chunk = chunk [["siret" ,"dateDebut" ,"dateFin" ]]
9998 name = "data/opening/HistoPart" + str (compteur )+ ".csv"
@@ -128,7 +127,7 @@ def downloadFiles():
128127 chunksize = 10 ** 6
129128 compt = - 1
130129 for chunk in pd .read_csv (filename , chunksize = chunksize ,dtype = str ):
131- if compt < 1 :
130+ if compt > - 2 :
132131 compt = compt + 1
133132 for l in range (len (chunk )):
134133 if not (str (chunk ["denominationUniteLegale" ][chunksize * compt + l ]) == "nan" ):
@@ -213,11 +212,15 @@ def downloadFiles():
213212 with zipfile .ZipFile ("Sirene.zip" , 'r' ) as zip_ref :
214213 zip_ref .extractall ("data/geolocate" )
215214
216-
215+
217216def databaseCreation (nameDatabase ):
218217 """Creation of the tables of the database"""
219218 database = sqlite3 .connect (nameDatabase )
220219 cursor = database .cursor ()
220+ request = "DROP TABLE IF EXISTS Lots"
221+ sql = cursor .execute (request )
222+ request = "CREATE TABLE Lots(lotId INTEGER,tedCanId INTEGER,correctionsNB INTEGER,cancelled INTEGER,awardDate TEXT,awardEstimatedPrice NUMERIC,awardPrice NUMERIC,cpv TEXT,tenderNumber INTEGER,onBehalf TINYINT,jointProcurement TINYINT,fraAgreement TINYINT,fraEstimated INTEGER,lotsNumber INTEGER,accelerated TINYINT,outOfDirectives TINYINT,contractorSme TINYINT,numberTendersSme INTEGER,subContracted TINYINT,gpa TINYINT,multipleCae TINYINT,typeOfContract TEXT,topType TEXT,renewal TINYINT, contractDuration INTEGER, publicityDuration INTEGER,PRIMARY KEY(lotId))"
223+ sql = cursor .execute (request )
221224 request = "DROP TABLE IF EXISTS AgentsBase"
222225 sql = cursor .execute (request )
223226 request = "CREATE TABLE AgentsBase(agentId INTEGER,name TEXT,siret TEXT,address TEXT,city TEXT,zipcode TEXT,country TEXT, date TEXT,type TEXT,PRIMARY KEY(agentId))"
@@ -234,10 +237,6 @@ def databaseCreation(nameDatabase):
234237 sql = cursor .execute (request )
235238 request = "CREATE TABLE CriteriaTemp (lotId INTEGER,CRIT_PRICE_WEIGHT TEXT,CRIT_WEIGHTS TEXT, CRIT_CRITERIA TEXT)"
236239 sql = cursor .execute (request )
237- request = "DROP TABLE IF EXISTS Lots"
238- sql = cursor .execute (request )
239- request = "CREATE TABLE Lots(lotId INTEGER,tedCanId INTEGER,correctionsNB INTEGER,cancelled INTEGER,awardDate TEXT,awardEstimatedPrice NUMERIC,awardPrice NUMERIC,cpv TEXT,tenderNumber INTEGER,onBehalf TINYINT,jointProcurement TINYINT,fraAgreement TINYINT,fraEstimated INTEGER,lotsNumber INTEGER,accelerated TINYINT,outOfDirectives TINYINT,contractorSme TINYINT,numberTendersSme INTEGER,subContracted TINYINT,gpa TINYINT,multipleCae TINYINT,typeOfContract TEXT,topType TEXT,renewal TINYINT, contractDuration INTEGER, publicityDuration INTEGER,PRIMARY KEY(lotId))"
240- sql = cursor .execute (request )
241240 request = "DROP TABLE IF EXISTS LotClients"
242241 sql = cursor .execute (request )
243242 request = "CREATE TABLE LotClients(lotId INTEGER,agentId INTEGER,FOREIGN KEY(agentId) REFERENCES Agents(agentId) ON UPDATE CASCADE,FOREIGN KEY(lotId) REFERENCES Lots(lotId) ON UPDATE CASCADE)"
@@ -707,6 +706,8 @@ def fineTuningAgents(database):
707706 sql = cursor .execute (request )
708707 request = "UPDATE AgentsSiretiser SET zipcode = NULLIF(zipcode,'None')"
709708 sql = cursor .execute (request )
709+ request = "DELETE FROM AgentsSiretiser WHERE country='INFRUCTUEUX'"
710+ sql = cursor .execute (request )
710711 database .commit ()
711712 return database
712713
@@ -779,7 +780,7 @@ def criteriaProcessing(database):
779780 cursor = database .cursor ()
780781 request = "DROP TABLE IF EXISTS Criteria"
781782 sql = cursor .execute (request )
782- request = "CREATE TABLE Criteria (criterionId INTEGER,lotId INTEGER,name TEXT,weight INTEGER,type TEXT,PRIMARY KEY(criterionId),FOREIGN KEY(lotId) REFERENCES Lots(lotId) ON UPDATE CASCADE )"
783+ request = "CREATE TABLE Criteria (criterionId INTEGER,lotId INTEGER,name TEXT,weight INTEGER,type TEXT,PRIMARY KEY(criterionId))"
783784 sql = cursor .execute (request )
784785 datas = pd .read_sql_query ("SELECT * FROM CriteriaTemp" , database ,dtype = str )
785786 datas ["CRIT_PRICE_WEIGHT" ] = datas ["CRIT_PRICE_WEIGHT" ].str .replace ("-" ,"" ,regex = True )
@@ -1337,6 +1338,17 @@ def finalTableAgent(database):
13371338 val = (dico [int (namesID [i ])],namesAgent [i ])
13381339 cursor .execute (sql ,val )
13391340
1341+
1342+ request = "UPDATE Agents SET name = NULLIF(name,'None')"
1343+ sql = cursor .execute (request )
1344+ request = "UPDATE Agents SET siret = NULLIF(siret,'None')"
1345+ sql = cursor .execute (request )
1346+ request = "UPDATE Agents SET address = NULLIF(address,'None')"
1347+ sql = cursor .execute (request )
1348+ request = "UPDATE Agents SET city = NULLIF(city,'None')"
1349+ sql = cursor .execute (request )
1350+ request = "UPDATE Agents SET zipcode = NULLIF(zipcode,'None')"
1351+ sql = cursor .execute (request )
13401352 database .commit ()
13411353 return database
13421354
@@ -1438,7 +1450,7 @@ def cleaningDatabase(database):
14381450 database .commit ()
14391451 os .remove ("ADeduper.csv" )
14401452 os .remove ("ResDedupe.csv" )
1441- # os.remove("data/geolocate/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv")
1453+ os .remove ("data/geolocate/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv" )
14421454 return database
14431455
14441456
@@ -1519,6 +1531,7 @@ def exportDatabase(database):
15191531
15201532 file = open ("FOPPA/FOPPA.sql" ,"w" )
15211533 for line in database .iterdump ():
1534+ print (line )
15221535 file .write (line )
15231536 file .write ("\n " )
15241537
@@ -1584,6 +1597,6 @@ def exportDatabase(database):
15841597 print ("---Export---" )
15851598 exportDatabase (db )
15861599 db .close ()
1587- os .remove ("Foppa.db" )
1600+ # os.remove("Foppa.db")
15881601 del db
15891602
0 commit comments