@@ -131,7 +131,9 @@ def get_all_sources_and_licenses(session, media_type):
131131 f"Skipping source { source } : "
132132 f"not available in /{ media_type } / endpoint"
133133 )
134- LOGGER .info (f"Found { len (valid_sources )} sources for { media_type } " )
134+ LOGGER .info (
135+ f"Found { len (valid_sources )} valid sources for { media_type } "
136+ )
135137 return valid_sources , set (OPENVERSE_LEGAL_TOOLS )
136138 except (requests .HTTPError , requests .RequestException ) as e :
137139 raise shared .QuantifyingException (
@@ -148,20 +150,20 @@ def query_openverse(session):
148150 for media_type in MEDIA_TYPES :
149151 LOGGER .info (f"FETCHING { media_type .upper ()} DATA..." )
150152 sources , licenses = get_all_sources_and_licenses (session , media_type )
151- for source in sources :
153+ for source_name in sources :
152154 for license in licenses :
153155 # encode the license to escape '+' e.g sampling+
154156 encoded_license = urllib .parse .quote (license , safe = "" )
155157 url = (
156158 f"{ OPENVERSE_BASE_URL } /{ media_type } /?"
157- f"source={ source } &"
159+ f"source={ source_name } &"
158160 f"license={ encoded_license } "
159161 "&format=json&page=1"
160162 )
161163 LOGGER .info (
162164 "Fetching Openverse data: "
163165 f"media_type={ media_type } | "
164- f"source= { source } | "
166+ f"source= { source_name } | "
165167 f"license={ license } "
166168 )
167169 try :
@@ -177,22 +179,19 @@ def query_openverse(session):
177179 count = data .get ("result_count" , 0 )
178180 # Skip (source x license) with result_count = 0
179181 if count > 0 :
180- key = (source , media_type , license )
182+ key = (source_name , media_type , license )
181183 tally [key ] = count
182184 else :
183185 LOGGER .warning (
184- f"Skipping ({ source } , { license } ): count is 0"
186+ f"Skipping ({ source_name } , { license } ): count is 0"
185187 )
186188 except (requests .HTTPError , requests .RequestException ) as e :
187189 raise shared .QuantifyingException (
188190 f"Openverse fetch failed: { e } " , exit_code = 1
189191 )
190192 LOGGER .info ("Aggregating the data" )
191193 aggregate = []
192- for field , media_count in tally .items ():
193- source = field [0 ]
194- media_type = field [1 ]
195- license_code = field [2 ]
194+ for (source , media_type , license_code ), media_count in tally .items ():
196195 # Append prefix "cc" except for 'pdm' and 'cc0'
197196 if license_code not in ["pdm" , "cc0" ]:
198197 tool_identifier = f"cc { license_code } "
0 commit comments