Skip to content

Commit 34d4f43

Browse files
committed
refactor code
1 parent acb94d4 commit 34d4f43

1 file changed

Lines changed: 9 additions & 10 deletions

File tree

scripts/1-fetch/openverse_fetch.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ def get_all_sources_and_licenses(session, media_type):
131131
f"Skipping source {source}: "
132132
f"not available in /{media_type}/ endpoint"
133133
)
134-
LOGGER.info(f"Found {len(valid_sources)} sources for {media_type}")
134+
LOGGER.info(
135+
f"Found {len(valid_sources)} valid sources for {media_type}"
136+
)
135137
return valid_sources, set(OPENVERSE_LEGAL_TOOLS)
136138
except (requests.HTTPError, requests.RequestException) as e:
137139
raise shared.QuantifyingException(
@@ -148,20 +150,20 @@ def query_openverse(session):
148150
for media_type in MEDIA_TYPES:
149151
LOGGER.info(f"FETCHING {media_type.upper()} DATA...")
150152
sources, licenses = get_all_sources_and_licenses(session, media_type)
151-
for source in sources:
153+
for source_name in sources:
152154
for license in licenses:
153155
# encode the license to escape '+', e.g. sampling+
154156
encoded_license = urllib.parse.quote(license, safe="")
155157
url = (
156158
f"{OPENVERSE_BASE_URL}/{media_type}/?"
157-
f"source={source}&"
159+
f"source={source_name}&"
158160
f"license={encoded_license}"
159161
"&format=json&page=1"
160162
)
161163
LOGGER.info(
162164
"Fetching Openverse data: "
163165
f"media_type={media_type} | "
164-
f"source={source} | "
166+
f"source={source_name} | "
165167
f"license={license}"
166168
)
167169
try:
@@ -177,22 +179,19 @@ def query_openverse(session):
177179
count = data.get("result_count", 0)
178180
# Skip (source x license) with result_count = 0
179181
if count > 0:
180-
key = (source, media_type, license)
182+
key = (source_name, media_type, license)
181183
tally[key] = count
182184
else:
183185
LOGGER.warning(
184-
f"Skipping ({source}, {license}): count is 0"
186+
f"Skipping ({source_name}, {license}): count is 0"
185187
)
186188
except (requests.HTTPError, requests.RequestException) as e:
187189
raise shared.QuantifyingException(
188190
f"Openverse fetch failed: {e}", exit_code=1
189191
)
190192
LOGGER.info("Aggregating the data")
191193
aggregate = []
192-
for field, media_count in tally.items():
193-
source = field[0]
194-
media_type = field[1]
195-
license_code = field[2]
194+
for (source, media_type, license_code), media_count in tally.items():
196195
# Append prefix "cc" except for 'pdm' and 'cc0'
197196
if license_code not in ["pdm", "cc0"]:
198197
tool_identifier = f"cc {license_code}"

0 commit comments

Comments
 (0)