@@ -152,7 +152,6 @@ def process_journals(session, args):
152152
153153 license_counts = Counter ()
154154 year_counts = defaultdict (Counter )
155- article_counts = defaultdict (int ) # Track total articles per license type
156155 processed_journals = set () # Track unique journals to avoid double counting
157156
158157 total_processed = 0
@@ -216,8 +215,7 @@ def process_journals(session, args):
216215 if not cc_license_types :
217216 continue
218217
219- # Extract article count and year once per journal
220- article_count = bibjson .get ("article_count" , 0 )
218+ # Extract year from oa_start (Open Access start year)
221219 oa_start = bibjson .get ("oa_start" )
222220
223221 # Apply date-back filter if specified
@@ -234,13 +232,9 @@ def process_journals(session, args):
234232 else :
235233 year_counts [license_type ]["Unknown" ] += 1
236234
237- # Add article count only once per unique journal ( avoid double counting)
235+ # Track unique journals to avoid double counting in statistics
238236 if journal_id not in processed_journals :
239237 processed_journals .add (journal_id )
240- # Add full article count to each license type this journal supports
241- if article_count :
242- for license_type in cc_license_types :
243- article_counts [license_type ] += article_count
244238
245239 total_processed += 1
246240
@@ -265,17 +259,15 @@ def process_journals(session, args):
265259 return (
266260 license_counts ,
267261 year_counts ,
268- article_counts ,
269262 len (processed_journals ), # Return unique journal count
270263 )
271264
272265
273266def save_count_data (
274267 license_counts ,
275268 year_counts ,
276- article_counts ,
277269):
278- """Save essential journal data and article context to CSV files."""
270+ """Save essential journal data to CSV files."""
279271
280272 # Save license counts
281273 with open (
@@ -313,7 +305,6 @@ def query_doaj(args):
313305 (
314306 license_counts ,
315307 year_counts ,
316- article_counts ,
317308 journals_processed ,
318309 ) = process_journals (session , args )
319310
@@ -322,21 +313,18 @@ def query_doaj(args):
322313 save_count_data (
323314 license_counts ,
324315 year_counts ,
325- article_counts ,
326316 )
327317
328318 # Save provenance
329- total_articles = sum (article_counts .values ())
330319 provenance_data = {
331- "total_articles_in_cc_journals" : total_articles ,
332320 "total_journals_fetched" : journals_processed ,
333321 "total_processed" : journals_processed ,
334322 "limit" : args .limit ,
335323 "date_back_filter" : args .date_back ,
336324 "quarter" : QUARTER ,
337325 "script" : os .path .basename (__file__ ),
338326 "api_version" : "v4" ,
339- "note" : "Article counts provide context for CC journal scope - individual article licenses unknown " ,
327+ "note" : "Journal-level CC license data only - article counts not available via DOAJ API " ,
340328 }
341329
342330 try :
@@ -360,10 +348,6 @@ def query_doaj(args):
360348 # Calculate total license availability instances
361349 total_license_instances = sum (license_counts .values ())
362350 LOGGER .info (f"Total CC license type instances: { total_license_instances } " )
363-
364- # Calculate total articles for context
365- total_articles = sum (article_counts .values ())
366- LOGGER .info (f"Total articles in CC-licensed journals: { total_articles } " )
367351 LOGGER .info ("Note: Journals supporting multiple CC license types are counted once per license type" )
368352
369353
0 commit comments