@@ -347,6 +347,7 @@ def process(
347347
348348 # Counter for actually processed files
349349 processed_files_count = 0
350+ errors_files_count = 0
350351 input_files = []
351352
352353 for input_file in all_input_files :
@@ -363,7 +364,7 @@ def process(
363364 input_files .append (input_file )
364365
365366 if len (input_files ) == batch_size_pdf :
366- batch_processed = self .process_batch (
367+ batch_processed , batch_errors = self .process_batch (
367368 service ,
368369 input_files ,
369370 input_path ,
@@ -381,11 +382,12 @@ def process(
381382 flavor
382383 )
383384 processed_files_count += batch_processed
385+ errors_files_count += batch_errors
384386 input_files = []
385387
386388 # last batch
387389 if len (input_files ) > 0 :
388- batch_processed = self .process_batch (
390+ batch_processed , batch_errors = self .process_batch (
389391 service ,
390392 input_files ,
391393 input_path ,
@@ -402,9 +404,11 @@ def process(
402404 verbose ,
403405 )
404406 processed_files_count += batch_processed
407+ errors_files_count += batch_errors
405408
406409 # Log final statistics
407410 self .logger .info (f"Processing completed: { processed_files_count } out of { total_files } files processed" )
411+ self .logger .info (f"Errors: { errors_files_count } out of { total_files } files processed" )
408412
409413 def process_batch (
410414 self ,
@@ -428,6 +432,7 @@ def process_batch(
428432 self .logger .info (f"{ len (input_files )} files to process in current batch" )
429433
430434 processed_count = 0
435+ error_count = 0
431436
432437 # we use ThreadPoolExecutor and not ProcessPoolExecutor because it is an I/O intensive process
433438 with concurrent .futures .ThreadPoolExecutor (max_workers = n ) as executor :
@@ -468,10 +473,10 @@ def process_batch(
468473 for r in concurrent .futures .as_completed (results ):
469474 input_file , status , text = r .result ()
470475 filename = self ._output_file_name (input_file , input_path , output )
471- processed_count += 1
472476
473477 if status != 200 or text is None :
474478 self .logger .error (f"Processing of { input_file } failed with error { status } : { text } " )
479+ error_count += 1
475480 # writing error file with suffixed error code
476481 try :
477482 pathlib .Path (os .path .dirname (filename )).mkdir (parents = True , exist_ok = True )
@@ -485,6 +490,7 @@ def process_batch(
485490 except OSError as e :
486491 self .logger .error (f"Failed to write error file { filename } : { str (e )} " )
487492 else :
493+ processed_count += 1
488494 # writing TEI file
489495 try :
490496 pathlib .Path (os .path .dirname (filename )).mkdir (parents = True , exist_ok = True )
@@ -494,7 +500,7 @@ def process_batch(
494500 except OSError as e :
495501 self .logger .error (f"Failed to write TEI XML file { filename } : { str (e )} " )
496502
497- return processed_count
503+ return processed_count , error_count
498504
499505 def process_pdf (
500506 self ,
0 commit comments