2727
# Constants
QUARTER = os.path.basename(PATHS["data_quarter"])
# Output CSV files written by the process_*() functions in this module.
# main() hands this list to shared.check_for_data_files(), and
# parse_arguments() rewrites it with shared.paths_list_update() when
# args.quarter differs from QUARTER.
FILE_PATHS = [
    shared.path_join(
        PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
    ),
    shared.path_join(
        PATHS["data_phase"], "wikipedia_least_language_usage.csv"
    ),
    shared.path_join(
        PATHS["data_phase"], "wikipedia_language_representation.csv"
    ),
]
3041
3142
3243def parse_arguments ():
3344 """
3445 Parse command-line options, returns parsed argument namespace.
3546 """
47+ global QUARTER
3648 LOGGER .info ("Parsing command-line options" )
3749 parser = argparse .ArgumentParser (description = __doc__ )
3850 parser .add_argument (
@@ -51,24 +63,27 @@ def parse_arguments():
5163 help = "Enable git actions such as fetch, merge, add, commit, and push"
5264 " (default: False)" ,
5365 )
66+ parser .add_argument (
67+ "--force" ,
68+ action = "store_true" ,
69+ help = "Regenerate data even if processed files already exist" ,
70+ )
71+
5472 args = parser .parse_args ()
5573 if not args .enable_save and args .enable_git :
5674 parser .error ("--enable-git requires --enable-save" )
5775 if args .quarter != QUARTER :
58- global PATHS
76+ global FILE_PATHS , PATHS
77+ FILE_PATHS = shared .paths_list_update (
78+ LOGGER , FILE_PATHS , QUARTER , args .quarter
79+ )
5980 PATHS = shared .paths_update (LOGGER , PATHS , QUARTER , args .quarter )
81+ QUARTER = args .quarter
6082 args .logger = LOGGER
6183 args .paths = PATHS
6284 return args
6385
6486
65- def check_for_data_file (file_path ):
66- if os .path .exists (file_path ):
67- raise shared .QuantifyingException (
68- f"Processed data already exists for { QUARTER } " , 0
69- )
70-
71-
7287def process_highest_language_usage (args , count_data ):
7388 """
7489 Processing count data: Most represented languages
@@ -87,7 +102,6 @@ def process_highest_language_usage(args, count_data):
87102 file_path = shared .path_join (
88103 PATHS ["data_phase" ], "wikipedia_highest_language_usage.csv"
89104 )
90- check_for_data_file (file_path )
91105 shared .data_to_csv (args , top_10 , file_path , PATHS )
92106
93107
@@ -111,10 +125,8 @@ def process_least_language_usage(args, count_data):
111125 file_path = shared .path_join (
112126 PATHS ["data_phase" ], "wikipedia_least_language_usage.csv"
113127 )
114- check_for_data_file (file_path )
115128 shared .data_to_csv (args , bottom_10 , file_path , PATHS )
116129
117-
118130def process_language_representation (args , count_data ):
119131 """
120132 Processing count data: Language representation
@@ -138,14 +150,14 @@ def process_language_representation(args, count_data):
138150 file_path = shared .path_join (
139151 PATHS ["data_phase" ], "wikipedia_language_representation.csv"
140152 )
141- check_for_data_file (file_path )
142153 shared .data_to_csv (args , language_counts , file_path , PATHS )
143154
144155
145156def main ():
146157 args = parse_arguments ()
147158 shared .paths_log (LOGGER , PATHS )
148159 shared .git_fetch_and_merge (args , PATHS ["repo" ])
160+ shared .check_for_data_files (args , FILE_PATHS , QUARTER )
149161 file_count = shared .path_join (
150162 PATHS ["data_1-fetch" ], "wikipedia_count_by_languages.csv"
151163 )
0 commit comments