2828
2929# Constants
# Default quarter label: the final path component of PATHS["data_quarter"].
# parse_arguments() compares it against the --quarter option.
QUARTER = os.path.basename(PATHS["data_quarter"])
# Paths of the CSV files this script's process_* functions write into
# PATHS["data_phase"]; main() passes the list to
# shared.check_for_data_files().
FILE_PATHS = [
    shared.path_join(PATHS["data_phase"], csv_name)
    for csv_name in (
        "wikipedia_highest_language_usage.csv",
        "wikipedia_least_language_usage.csv",
        "wikipedia_language_representation.csv",
    )
]
3142
3243
3344def parse_arguments ():
3445 """
3546 Parse command-line options, returns parsed argument namespace.
3647 """
48+ global QUARTER
3749 LOGGER .info ("Parsing command-line options" )
3850 parser = argparse .ArgumentParser (description = __doc__ )
3951 parser .add_argument (
@@ -52,24 +64,27 @@ def parse_arguments():
5264 help = "Enable git actions such as fetch, merge, add, commit, and push"
5365 " (default: False)" ,
5466 )
67+ parser .add_argument (
68+ "--force" ,
69+ action = "store_true" ,
70+ help = "Regenerate data even if processed files already exist" ,
71+ )
72+
5573 args = parser .parse_args ()
5674 if not args .enable_save and args .enable_git :
5775 parser .error ("--enable-git requires --enable-save" )
5876 if args .quarter != QUARTER :
59- global PATHS
77+ global FILE_PATHS , PATHS
78+ FILE_PATHS = shared .paths_list_update (
79+ LOGGER , FILE_PATHS , QUARTER , args .quarter
80+ )
6081 PATHS = shared .paths_update (LOGGER , PATHS , QUARTER , args .quarter )
82+ QUARTER = args .quarter
6183 args .logger = LOGGER
6284 args .paths = PATHS
6385 return args
6486
6587
# Single-file existence guard (marked as removed, "-", in this diff).
# Raises shared.QuantifyingException with a message naming QUARTER when
# file_path already exists; otherwise falls through and returns None.
# NOTE(review): the second argument 0 is presumably an exit/status code --
# confirm against the definition of shared.QuantifyingException.
66- def check_for_data_file (file_path ):
67- if os .path .exists (file_path ):
68- raise shared .QuantifyingException (
69- f"Processed data already exists for { QUARTER } " , 0
70- )
71-
72-
7388def data_to_csv (args , data , file_path ):
7489 if not args .enable_save :
7590 return
@@ -98,7 +113,6 @@ def process_highest_language_usage(args, count_data):
98113 file_path = shared .path_join (
99114 PATHS ["data_phase" ], "wikipedia_highest_language_usage.csv"
100115 )
101- check_for_data_file (file_path )
102116 data_to_csv (args , top_10 , file_path )
103117
104118
@@ -122,7 +136,6 @@ def process_least_language_usage(args, count_data):
122136 file_path = shared .path_join (
123137 PATHS ["data_phase" ], "wikipedia_least_language_usage.csv"
124138 )
125- check_for_data_file (file_path )
126139 data_to_csv (args , bottom_10 , file_path )
127140
128141
@@ -149,14 +162,14 @@ def process_language_representation(args, count_data):
149162 file_path = shared .path_join (
150163 PATHS ["data_phase" ], "wikipedia_language_representation.csv"
151164 )
152- check_for_data_file (file_path )
153165 data_to_csv (args , language_counts , file_path )
154166
155167
156168def main ():
157169 args = parse_arguments ()
158170 shared .paths_log (LOGGER , PATHS )
159171 shared .git_fetch_and_merge (args , PATHS ["repo" ])
172+ shared .check_for_data_files (args , FILE_PATHS , QUARTER )
160173 file_count = shared .path_join (
161174 PATHS ["data_1-fetch" ], "wikipedia_count_by_languages.csv"
162175 )
0 commit comments