|
47 | 47 | DEFAULT_FETCH_LIMIT = 1000 |
48 | 48 | RATE_LIMIT_DELAY = 0.5 |
49 | 49 |
|
| 50 | +# CC License types |
| 51 | +CC_LICENSE_TYPES = [ |
| 52 | + "CC BY", |
| 53 | + "CC BY-NC", |
| 54 | + "CC BY-SA", |
| 55 | + "CC BY-ND", |
| 56 | + "CC BY-NC-SA", |
| 57 | + "CC BY-NC-ND", |
| 58 | + "CC0", |
| 59 | + "UNKNOWN CC legal tool", |
| 60 | +] |
| 61 | + |
| 62 | +# File Paths |
| 63 | +FILE_DOAJ_COUNT = shared.path_join(PATHS["data_1-fetch"], "doaj_1_count.csv") |
| 64 | +FILE_DOAJ_LANGUAGE = shared.path_join( |
| 65 | + PATHS["data_1-fetch"], "doaj_3_count_by_language.csv" |
| 66 | +) |
| 67 | +FILE_DOAJ_PUBLISHER = shared.path_join( |
| 68 | + PATHS["data_1-fetch"], "doaj_5_count_by_publisher.csv" |
| 69 | +) |
| 70 | +FILE_DOAJ_SUBJECT_REPORT = shared.path_join( |
| 71 | + PATHS["data_1-fetch"], "doaj_2_count_by_subject_report.csv" |
| 72 | +) |
| 73 | +FILE_PROVENANCE = shared.path_join( |
| 74 | + PATHS["data_1-fetch"], "doaj_provenance.yaml" |
| 75 | +) |
| 76 | +FILE_DOAJ_YEAR = shared.path_join( |
| 77 | + PATHS["data_1-fetch"], "doaj_4_count_by_year.csv" |
| 78 | +) |
| 79 | + |
50 | 80 | # CSV Headers |
51 | 81 | HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"] |
52 | 82 | HEADER_LANGUAGE = ["TOOL_IDENTIFIER", "LANGUAGE_CODE", "LANGUAGE", "COUNT"] |
|
65 | 95 | ] |
66 | 96 | HEADER_YEAR = ["TOOL_IDENTIFIER", "YEAR", "COUNT"] |
67 | 97 |
|
68 | | -# CC License types |
69 | | -CC_LICENSE_TYPES = [ |
70 | | - "CC BY", |
71 | | - "CC BY-NC", |
72 | | - "CC BY-SA", |
73 | | - "CC BY-ND", |
74 | | - "CC BY-NC-SA", |
75 | | - "CC BY-NC-ND", |
76 | | - "CC0", |
77 | | - "UNKNOWN CC legal tool", |
78 | | -] |
79 | | - |
80 | 98 | # Language code to readable name mapping |
81 | 99 | LANGUAGE_NAMES = { |
| 100 | + "AF": "Afrikaans", |
| 101 | + "AR": "Arabic", |
| 102 | + "BE": "Belarusian", |
| 103 | + "BG": "Bulgarian", |
| 104 | + "BN": "Bengali", |
| 105 | + "CA": "Catalan", |
| 106 | + "CS": "Czech", |
| 107 | + "DA": "Danish", |
| 108 | + "DE": "German", |
| 109 | + "EL": "Greek", |
82 | 110 | "EN": "English", |
83 | 111 | "ES": "Spanish", |
84 | | - "PT": "Portuguese", |
| 112 | + "ET": "Estonian", |
| 113 | + "FA": "Persian", |
| 114 | + "FI": "Finnish", |
85 | 115 | "FR": "French", |
86 | | - "DE": "German", |
| 116 | + "HE": "Hebrew", |
| 117 | + "HI": "Hindi", |
| 118 | + "HR": "Croatian", |
| 119 | + "HU": "Hungarian", |
| 120 | + "ID": "Indonesian", |
| 121 | + "IS": "Icelandic", |
87 | 122 | "IT": "Italian", |
88 | | - "RU": "Russian", |
89 | | - "ZH": "Chinese", |
90 | 123 | "JA": "Japanese", |
91 | | - "AR": "Arabic", |
92 | | - "TR": "Turkish", |
| 124 | + "KO": "Korean", |
| 125 | + "LT": "Lithuanian", |
| 126 | + "LV": "Latvian", |
| 127 | + "MK": "Macedonian", |
| 128 | + "MS": "Malay", |
93 | 129 | "NL": "Dutch", |
94 | | - "SV": "Swedish", |
95 | 130 | "NO": "Norwegian", |
96 | | - "DA": "Danish", |
97 | | - "FI": "Finnish", |
98 | 131 | "PL": "Polish", |
99 | | - "CS": "Czech", |
100 | | - "HU": "Hungarian", |
| 132 | + "PT": "Portuguese", |
101 | 133 | "RO": "Romanian", |
102 | | - "BG": "Bulgarian", |
103 | | - "HR": "Croatian", |
| 134 | + "RU": "Russian", |
104 | 135 | "SK": "Slovak", |
105 | 136 | "SL": "Slovenian", |
106 | | - "ET": "Estonian", |
107 | | - "LV": "Latvian", |
108 | | - "LT": "Lithuanian", |
109 | | - "EL": "Greek", |
110 | | - "CA": "Catalan", |
111 | | - "IS": "Icelandic", |
112 | | - "MK": "Macedonian", |
113 | 137 | "SR": "Serbian", |
114 | | - "UK": "Ukrainian", |
115 | | - "BE": "Belarusian", |
116 | | - "KO": "Korean", |
| 138 | + "SV": "Swedish", |
| 139 | + "SW": "Swahili", |
117 | 140 | "TH": "Thai", |
118 | | - "VI": "Vietnamese", |
119 | | - "ID": "Indonesian", |
120 | | - "MS": "Malay", |
121 | | - "HI": "Hindi", |
122 | | - "BN": "Bengali", |
| 141 | + "TR": "Turkish", |
| 142 | + "UK": "Ukrainian", |
123 | 143 | "UR": "Urdu", |
124 | | - "FA": "Persian", |
125 | | - "HE": "Hebrew", |
126 | | - "SW": "Swahili", |
127 | | - "AF": "Afrikaans", |
| 144 | + "VI": "Vietnamese", |
| 145 | + "ZH": "Chinese", |
128 | 146 | } |
129 | 147 |
|
130 | | -# File Paths |
131 | | -FILE_DOAJ_COUNT = shared.path_join(PATHS["data_1-fetch"], "doaj_1_count.csv") |
132 | | -FILE_DOAJ_SUBJECT_REPORT = shared.path_join( |
133 | | - PATHS["data_1-fetch"], "doaj_2_count_by_subject_report.csv" |
134 | | -) |
135 | | -FILE_DOAJ_LANGUAGE = shared.path_join( |
136 | | - PATHS["data_1-fetch"], "doaj_3_count_by_language.csv" |
137 | | -) |
138 | | -FILE_DOAJ_YEAR = shared.path_join( |
139 | | - PATHS["data_1-fetch"], "doaj_4_count_by_year.csv" |
140 | | -) |
141 | | -FILE_DOAJ_PUBLISHER = shared.path_join( |
142 | | - PATHS["data_1-fetch"], "doaj_5_count_by_publisher.csv" |
143 | | -) |
144 | | -FILE_PROVENANCE = shared.path_join( |
145 | | - PATHS["data_1-fetch"], "doaj_provenance.yaml" |
146 | | -) |
147 | | - |
148 | 148 |
|
149 | 149 | # Load ISO 3166-1 alpha-2 country codes from YAML file |
150 | 150 | def load_country_names(): |
|
0 commit comments