@@ -234,12 +234,18 @@ def get_livelihood_activity_regexes() -> list:
234234 livelihood_activity_regexes = json .load (f )
235235
236236 # Create regex patterns for metadata attributes to replace the placeholders in the regexes
237+
238+ # Dynamically build age_gender_pattern from HouseholdLaborProvider
239+ age_gender_labels = LivelihoodActivity .HouseholdLaborProvider .get_all_labels ()
240+ age_gender_labels_escaped = [re .escape (label ) for label in age_gender_labels ]
241+ age_gender_pattern = r"(?P<household_labor_provider>" + "|" .join (age_gender_labels_escaped ) + ")"
242+
237243 placeholder_patterns = {
238244 "label_pattern" : r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?" ,
239245 "product_pattern" : r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)" ,
240246 "season_pattern" : r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)" , # NOQA: E501
241247 "additional_identifier_pattern" : r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?" ,
242- "age_gender_pattern" : r"(?P<household_labor_provider>boys/girls|girls/boys|garçons/filles|filles/garçons|garcons/filles|filles/garcons|men|hommes|homme|women|femmes|femme|boys|garçons|garçon|garcons|garcon|girls|filles|fille)" ,
248+ "age_gender_pattern" : age_gender_pattern ,
243249 "unit_of_measure_pattern" : r"(?P<unit_of_measure_id>[a-z]+)" ,
244250 "nbr_pattern" : r"(?:n[bo]?r?e?|no)\.?" ,
245251 "vendu_pattern" : r"(?:quantité )?vendu(?:e|s|ss|es|ses)?" ,
@@ -281,26 +287,25 @@ def get_livelihood_activity_regular_expression_attributes(label: str) -> dict:
281287 if match :
282288 attributes .update (match .groupdict ())
283289
284- # Map French age/gender identifiers to English household_labor_provider enum values
290+ # Map household_labor_provider to canonical values using TextChoices
285291 if "household_labor_provider" in attributes and attributes ["household_labor_provider" ]:
286- hlp = attributes ["household_labor_provider" ].lower ()
287- if hlp in ["garçons" , "garçon" , "garcons" , "garcon" ]:
288- attributes ["household_labor_provider" ] = "boys"
289- elif hlp in ["filles" , "fille" ]:
290- attributes ["household_labor_provider" ] = "girls"
291- elif hlp in [
292- "boys/girls" ,
293- "girls/boys" ,
294- "garçons/filles" ,
295- "filles/garçons" ,
296- "garcons/filles" ,
297- "filles/garcons" ,
298- ]:
299- attributes ["household_labor_provider" ] = "children"
300- elif hlp in ["hommes" , "homme" ]:
301- attributes ["household_labor_provider" ] = "men"
302- elif hlp in ["femmes" , "femme" ]:
303- attributes ["household_labor_provider" ] = "women"
292+ hlp_label = attributes ["household_labor_provider" ].lower ()
293+ # First check if it's already a canonical value
294+ canonical_values = [value for value , _ in LivelihoodActivity .HouseholdLaborProvider .choices ]
295+ if hlp_label in canonical_values :
296+ # Already a canonical value, use as-is
297+ attributes ["household_labor_provider" ] = hlp_label
298+ else :
299+ # Check if it's an alias
300+ aliases = LivelihoodActivity .HouseholdLaborProvider .get_aliases ()
301+ if hlp_label in aliases :
302+ attributes ["household_labor_provider" ] = aliases [hlp_label ]
303+ else :
304+ # Check if it's a display label
305+ for choice_value , choice_label in LivelihoodActivity .HouseholdLaborProvider .choices :
306+ if str (choice_label ).lower () == hlp_label :
307+ attributes ["household_labor_provider" ] = choice_value
308+ break
304309
305310 attributes ["activity_label" ] = label
306311 attributes ["strategy_type" ] = strategy_type
0 commit comments