Skip to content

Commit 7319b5b

Browse files
committed
Address PR feedback see HEA-809
1 parent fdf91b3 commit 7319b5b

4 files changed

Lines changed: 112 additions & 20 deletions

File tree

apps/baseline/models.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1154,6 +1154,57 @@ class HouseholdLaborProvider(models.TextChoices):
11541154
ADULTS = "adults", _("Mainly Adults")
11551155
ALL = "all", _("All Together")
11561156

1157+
@classmethod
def get_aliases(cls):
    """
    Build the lookup of alternative labels for household labor providers.

    Returns a dict whose keys are lowercase alias strings (French
    singular/plural forms, unaccented spellings, and "a/b" or "a & b"
    combinations) and whose values are the corresponding members of this
    choices class.
    """
    # Each entry pairs a canonical member with the aliases that resolve to it.
    # The groups are listed in the order the aliases must appear in the result.
    alias_groups = (
        # French singular/plural for men
        (cls.MEN, ("hommes", "homme")),
        # French singular/plural for women
        (cls.WOMEN, ("femmes", "femme")),
        # French singular/plural for boys, with and without the cedilla
        (cls.BOYS, ("garçons", "garçon", "garcons", "garcon")),
        # French singular/plural for girls
        (cls.GIRLS, ("filles", "fille")),
        # French for adults
        (cls.ADULTS, ("adultes",)),
        # Children combinations (boys/girls in any order)
        (
            cls.CHILDREN,
            (
                "boys/girls",
                "girls/boys",
                "garçons/filles",
                "filles/garçons",
                "garcons/filles",
                "filles/garcons",
            ),
        ),
        # Adults combinations (men/women in any order)
        (
            cls.ADULTS,
            (
                "men/women",
                "women/men",
                "men & women",
                "women & men",
                "hommes/femmes",
                "femmes/hommes",
                "hommes & femmes",
                "femmes & hommes",
            ),
        ),
    )

    alias_map = {}
    for canonical, labels in alias_groups:
        alias_map.update(dict.fromkeys(labels, canonical))
    return alias_map
1196+
1197+
@classmethod
def get_all_labels(cls):
    """
    Collect every label that can identify a household labor provider.

    The result contains the canonical values and the display labels from
    ``cls.choices`` plus the alias keys from ``cls.get_aliases()``, ordered
    from longest to shortest string. Putting the longest labels first means
    that a regex alternation built from this list tries longer alternatives
    before shorter ones.
    """
    labels = [value for value, _label in cls.choices]
    labels.extend(str(label) for _value, label in cls.choices)
    labels.extend(cls.get_aliases())
    # list.sort is stable, so labels of equal length keep their relative order.
    labels.sort(key=len, reverse=True)
    return labels
1207+
11571208
household_labor_provider = models.CharField(
11581209
max_length=10, choices=HouseholdLaborProvider.choices, blank=True, verbose_name=_("Activity done by")
11591210
)

pipelines/assets/livelihood_activity.py

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -234,12 +234,18 @@ def get_livelihood_activity_regexes() -> list:
234234
livelihood_activity_regexes = json.load(f)
235235

236236
# Create regex patterns for metadata attributes to replace the placeholders in the regexes
237+
238+
# Dynamically build age_gender_pattern from HouseholdLaborProvider
239+
age_gender_labels = LivelihoodActivity.HouseholdLaborProvider.get_all_labels()
240+
age_gender_labels_escaped = [re.escape(label) for label in age_gender_labels]
241+
age_gender_pattern = r"(?P<household_labor_provider>" + "|".join(age_gender_labels_escaped) + ")"
242+
237243
placeholder_patterns = {
238244
"label_pattern": r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?",
239245
"product_pattern": r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)",
240246
"season_pattern": r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501
241247
"additional_identifier_pattern": r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?",
242-
"age_gender_pattern": r"(?P<household_labor_provider>boys/girls|girls/boys|garçons/filles|filles/garçons|garcons/filles|filles/garcons|men|hommes|homme|women|femmes|femme|boys|garçons|garçon|garcons|garcon|girls|filles|fille)",
248+
"age_gender_pattern": age_gender_pattern,
243249
"unit_of_measure_pattern": r"(?P<unit_of_measure_id>[a-z]+)",
244250
"nbr_pattern": r"(?:n[bo]?r?e?|no)\.?",
245251
"vendu_pattern": r"(?:quantité )?vendu(?:e|s|ss|es|ses)?",
@@ -281,26 +287,25 @@ def get_livelihood_activity_regular_expression_attributes(label: str) -> dict:
281287
if match:
282288
attributes.update(match.groupdict())
283289

284-
# Map French age/gender identifiers to English household_labor_provider enum values
290+
# Map household_labor_provider to canonical values using TextChoices
285291
if "household_labor_provider" in attributes and attributes["household_labor_provider"]:
286-
hlp = attributes["household_labor_provider"].lower()
287-
if hlp in ["garçons", "garçon", "garcons", "garcon"]:
288-
attributes["household_labor_provider"] = "boys"
289-
elif hlp in ["filles", "fille"]:
290-
attributes["household_labor_provider"] = "girls"
291-
elif hlp in [
292-
"boys/girls",
293-
"girls/boys",
294-
"garçons/filles",
295-
"filles/garçons",
296-
"garcons/filles",
297-
"filles/garcons",
298-
]:
299-
attributes["household_labor_provider"] = "children"
300-
elif hlp in ["hommes", "homme"]:
301-
attributes["household_labor_provider"] = "men"
302-
elif hlp in ["femmes", "femme"]:
303-
attributes["household_labor_provider"] = "women"
292+
hlp_label = attributes["household_labor_provider"].lower()
293+
# First check if it's already a canonical value
294+
canonical_values = [value for value, _ in LivelihoodActivity.HouseholdLaborProvider.choices]
295+
if hlp_label in canonical_values:
296+
# Already a canonical value, use as-is
297+
attributes["household_labor_provider"] = hlp_label
298+
else:
299+
# Check if it's an alias
300+
aliases = LivelihoodActivity.HouseholdLaborProvider.get_aliases()
301+
if hlp_label in aliases:
302+
attributes["household_labor_provider"] = aliases[hlp_label]
303+
else:
304+
# Check if it's a display label
305+
for choice_value, choice_label in LivelihoodActivity.HouseholdLaborProvider.choices:
306+
if str(choice_label).lower() == hlp_label:
307+
attributes["household_labor_provider"] = choice_value
308+
break
304309

305310
attributes["activity_label"] = label
306311
attributes["strategy_type"] = strategy_type

pipelines/assets/livelihood_activity_regexes.json

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,12 @@
127127
true,
128128
null
129129
],
130+
[
131+
"{age_gender_pattern}",
132+
null,
133+
false,
134+
null
135+
],
130136
[
131137
"(?:other purchases?|autres? achats?|achats?):?",
132138
"OtherPurchase",

pipelines_tests/test_assets/test_livelihood_activity_regexes.json

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1041,6 +1041,36 @@
10411041
"product_id": "petty trade",
10421042
"household_labor_provider": "women"
10431043
},
1044+
"adults": {
1045+
"household_labor_provider": "adults"
1046+
},
1047+
"adultes": {
1048+
"household_labor_provider": "adults"
1049+
},
1050+
"men/women": {
1051+
"household_labor_provider": "adults"
1052+
},
1053+
"women/men": {
1054+
"household_labor_provider": "adults"
1055+
},
1056+
"men & women": {
1057+
"household_labor_provider": "adults"
1058+
},
1059+
"women & men": {
1060+
"household_labor_provider": "adults"
1061+
},
1062+
"hommes/femmes": {
1063+
"household_labor_provider": "adults"
1064+
},
1065+
"femmes/hommes": {
1066+
"household_labor_provider": "adults"
1067+
},
1068+
"hommes & femmes": {
1069+
"household_labor_provider": "adults"
1070+
},
1071+
"femmes & hommes": {
1072+
"household_labor_provider": "adults"
1073+
},
10441074
"other self-employment: petty trade - boys": {
10451075
"strategy_type": "OtherCashIncome",
10461076
"is_start": true,

0 commit comments

Comments
 (0)