Skip to content

Commit 99ecba4

Browse files
committed
Updated the regexes and unit tests for the men, women, children, boys and girls patterns; see HEA-809
1 parent 5a84d08 commit 99ecba4

5 files changed

Lines changed: 207 additions & 48 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 5.2.7 on 2025-12-29 10:31
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Alter ``livelihoodactivity.household_labor_provider`` on the ``baseline`` app.

    Redefines the field as an optional ``CharField`` (``max_length=10``) whose
    choices are: men, women, children, boys, girls, adults and all.
    """

    # Must be applied after the preceding "baseline" migration.
    dependencies = [("baseline", "0022_alter_wealthgroup_options")]

    operations = [
        migrations.AlterField(
            model_name="livelihoodactivity",
            name="household_labor_provider",
            field=models.CharField(
                verbose_name="Activity done by",
                max_length=10,
                blank=True,
                # (stored value, human-readable label) pairs.
                choices=[
                    ("men", "Mainly Men"),
                    ("women", "Mainly Women"),
                    ("children", "Mainly Children"),
                    ("boys", "Mainly Boys"),
                    ("girls", "Mainly Girls"),
                    ("adults", "Mainly Adults"),
                    ("all", "All Together"),
                ],
            ),
        ),
    ]

apps/baseline/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,9 @@ class HouseholdLaborProvider(models.TextChoices):
11491149
MEN = "men", _("Mainly Men")
11501150
WOMEN = "women", _("Mainly Women")
11511151
CHILDREN = "children", _("Mainly Children")
1152+
BOYS = "boys", _("Mainly Boys")
1153+
GIRLS = "girls", _("Mainly Girls")
1154+
ADULTS = "adults", _("Mainly Adults")
11521155
ALL = "all", _("All Together")
11531156

11541157
household_labor_provider = models.CharField(

pipelines/assets/livelihood_activity.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def get_livelihood_activity_regexes() -> list:
239239
"product_pattern": r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)",
240240
"season_pattern": r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501
241241
"additional_identifier_pattern": r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?",
242-
"age_gender_pattern": r"(?P<additional_identifier>filles/garçons|garçons/filles|garcons/filles|filles/garcons|boys/girls|girls/boys|men|hommes|women|femmes|boys|garcons|garçons|girls|filles)",
242+
"age_gender_pattern": r"(?P<household_labor_provider>boys/girls|girls/boys|garçons/filles|filles/garçons|garcons/filles|filles/garcons|men|hommes|homme|women|femmes|femme|boys|garçons|garçon|garcons|garcon|girls|filles|fille)",
243243
"unit_of_measure_pattern": r"(?P<unit_of_measure_id>[a-z]+)",
244244
"nbr_pattern": r"(?:n[bo]?r?e?|no)\.?",
245245
"vendu_pattern": r"(?:quantité )?vendu(?:e|s|ss|es|ses)?",
@@ -271,13 +271,36 @@ def get_livelihood_activity_regular_expression_attributes(label: str) -> dict:
271271
"unit_of_measure_id": None,
272272
"season": None,
273273
"additional_identifier": None,
274+
"household_labor_provider": None,
274275
"attribute": None,
275276
"notes": None,
276277
}
277278
for pattern, strategy_type, is_start, attribute in get_livelihood_activity_regexes():
278279
match = pattern.fullmatch(label)
279280
if match:
280281
attributes.update(match.groupdict())
282+
283+
# Map French age/gender identifiers to English household_labor_provider enum values
284+
if "household_labor_provider" in attributes and attributes["household_labor_provider"]:
285+
hlp = attributes["household_labor_provider"].lower()
286+
if hlp in ["garçons", "garçon", "garcons", "garcon"]:
287+
attributes["household_labor_provider"] = "boys"
288+
elif hlp in ["filles", "fille"]:
289+
attributes["household_labor_provider"] = "girls"
290+
elif hlp in [
291+
"boys/girls",
292+
"girls/boys",
293+
"garçons/filles",
294+
"filles/garçons",
295+
"garcons/filles",
296+
"filles/garcons",
297+
]:
298+
attributes["household_labor_provider"] = "children"
299+
elif hlp in ["hommes", "homme"]:
300+
attributes["household_labor_provider"] = "men"
301+
elif hlp in ["femmes", "femme"]:
302+
attributes["household_labor_provider"] = "women"
303+
281304
attributes["activity_label"] = label
282305
attributes["strategy_type"] = strategy_type
283306
attributes["is_start"] = is_start

pipelines/assets/livelihood_activity_regexes.json

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,75 @@
5656
null
5757
],
5858
[
59-
"{age_gender_pattern}",
59+
"(?:autre auto-emploi|other self-employment){separator_pattern} {age_gender_pattern}{separator_pattern} {product_pattern}",
60+
"OtherCashIncome",
61+
true,
62+
null
63+
],
64+
[
65+
"(?:autre auto-emploi|other self-employment){separator_pattern} {product_pattern}{separator_pattern} {age_gender_pattern}",
66+
"OtherCashIncome",
67+
true,
68+
null
69+
],
70+
[
71+
"{age_gender_pattern}{separator_pattern} {product_pattern}{separator_pattern} (?:quantity|quantité) ?\\({unit_of_measure_pattern}\\)",
6072
null,
61-
false,
73+
true,
74+
"quantity_produced_or_purchased"
75+
],
76+
[
77+
"{age_gender_pattern}{separator_pattern} {product_pattern}{separator_pattern} {nbr_pattern} (?:de )?(?:pers|personnes|people)(?: ?/ ?| par | per )(?:ménage|mènage|hh)",
78+
null,
79+
true,
80+
"people_per_household"
81+
],
82+
[
83+
"(?P<product_id>[a-zà-ÿ ][a-zà-ÿ1-9',/ \\.\\>\\-\\(\\)]+? *: *[a-zà-ÿ ][a-zà-ÿ1-9',/ \\.\\>\\-\\(\\)]+?) {age_gender_pattern} *\\({unit_of_measure_pattern}(?: collectés?|gathered)?\\)",
84+
null,
85+
true,
86+
"quantity_produced"
87+
],
88+
[
89+
"{product_pattern}{separator_pattern} {age_gender_pattern} ?\\({unit_of_measure_pattern}(?: collectés?|gathered)?\\)",
90+
null,
91+
true,
92+
"quantity_produced"
93+
],
94+
[
95+
"{product_pattern}: {label_pattern} {age_gender_pattern} \\({unit_of_measure_pattern}(?: collectés?|gathered)?\\)",
96+
null,
97+
true,
98+
"quantity_produced"
99+
],
100+
[
101+
"{product_pattern}{separator_pattern} {age_gender_pattern}{separator_pattern} (?:no\\.|nbr?\\.) people per hh",
102+
null,
103+
true,
104+
"people_per_household"
105+
],
106+
[
107+
"(?P<product_id>[a-zà-ÿ ][a-zà-ÿ1-9',/ \\.\\>\\-\\(\\)]+?) *: *{age_gender_pattern}",
108+
null,
109+
true,
110+
null
111+
],
112+
[
113+
"(?P<product_id>[a-zà-ÿ ][a-zà-ÿ1-9',/ \\.\\>\\-\\(\\)]+?) +{age_gender_pattern}",
114+
null,
115+
true,
116+
null
117+
],
118+
[
119+
"{age_gender_pattern} (?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \\.\\>\\-\\(\\)]+?)",
120+
null,
121+
true,
122+
null
123+
],
124+
[
125+
"{age_gender_pattern}{separator_pattern} {product_pattern}",
126+
null,
127+
true,
62128
null
63129
],
64130
[

pipelines_tests/test_assets/test_livelihood_activity_regexes.json

Lines changed: 81 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -940,49 +940,85 @@
940940
"product_id": "petty trade",
941941
"attribute": "people_per_household"
942942
},
943-
"men": {
944-
"additional_identifier": "men"
945-
},
946-
"women": {
947-
"additional_identifier": "women"
948-
},
949-
"boys": {
950-
"additional_identifier": "boys"
951-
},
952-
"girls": {
953-
"additional_identifier": "girls"
954-
},
955-
"hommes": {
956-
"additional_identifier": "hommes"
957-
},
958-
"femmes": {
959-
"additional_identifier": "femmes"
960-
},
961-
"garcons": {
962-
"additional_identifier": "garcons"
963-
},
964-
"garçons": {
965-
"additional_identifier": "garçons"
966-
},
967-
"filles": {
968-
"additional_identifier": "filles"
969-
},
970-
"filles/garçons": {
971-
"additional_identifier": "filles/garçons"
972-
},
973-
"garçons/filles": {
974-
"additional_identifier": "garçons/filles"
975-
},
976-
"garcons/filles": {
977-
"additional_identifier": "garcons/filles"
978-
},
979-
"filles/garcons": {
980-
"additional_identifier": "filles/garcons"
981-
},
982-
"boys/girls": {
983-
"additional_identifier": "boys/girls"
984-
},
985-
"girls/boys": {
986-
"additional_identifier": "girls/boys"
987-
}
943+
"hommes - preparation terre/labour": {
944+
"is_start": true,
945+
"product_id": "preparation terre/labour",
946+
"household_labor_provider": "men"
947+
},
948+
"women- threshing, harvesting": {
949+
"is_start": true,
950+
"product_id": "threshing, harvesting",
951+
"household_labor_provider": "women"
952+
},
953+
"poisson : peche homme (kg collectés)": {
954+
"is_start": true,
955+
"product_id": "poisson : peche",
956+
"household_labor_provider": "men",
957+
"unit_of_measure_id": "kg",
958+
"attribute": "quantity_produced"
959+
},
960+
"men - petty trade : nb. personnes par ménage": {
961+
"is_start": true,
962+
"product_id": "petty trade",
963+
"household_labor_provider": "men",
964+
"attribute": "people_per_household"
965+
},
966+
"pousse pousseur : garçon": {
967+
"is_start": true,
968+
"product_id": "pousse pousseur",
969+
"household_labor_provider": "boys"
970+
},
971+
"femme vente de légumes": {
972+
"is_start": true,
973+
"product_id": "vente de légumes",
974+
"household_labor_provider": "women"
975+
},
976+
"petty trade - men: no. people per hh": {
977+
"is_start": true,
978+
"product_id": "petty trade",
979+
"household_labor_provider": "men",
980+
"attribute": "people_per_household"
981+
},
982+
"petit commerce femme": {
983+
"is_start": true,
984+
"product_id": "petit commerce",
985+
"household_labor_provider": "women"
986+
},
987+
"autre auto-emploi: women - petty trade": {
988+
"strategy_type": "OtherCashIncome",
989+
"is_start": true,
990+
"product_id": "petty trade",
991+
"household_labor_provider": "women"
992+
},
993+
"other self-employment: petty trade - boys": {
994+
"strategy_type": "OtherCashIncome",
995+
"is_start": true,
996+
"product_id": "petty trade",
997+
"household_labor_provider": "boys"
998+
},
999+
"boys/girls - agriculture work": {
1000+
"is_start": true,
1001+
"product_id": "agriculture work",
1002+
"household_labor_provider": "children"
1003+
},
1004+
"garçons/filles collection d'eau": {
1005+
"is_start": true,
1006+
"product_id": "collection d'eau",
1007+
"household_labor_provider": "children"
1008+
},
1009+
"girls charcoal selling": {
1010+
"is_start": true,
1011+
"product_id": "charcoal selling",
1012+
"household_labor_provider": "girls"
1013+
},
1014+
"boys fishing": {
1015+
"is_start": true,
1016+
"product_id": "fishing",
1017+
"household_labor_provider": "boys"
1018+
},
1019+
"filles vente de légumes": {
1020+
"is_start": true,
1021+
"product_id": "vente de légumes",
1022+
"household_labor_provider": "girls"
1023+
}
9881024
}

0 commit comments

Comments
 (0)