@@ -244,7 +244,9 @@ def get_livelihood_activity_regexes() -> list:
244244 placeholder_patterns = {
245245 "label_pattern" : r"[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?" ,
246246 "product_pattern" : r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)" ,
247- "season_pattern" : r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)" , # NOQA: E501
247+ "payment_product_pattern" : r"(?P<payment_product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)" ,
248+ "labor_pattern" : r"(?P<product_id>(?:labou?r|travail|main-d'œuvre|pre-harvest labou?r|labour:? pre-harvest|harvest labou?r|labour:? harvest|post-harvest labou?r|labour:? post-harvest|travail:? pre-r[eéè]colte) *[:-]? *[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)" ,
249+ "season_pattern" : r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)" ,
248250 "additional_identifier_pattern" : r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?" ,
249251 "age_gender_pattern" : age_gender_pattern ,
250252 "unit_of_measure_pattern" : r"(?P<unit_of_measure_id>[a-z]+)" ,
@@ -277,6 +279,7 @@ def get_livelihood_activity_regular_expression_attributes(label: str) -> dict:
277279 "strategy_type" : None ,
278280 "is_start" : None ,
279281 "product_id" : None ,
282+ "payment_product_id" : None ,
280283 "unit_of_measure_id" : None ,
281284 "currency_id" : None ,
282285 "season" : None ,
@@ -341,6 +344,7 @@ def get_livelihood_activity_label_map(activity_type: str) -> dict[str, dict]:
341344 "strategy_type" ,
342345 "is_start" ,
343346 "product_id" ,
347+ "payment_product_id" ,
344348 "unit_of_measure_id" ,
345349 "currency_id" ,
346350 "season" ,
@@ -416,6 +420,10 @@ def get_all_label_attributes(
416420 all_label_attributes = labels .apply (lambda x : get_label_attributes (x , activity_type )).fillna ("" )
417421 all_label_attributes = classifiedproductlookup .do_lookup (all_label_attributes , "product_id" , "product_id" )
418422 all_label_attributes ["product_id" ] = all_label_attributes ["product_id" ].replace (pd .NA , None )
423+ all_label_attributes = classifiedproductlookup .do_lookup (
424+ all_label_attributes , "payment_product_id" , "payment_product_id"
425+ )
426+ all_label_attributes ["payment_product_id" ] = all_label_attributes ["payment_product_id" ].replace (pd .NA , None )
419427 all_label_attributes = unitofmeasurelookup .do_lookup (
420428 all_label_attributes , "unit_of_measure_id" , "unit_of_measure_id"
421429 )
@@ -461,7 +469,9 @@ def get_all_label_attributes(
461469 columns = ["activity_label" , "status" , "strategy_type" , "product_id" , "season_original" , "country_id" ]
462470 if livelihood_zone_id :
463471 columns .append ("livelihood_zone_id" )
464- raise ValueError ("Unrecognized seasons in labels:\n " + unrecognized_seasons_df [columns ].to_markdown ())
472+ raise ValueError (
473+ f"Unrecognized seasons in { activity_type } labels:\n " + unrecognized_seasons_df [columns ].to_markdown ()
474+ )
465475
466476 # Make sure we keep the same index so we can match by row number
467477 all_label_attributes .index = labels .index
@@ -541,6 +551,9 @@ def livelihood_activity_label_recognition_dataframe(
541551 recognized_attributes_df ["product_name" ] = (
542552 recognized_attributes_df ["product_id" ].map (product_name_map ).fillna ("" )
543553 )
554+ recognized_attributes_df ["payment_product_name" ] = (
555+ recognized_attributes_df ["payment_product_id" ].map (product_name_map ).fillna ("" )
556+ )
544557
545558 # Join the recognized attributes to the label dataframe
546559 label_df ["activity_type" ] = activity_type
@@ -554,10 +567,15 @@ def livelihood_activity_label_recognition_dataframe(
554567 regex_attributes_df = pd .DataFrame .from_records (
555568 all_labels_df ["label" ].astype (str ).map (get_livelihood_activity_regular_expression_attributes )
556569 )
557- regex_attributes_df = ClassifiedProductLookup (require_match = False ).do_lookup (
558- regex_attributes_df , "product_id" , "product_id"
570+ classifiedproductlookup = ClassifiedProductLookup (require_match = False )
571+ regex_attributes_df = classifiedproductlookup .do_lookup (regex_attributes_df , "product_id" , "product_id" )
572+ regex_attributes_df = classifiedproductlookup .do_lookup (
573+ regex_attributes_df , "payment_product_id" , "payment_product_id"
559574 )
560575 regex_attributes_df ["product_name" ] = regex_attributes_df ["product_id" ].map (product_name_map ).fillna ("" )
576+ regex_attributes_df ["payment_product_name" ] = (
577+ regex_attributes_df ["payment_product_id" ].map (product_name_map ).fillna ("" )
578+ )
561579 all_labels_df = all_labels_df .join (
562580 regex_attributes_df ,
563581 how = "left" ,
@@ -574,6 +592,7 @@ def livelihood_activity_label_recognition_dataframe(
574592 "strategy_type" ,
575593 "is_start" ,
576594 "product_id" ,
595+ "payment_product_id" ,
577596 "unit_of_measure_id" ,
578597 "currency_id" ,
579598 "season" ,
@@ -583,6 +602,7 @@ def livelihood_activity_label_recognition_dataframe(
583602 )
584603 )
585604 db_labels_df ["product_name" ] = db_labels_df ["product_id" ].map (product_name_map ).fillna ("" )
605+ db_labels_df ["payment_product_name" ] = db_labels_df ["payment_product_id" ].map (product_name_map ).fillna ("" )
586606 all_labels_df = all_labels_df .join (
587607 db_labels_df .set_index (["label_lower" , "activity_type" ]),
588608 on = ("label_lower" , "activity_type" ),
@@ -633,6 +653,9 @@ def livelihood_activity_label_recognition_dataframe(
633653 how = "left" ,
634654 )
635655 activity_type_label_df ["product_name" ] = activity_type_label_df ["product_id" ].map (product_name_map ).fillna ("" )
656+ activity_type_label_df ["payment_product_name" ] = (
657+ activity_type_label_df ["payment_product_id" ].map (product_name_map ).fillna ("" )
658+ )
636659 summary_label_dfs .append (activity_type_label_df )
637660
638661 # Concatenate all the activity type dataframes back into a single dataframe and put
@@ -655,13 +678,15 @@ def livelihood_activity_label_recognition_dataframe(
655678 "strategy_type" ,
656679 "attribute" ,
657680 "product_name" ,
681+ "payment_product_name" ,
658682 "unit_of_measure_id_original" ,
659683 "currency_id" ,
660684 "season" ,
661685 "additional_identifier" ,
662686 "notes" ,
663687 "household_labor_provider" ,
664688 "product_id" ,
689+ "payment_product_id" ,
665690 "activity_label" ,
666691 ]
667692 ]
@@ -671,11 +696,14 @@ def livelihood_activity_label_recognition_dataframe(
671696 all_labels_df [
672697 [
673698 "label_lower" ,
699+ "activity_type" ,
674700 "activity_label_regex" ,
675701 "strategy_type_regex" ,
676702 "is_start_regex" ,
677703 "product_id_regex" ,
678704 "product_name_regex" ,
705+ "payment_product_id_regex" ,
706+ "payment_product_name_regex" ,
679707 "unit_of_measure_id_regex" ,
680708 "season_regex" ,
681709 "additional_identifier_regex" ,
@@ -685,6 +713,8 @@ def livelihood_activity_label_recognition_dataframe(
685713 "is_start_db" ,
686714 "product_id_db" ,
687715 "product_name_db" ,
716+ "payment_product_id_db" ,
717+ "payment_product_name_db" ,
688718 "unit_of_measure_id_db" ,
689719 "season_db" ,
690720 "additional_identifier_db" ,
@@ -693,8 +723,8 @@ def livelihood_activity_label_recognition_dataframe(
693723 ]
694724 ]
695725 .drop_duplicates ()
696- .set_index ("label_lower" ),
697- on = "label" ,
726+ .set_index ([ "label_lower" , "activity_type" ] ),
727+ on = [ "label" , "activity_type" ] ,
698728 how = "inner" ,
699729 )
700730
@@ -1570,6 +1600,33 @@ def get_instances_from_dataframe(
15701600 livelihood_strategy ["attribute_rows" ][attribute ],
15711601 )
15721602 )
1603+ elif attribute in activity_field_names and value :
1604+ # This is an attribute for the LivelihoodActivity rather than the LivelihoodStrategy,
1605+ # so add it to the livelihood activities for this strategy.
1606+ if attribute not in livelihood_strategy ["attribute_rows" ]:
1607+ livelihood_strategy ["attribute_rows" ][attribute ] = row
1608+ for livelihood_activity in livelihood_activities_for_strategy :
1609+ livelihood_activity [attribute ] = value
1610+ elif livelihood_activities_for_strategy [0 ][attribute ] != value :
1611+ errors .append (
1612+ "Found different value '%s' from row %s for existing attribute '%s' with value '%s' from row %s for activity with label '%s'"
1613+ % (
1614+ value ,
1615+ row ,
1616+ attribute ,
1617+ livelihood_activities_for_strategy [0 ][attribute ],
1618+ livelihood_strategy ["attribute_rows" ][attribute ],
1619+ label ,
1620+ )
1621+ )
1622+ elif (
1623+ attribute .endswith ("_original" )
1624+ and attribute .removesuffix ("_original" ) in activity_field_names
1625+ and value
1626+ ):
1627+ # Keep the original value of the attribute to aid trouble-shooting.
1628+ for livelihood_activity in livelihood_activities_for_strategy :
1629+ livelihood_activity [attribute ] = value
15731630
15741631 # Update the LivelihoodActivity records
15751632 if any (value for value in df .loc [row , "B" :].astype (str ).str .strip ()):
0 commit comments