@@ -423,7 +423,7 @@ def get_all_label_attributes(
423423 # Convert the season alias to an actual Season.name, which is the natural key for a Season.
424424 # We only do this if the country_id is in the dataframe, so that we can use this function to test labels
425425 # outside the context of a BSS, e.g. in unit tests, without needing to define country-specific seasons.
426- # The country_id is needed for an actual Season lookup because all the BSSs use Season 1 and Season 2 names for
426+ # The country_id is needed for an actual Season lookup because all the BSSs use Season 1 and Season 2 aliases for
427427 # the seasons and we need to know the Country (and maybe the Strategy Type) to limit the lookup to a small enough
428428 # set of rows that Season 1 and Season 2 can uniquely identify a specific Season.
429429 # The all_label_attributes dataframe should also contain a 'strategy_type' column, which will be used by the
@@ -453,6 +453,17 @@ def get_all_label_attributes(
453453 # Drop the intermediate columns used for the lookup
454454 all_label_attributes = all_label_attributes .drop (columns = ["zone_season" , "zone_season_original" ])
455455 all_label_attributes ["season" ] = all_label_attributes ["season" ].astype (object ).replace (pd .NA , None )
456+ # Check that we found an actual Season for any labels that contained a season alias
457+ unrecognized_seasons_df = all_label_attributes [
458+ (all_label_attributes ["season_original" ] != "" ) & (all_label_attributes ["season" ].isna ())
459+ ]
460+ if not unrecognized_seasons_df .empty :
461+ raise ValueError (
462+ "Unrecognized seasons in labels:\n "
463+ + unrecognized_seasons_df [
464+ ["activity_label" , "status" , "strategy_type" , "product_id" , "season_original" ]
465+ ].to_markdown ()
466+ )
456467
457468 # Make sure we keep the same index so we can match by row number
458469 all_label_attributes .index = labels .index
@@ -756,15 +767,30 @@ def get_instances_from_dataframe(
756767 ]
757768
758769 # Save the identifier for Season 2 because we need it when creating MilkProduction and ButterProduction instances
770+ # Try to find zone-specific seasons first, and fall back to national-level seasons if necessary.
759771 seasonnamelookup = SeasonNameLookup ()
760- dairy_season2_names = [
761- seasonnamelookup .get (
762- "Season 2" , country_id = livelihood_zone_baseline .livelihood_zone .country_id , purpose = "MilkProduction"
763- ),
764- seasonnamelookup .get (
765- "Season 2" , country_id = livelihood_zone_baseline .livelihood_zone .country_id , purpose = "ButterProduction"
766- ),
767- ]
772+ dairy_season2_names = []
773+ for purpose in ["MilkProduction" , "ButterProduction" ]:
774+ for alias in ["season 2" , "saison 2" , "2ème saison" ]:
775+ # Try to find a zone-specific season first
776+ season_2 = seasonnamelookup .get (
777+ f"{ alias } ({ livelihood_zone_baseline .livelihood_zone_id } )" ,
778+ country_id = livelihood_zone_baseline .livelihood_zone .country_id ,
779+ purpose = purpose ,
780+ )
781+ # Fall back to a general season if a zone-specific one isn't found
782+ if not season_2 :
783+ season_2 = seasonnamelookup .get (
784+ alias , country_id = livelihood_zone_baseline .livelihood_zone .country_id , purpose = purpose
785+ )
786+ # If we found a season, then there is no need to continue looking for other aliases
787+ if season_2 :
788+ break
789+ if not season_2 :
790+ raise ValueError (
791+ "Could not find a Season matching 'Season 2' for purpose '%s' in BSS %s" % (purpose , partition_key )
792+ )
793+ dairy_season2_names .append (season_2 )
768794
769795 # Prepare a lookup for ClassifiedProduct, so it caches and reuses the results of .get() lookups
770796 classifiedproductlookup = ClassifiedProductLookup ()
@@ -1047,30 +1073,65 @@ def get_instances_from_dataframe(
10471073 for livelihood_activity in livelihood_activities_for_strategy
10481074 )
10491075 ):
1050- # Find the MilkProduction livelihood strategy
1076+ # Find the corresponding MilkProduction livelihood strategy
1077+ # First, find the equivalent MilkProduction season for the current ButterProduction strategy.
1078+ # Try to find a Zone-specific season first, and fall back to a general season if necessary.
1079+ milk_season = seasonnamelookup .get (
1080+ f'{ livelihood_strategy ["season_original" ]} ({ livelihood_zone_baseline .livelihood_zone_id } )' ,
1081+ country_id = livelihood_zone_baseline .livelihood_zone .country_id ,
1082+ purpose = "MilkProduction" ,
1083+ )
1084+ if not milk_season :
1085+ milk_season = seasonnamelookup .get (
1086+ livelihood_strategy ["season_original" ],
1087+ country_id = livelihood_zone_baseline .livelihood_zone .country_id ,
1088+ purpose = "MilkProduction" ,
1089+ )
1090+ if not milk_season :
1091+ raise ValueError (
1092+ f"Could not find a MilkProduction Season matching '{ livelihood_strategy ['season_original' ]} ' "
1093+ f"from ButterProduction strategy for season '{ livelihood_strategy ['season_original' ]} ' "
1094+ f"({ livelihood_strategy ['season' ]} ) at row { livelihood_strategy ['bss_row' ]} from:\n "
1095+ )
1096+ # Next, find the candidate MilkProduction strategies
10511097 milk_strategy = None
1052- for strategy in reversed (livelihood_strategies ):
1098+ milk_strategies = [
1099+ strategy
1100+ for strategy in reversed (livelihood_strategies )
1101+ if strategy ["strategy_type" ] == "MilkProduction"
1102+ ]
1103+ # Test each strategy in turn
1104+ for strategy in milk_strategies :
10531105 if (
1054- strategy ["strategy_type" ] == "MilkProduction"
10551106 # Season for the current LivelihoodStrategy hasn't been converted to a natural key yet,
10561107 # so coerce it to a list for comparison
1057- and strategy ["season" ]
1058- == [
1059- seasonnamelookup .get (
1060- livelihood_strategy ["season_original" ],
1061- country_id = livelihood_zone_baseline .livelihood_zone .country_id ,
1062- purpose = "MilkProduction" ,
1063- )
1064- ]
1108+ strategy ["season" ] == [milk_season ]
10651109 and strategy ["additional_identifier" ]
10661110 == livelihood_strategy ["additional_identifier" ]
10671111 ):
10681112 milk_strategy = strategy
10691113 break
10701114 if not milk_strategy :
1115+ # Keep only the required attributes so that the error message is clearer
1116+ milk_strategies = [
1117+ {
1118+ k : strategy [k ]
1119+ for k in [
1120+ "bss_row" ,
1121+ "strategy_type" ,
1122+ "season_original" ,
1123+ "season" ,
1124+ "product_id" ,
1125+ "additional_identifier" ,
1126+ ]
1127+ }
1128+ for strategy in milk_strategies
1129+ ]
10711130 raise ValueError (
10721131 f"Could not find the MilkProduction Livelihood Strategy associated with "
1073- f"the ButterProduction strategy at row { row } ."
1132+ f"the ButterProduction strategy for season '{ livelihood_strategy ['season_original' ]} ' "
1133+ f"({ livelihood_strategy ['season' ]} ) at row { livelihood_strategy ['bss_row' ]} from:\n "
1134+ f"{ '\n ' .join ([str (strategy ) for strategy in milk_strategies ])} "
10741135 )
10751136 milk_activities = {
10761137 activity ["wealth_group" ]: activity
@@ -1717,6 +1778,28 @@ def get_instances_from_dataframe(
17171778 }
17181779 if not unrecognized_labels .empty :
17191780 metadata ["unrecognized_labels" ] = MetadataValue .md (unrecognized_labels .to_markdown (index = False ))
1781+ if livelihood_strategies and activity_type != ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY :
1782+ seasons_df = pd .DataFrame (
1783+ [
1784+ (
1785+ livelihood_strategy ["strategy_type" ],
1786+ livelihood_strategy ["season_original" ],
1787+ livelihood_strategy ["season" ][0 ] if livelihood_strategy ["season" ] else None ,
1788+ )
1789+ for livelihood_strategy in livelihood_strategies
1790+ if livelihood_strategy .get ("season" ) or livelihood_strategy .get ("season_original" )
1791+ ],
1792+ columns = ("strategy_type" , "season_original" , "season" ),
1793+ ).drop_duplicates ()
1794+ if seasons_df ["season" ].isna ().any ():
1795+ metadata ["unrecognized_seasons" ] = MetadataValue .md (
1796+ seasons_df [seasons_df ["season" ].isna ()].to_markdown (index = False )
1797+ )
1798+
1799+ if seasons_df ["season" ].notna ().any ():
1800+ metadata ["recognized_seasons" ] = MetadataValue .md (
1801+ seasons_df [seasons_df ["season" ].notna ()].to_markdown (index = False )
1802+ )
17201803 metadata ["pct_rows_recognized" ] = round (
17211804 (
17221805 1
0 commit comments