@@ -378,15 +378,18 @@ def get_label_attributes(label: str, activity_type: str) -> pd.Series:
378378 return pd .Series (attributes )
379379
380380
381- def get_all_label_attributes (labels : pd .Series , activity_type : str , country_code : str | None ) -> pd .DataFrame :
381+ def get_all_label_attributes (
382+ labels : pd .Series , activity_type : str , country_code : str | None , livelihood_zone_id : str | None
383+ ) -> pd .DataFrame :
382384 """
383385 Return a DataFrame of the attributes for all of the labels in the supplied Series.
384386
385387 The Product, Unit of Measure and Season attributes are processed using the relevant Lookup classes so that the
386388 resulting DataFrame contains the correct identifiers for these attributes.
387389
388- The country_code parameter is optional so that this function can be used to test individual labels,
389- but it should be provided when processing a BSS because the Season lookup is country-specific.
390+ The country_code and livelihood_zone_id parameters are optional so that this function can be used to test
391+ individual labels, but they should be provided when processing a BSS because the Season lookup is country-specific,
392+ and may rely on sub-national seasons using zone-specific aliases in some countries.
390393 """
391394 # Clear caches for the functions, so that we use the latest data from the database
392395 get_label_attributes .cache_clear ()
@@ -411,12 +414,38 @@ def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code
411414 all_label_attributes , "unit_of_measure_id" , "unit_of_measure_id"
412415 )
413416 all_label_attributes ["unit_of_measure_id" ] = all_label_attributes ["unit_of_measure_id" ].replace (pd .NA , None )
414- # Add the country_id because it is required for the Season lookup
417+ # Convert the season alias to an actual Season.name, which is the natural key for a Season.
418+ # We only do this if the country_id is in the dataframe, so that we can use this function to test labels
419+ # outside the context of a BSS, e.g. in unit tests, without needing to define country-specific seasons.
420+ # The country_id is needed for an actual Season lookup because all the BSSs use Season 1 and Season 2 names for
421+ # the seasons and we need to know the Country (and maybe the Strategy Type) to limit the lookup to a small enough
422+ # set of rows that Season 1 and Season 2 can uniquely identify a specific Season.
423+ # The all_label_attributes dataframe should also contain a 'strategy_type' column, which will be used by the
424+ # lookup to restrict the possible matches to Seasons with a matching `purpose` (or those with a null purpose).
415425 if country_code :
416426 all_label_attributes ["country_id" ] = country_code
417- # The all_label_attributes dataframe should also contain a 'strategy_type' column, which will be used by the
418- # lookup to restrict the possible matches to Seasons with a matching `purpose` (or those with a null purpose).
419427 all_label_attributes = seasonnamelookup .do_lookup (all_label_attributes , "season" , "season" )
428+ # Some countries have sub-national seasons, but still use 'Season 1' and 'Season 2' labels in their BSSs,
429+ # so we need to be able to match these labels to a specific set of sub-national seasons for each BSS. We do
430+ # this by overwriting the national-level season we just identified with a zone-specific season, if available.
431+ # Zone-specific seasons include the livelihood zone code in the alias, e.g. `Season 1 (NG04)`.
432+ if livelihood_zone_id :
433+ all_label_attributes ["livelihood_zone_id" ] = livelihood_zone_id
434+ all_label_attributes ["zone_season" ] = all_label_attributes [
435+ ["season_original" , "livelihood_zone_id" ]
436+ ].apply (
437+ lambda x : (
438+ f"{ x ['season_original' ]} ({ x ['livelihood_zone_id' ]} )"
439+ if x ["season_original" ]
440+ else x ["season_original" ]
441+ ),
442+ axis = 1 ,
443+ )
444+ all_label_attributes = seasonnamelookup .do_lookup (all_label_attributes , "zone_season" , "zone_season" )
445+ # Make a final season column that uses the zone-specific season if available, and the national-level season if not.
446+ all_label_attributes ["season" ] = all_label_attributes ["zone_season" ].fillna (all_label_attributes ["season" ])
447+ # Drop the intermediate columns used for the lookup
448+ all_label_attributes = all_label_attributes .drop (columns = ["zone_season" , "zone_season_original" ])
420449 all_label_attributes ["season" ] = all_label_attributes ["season" ].replace (pd .NA , None )
421450
422451 # Make sure we keep the same index so we can match by row number
@@ -685,7 +714,10 @@ def get_instances_from_dataframe(
685714
686715 # Get a dataframe of the attributes for each label in column A
687716 all_label_attributes = get_all_label_attributes (
688- df ["A" ], activity_type , livelihood_zone_baseline .livelihood_zone .country_id
717+ df ["A" ],
718+ activity_type ,
719+ livelihood_zone_baseline .livelihood_zone .country_id ,
720+ livelihood_zone_baseline .livelihood_zone_id ,
689721 )
690722
691723 # Check that we recognize all of the activity labels
0 commit comments