Skip to content

Commit 833735e

Browse files
committed
Support sub-national seasons using zone-specific aliases - see HEA-196
1 parent 1527338 commit 833735e

1 file changed

Lines changed: 39 additions & 7 deletions

File tree

pipelines/assets/livelihood_activity.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -378,15 +378,18 @@ def get_label_attributes(label: str, activity_type: str) -> pd.Series:
378378
return pd.Series(attributes)
379379

380380

381-
def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code: str | None) -> pd.DataFrame:
381+
def get_all_label_attributes(
382+
labels: pd.Series, activity_type: str, country_code: str | None, livelihood_zone_id: str | None
383+
) -> pd.DataFrame:
382384
"""
383385
Return a DataFrame of the attributes for all of the labels in the supplied Series.
384386
385387
The Product, Unit of Measure and Season attributes are processed using the relevant Lookup classes so that the
386388
resulting DataFrame contains the correct identifiers for these attributes.
387389
388-
The country_code parameter is optional so that this function can be used to test individual labels,
389-
but it should be provided when processing a BSS because the Season lookup is country-specific.
390+
The country_code and livelihood_zone_id parameters are optional so that this function can be used to test
391+
individual labels, but they should be provided when processing a BSS because the Season lookup is country-specific,
392+
and may rely on sub-national seasons using zone-specific aliases in some countries.
390393
"""
391394
# Clear caches for the functions, so that we use the latest data from the database
392395
get_label_attributes.cache_clear()
@@ -411,12 +414,38 @@ def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code
411414
all_label_attributes, "unit_of_measure_id", "unit_of_measure_id"
412415
)
413416
all_label_attributes["unit_of_measure_id"] = all_label_attributes["unit_of_measure_id"].replace(pd.NA, None)
414-
# Add the country_id because it is required for the Season lookup
417+
# Convert the season alias to an actual Season.name, which is the natural key for a Season.
418+
# We only do this if the country_id is in the dataframe, so that we can use this function to test labels
419+
# outside the context of a BSS, e.g. in unit tests, without needing to define country-specific seasons.
420+
# The country_id is needed for an actual Season lookup because all the BSSs use Season 1 and Season 2 names for
421+
# the seasons and we need to know the Country (and maybe the Strategy Type) to limit the lookup to a small enough
422+
# set of rows that Season 1 and Season 2 can uniquely identify a specific Season.
423+
# The all_label_attributes dataframe should also contain a 'strategy_type' column, which will be used by the
424+
# lookup to restrict the possible matches to Seasons with a matching `purpose` (or those with a null purpose).
415425
if country_code:
416426
all_label_attributes["country_id"] = country_code
417-
# The all_label_attributes dataframe should also contain a 'strategy_type' column, which will be used by the
418-
# lookup to restrict the possible matches to Seasons with a matching `purpose` (or those with a null purpose).
419427
all_label_attributes = seasonnamelookup.do_lookup(all_label_attributes, "season", "season")
428+
# Some countries have sub-national seasons, but still use 'Season 1' and 'Season 2' labels in their BSSs,
429+
# so we need to be able to match these labels to a specific set of sub-national seasons for each BSS. We do
430+
# this by overwriting the national-level season we just identified with a zone-specific season, if available.
431+
# Zone-specific seasons include the livelihood zone code in the alias, e.g. `Season 1 (NG04)`.
432+
if livelihood_zone_id:
433+
all_label_attributes["livelihood_zone_id"] = livelihood_zone_id
434+
all_label_attributes["zone_season"] = all_label_attributes[
435+
["season_original", "livelihood_zone_id"]
436+
].apply(
437+
lambda x: (
438+
f"{x['season_original']} ({x['livelihood_zone_id']})"
439+
if x["season_original"]
440+
else x["season_original"]
441+
),
442+
axis=1,
443+
)
444+
all_label_attributes = seasonnamelookup.do_lookup(all_label_attributes, "zone_season", "zone_season")
445+
# Make a final season column that uses the zone-specific season if available, and the national-level season if not.
446+
all_label_attributes["season"] = all_label_attributes["zone_season"].fillna(all_label_attributes["season"])
447+
# Drop the intermediate columns used for the lookup
448+
all_label_attributes = all_label_attributes.drop(columns=["zone_season", "zone_season_original"])
420449
all_label_attributes["season"] = all_label_attributes["season"].replace(pd.NA, None)
421450

422451
# Make sure we keep the same index so we can match by row number
@@ -685,7 +714,10 @@ def get_instances_from_dataframe(
685714

686715
# Get a dataframe of the attributes for each label in column A
687716
all_label_attributes = get_all_label_attributes(
688-
df["A"], activity_type, livelihood_zone_baseline.livelihood_zone.country_id
717+
df["A"],
718+
activity_type,
719+
livelihood_zone_baseline.livelihood_zone.country_id,
720+
livelihood_zone_baseline.livelihood_zone_id,
689721
)
690722

691723
# Check that we recognize all of the activity labels

0 commit comments

Comments
 (0)