Skip to content

Commit 039965e

Browse files
authored
Merge pull request #202 from American-Institutes-for-Research/HEA-380/update_load_all_fewsnet_geographies_to_use_metadata
Update load_all_fewsnet_geographies to use fnid from metadata
2 parents 11efb14 + 7c5bf32 commit 039965e

1 file changed

Lines changed: 110 additions & 51 deletions

File tree

pipelines/jobs/metadata.py

Lines changed: 110 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -513,58 +513,117 @@ def load_all_community_aliases(context: OpExecutionContext):
513513
def load_all_fewsnet_geographies(context: OpExecutionContext):
    """
    Load all Livelihood Zone Baseline geographies from the FEWS NET Data Warehouse via the API.

    Reads the 'Metadata' sheet from BSS Metadata Google Sheet to get fnid for each baseline.

    For every row of the sheet that has an fnid, the matching
    LivelihoodZoneBaseline (looked up by livelihood zone code and
    reference_year_end_date) is updated with:

    * ``geography`` - the geometry of the first feature returned by the FDW
      feature endpoint for that fnid, promoted to a MultiPolygon when the API
      returns a single Polygon.
    * ``population_estimate`` - the feature's ``estimated_population``, when
      present and convertible to an integer.
    * ``population_source`` - the "Population distribution" description from
      the FDW geographic unit metadata endpoint, when it can be retrieved.

    Rows that cannot be processed are logged and skipped so that one bad row
    does not prevent the remaining baselines from being updated. A failure to
    retrieve the population source does not prevent the geometry and the
    population estimate from being saved.
    """
    # requests never times out unless told to, so a stalled FDW connection
    # would otherwise hang the job indefinitely.
    request_timeout = 60

    metadata_df = _read_fewsnet_baseline_metadata()
    context.log.info(f"Found {len(metadata_df)} baseline zones with fnid in Metadata sheet")

    # Process each baseline zone
    for _, row in metadata_df.iterrows():
        zone_code = row["code"]
        # Spreadsheet cells may carry stray whitespace; a blank fnid must not be
        # sent to the API because the resulting query would not be restricted
        # to a single geographic unit.
        fnid = str(row["fnid"]).strip()

        try:
            reference_year_end = pd.to_datetime(row["reference_year_end_date"])
        except (ValueError, TypeError) as e:
            context.log.warning(f"Skipping Metadata row with invalid reference_year_end_date (code={zone_code}): {e}")
            continue
        if not fnid or pd.isna(zone_code) or pd.isna(reference_year_end):
            context.log.warning(
                f"Skipping incomplete Metadata row: code={zone_code}, "
                f"reference_year_end_date={row['reference_year_end_date']}, fnid={row['fnid']}"
            )
            continue
        reference_year_end_date = reference_year_end.date()

        try:
            livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get(
                livelihood_zone__code=zone_code, reference_year_end_date=reference_year_end_date
            )
        except LivelihoodZoneBaseline.DoesNotExist:
            context.log.warning(
                f"Baseline not found for code={zone_code}, reference_year_end_date={reference_year_end_date}"
            )
            continue
        except LivelihoodZoneBaseline.MultipleObjectsReturned:
            context.log.error(
                f"Multiple baselines found for code={zone_code}, reference_year_end_date={reference_year_end_date}"
            )
            continue

        # Only the geography request and its parsing are guarded here, so that
        # the error messages below are accurate about what actually failed.
        try:
            response = requests.get(
                "https://fdw.fews.net/api/feature/",
                params={
                    "format": "geojson",
                    "unit_type": "livelihood_zone",
                    "fnid": fnid,
                    "fields": "with_population",
                    "demography_year": livelihood_zone_baseline.reference_year_end_date.year,
                },
                timeout=request_timeout,
            )
            response.raise_for_status()

            geojson_data = response.json()

            features = geojson_data.get("features")
            if not features:
                context.log.warning(f"No features found for fnid={fnid}, baseline={livelihood_zone_baseline}")
                continue

            srid = int(geojson_data["crs"]["properties"]["name"].split(":")[-1])

            feature = features[0]

            # Convert to Django geometry
            geometry = GEOSGeometry(json.dumps(feature["geometry"]), srid=srid)
            if isinstance(geometry, Polygon):
                geometry = MultiPolygon(geometry)
        except requests.RequestException as e:
            context.log.error(f"Failed to fetch geography for fnid={fnid}, baseline={livelihood_zone_baseline}: {e}")
            continue
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers a null "crs" member; json.JSONDecodeError is a ValueError.
            context.log.error(
                f"Failed to parse geography response for fnid={fnid}, baseline={livelihood_zone_baseline}: {e}"
            )
            continue

        # Update the baseline with the geometry
        livelihood_zone_baseline.geography = geometry

        # Extract population estimate if available
        population_updated = False
        population_estimate = (feature.get("properties") or {}).get("estimated_population")
        if population_estimate is not None:
            try:
                livelihood_zone_baseline.population_estimate = int(population_estimate)
                population_updated = True
            except (ValueError, TypeError) as e:
                context.log.warning(f"Invalid population data for {livelihood_zone_baseline} (fnid={fnid}): {e}")

        if population_updated:
            # Population sources are in the geographicunit metadata url
            population_source = _fetch_population_source(context, feature.get("id"), request_timeout)
            if population_source:
                livelihood_zone_baseline.population_source = population_source
            else:
                context.log.warning(
                    f"No population source found in metadata for {livelihood_zone_baseline} (fnid={fnid})"
                )

        livelihood_zone_baseline.save()

        log_msg = f"Updated geometry for {livelihood_zone_baseline} (fnid={fnid})"
        # Only report a population when it was actually set during this run.
        if population_updated:
            log_msg += f" with population={livelihood_zone_baseline.population_estimate}"
        context.log.info(log_msg)


# NOTE: the helpers below are deliberately defined after the entry point so that
# any decorator on the line preceding ``def load_all_fewsnet_geographies`` keeps
# applying to that function. They are resolved at call time, so order is safe.


def _read_fewsnet_baseline_metadata():
    """
    Return the rows of the 'Metadata' sheet of the BSS Metadata Google Sheet that have an fnid.

    The sheet is expected to contain the columns: code, reference_year_end_date, fnid.
    Credentials are read from the GOOGLE_APPLICATION_CREDENTIALS environment
    variable, which must contain the service account JSON itself.
    """
    storage_options = {"token": "service_account", "access": "read_only", "root_file_id": "0AOJ0gJ8sjnO7Uk9PVA"}
    storage_options["creds"] = json.loads(os.environ["GOOGLE_APPLICATION_CREDENTIALS"])
    p = UPath("gdrive://Database Design/BSS Metadata", **storage_options)

    with p.fs.open(p.path, mode="rb", cache_type="bytes") as f:
        workbook = f
        # Google Sheets have to be exported rather than read directly
        if isinstance(f, GoogleDriveFile) and (f.details["mimeType"] == "application/vnd.google-apps.spreadsheet"):
            workbook = BytesIO(p.fs.export(p.path, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"))

        # Read while the underlying file is still open, in case it was not an exported copy
        metadata_df = pd.read_excel(workbook, sheet_name="Metadata", engine="openpyxl")

    # Filter out rows without fnid
    return metadata_df[metadata_df["fnid"].notna()]


def _fetch_population_source(context, unit_id, timeout):
    """
    Return the "Population distribution" description for an FDW geographic unit, or None.

    None is returned when the unit has no id, when the metadata request fails
    or returns something other than the expected JSON structure, or when no
    "Population distribution" entry is present. Request failures are logged as
    warnings rather than raised, because the population source is optional.
    """
    if unit_id is None:
        return None

    metadata_url = f"https://fdw.fews.net/api/geographicunit/metadata/?id={unit_id}&format=json&fields=with_population"
    context.log.info(f"Fetching population metadata from {metadata_url}")
    try:
        metadata_response = requests.get(metadata_url, timeout=timeout)
        metadata_response.raise_for_status()
        metadata_json = metadata_response.json()
    except (requests.RequestException, ValueError) as e:
        context.log.warning(f"Failed to fetch population metadata from {metadata_url}: {e}")
        return None

    if not isinstance(metadata_json, dict):
        return None

    # Extract population source from metadata
    population_list = (metadata_json.get("metadata") or {}).get("Population") or []
    for pop_item in population_list:
        if isinstance(pop_item, dict) and pop_item.get("Name") == "Population distribution":
            population_source = pop_item.get("Description")
            context.log.info(f"Found population source: {population_source}")
            return population_source
    return None
568627

569628

570629
@job

0 commit comments

Comments
 (0)