Skip to content

Commit d5d6f9f

Browse files
committed
Check for duplicate entries in load_metadata_for_model
1 parent 7c6045a commit d5d6f9f

1 file changed

Lines changed: 8 additions & 4 deletions

File tree

pipelines/jobs/metadata.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -149,14 +149,18 @@ def load_metadata_for_model(context: OpExecutionContext, sheet_name: str, model:
149149
id_fields = "wealth_characteristic_label"
150150
else:
151151
id_fields = "code"
152+
# Ensure we don't have any duplicate entries
153+
duplicates_df = df[df.duplicated(subset=id_fields, keep=False)]
154+
if not duplicates_df.empty:
155+
raise ValueError(
156+
f"Found duplicate entries in worksheet '{sheet_name}' for {model_name}:\n{duplicates_df.to_markdown()}"
157+
)
152158
# Add primary keys if they are not already in the id_fields,
153159
# so that we can save individual instances if required
154160
if isinstance(id_fields, str):
155161
id_fields = [id_fields]
156162
if model._meta.pk.name not in id_fields:
157-
keys_df = pd.DataFrame.from_records(
158-
model.objects.all().values(model._meta.pk.name, *id_fields)
159-
) # NOQA: E501
163+
keys_df = pd.DataFrame.from_records(model.objects.all().values(model._meta.pk.name, *id_fields))
160164
if keys_df.empty:
161165
keys_df = pd.DataFrame(columns=[model._meta.pk.name] + id_fields)
162166
df = df.merge(
@@ -178,7 +182,6 @@ def load_metadata_for_model(context: OpExecutionContext, sheet_name: str, model:
178182
update_fields=[k for k in fields if k not in id_fields and k != model._meta.pk.name],
179183
unique_fields=id_fields,
180184
)
181-
context.log.info(f"Created or updated {len(instances)} {sheet_name} instances")
182185
except Exception:
183186
# Bulk create failed, so try creating/updating the instances one at a time to see which one failed
184187
for i, instance in enumerate(instances):
@@ -192,6 +195,7 @@ def load_metadata_for_model(context: OpExecutionContext, sheet_name: str, model:
192195
raise RuntimeError(
193196
f"Failed to create/update {model_name} instance {i} {key} from:\n{json.dumps(instance, indent=4, ensure_ascii=False)}"
194197
) from e
198+
context.log.info(f"Created or updated {len(instances)} {sheet_name} instances")
195199

196200

197201
@op

0 commit comments

Comments
 (0)