Skip to content

Commit c14911f

Browse files
committed
Updated tests to handle JSON Lines format
1 parent 267665e commit c14911f

2 files changed

Lines changed: 59 additions & 36 deletions

File tree

scripts/manual_uploads/manual_s3_dynamo_upload.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
import os
55
import argparse
66
from pathlib import Path
7-
from typing import Any, Dict, List, Optional
7+
from typing import Any, Dict, List, Optional, Union, Generator
88
from decimal import Decimal
99

1010

11-
12-
def map_dynamo_type(value: Any):
11+
def map_dynamo_type(value: Any) -> Dict[str, Any]:
1312
if isinstance(value, str):
1413
return {"S": value}
1514
elif isinstance(value, bool):
@@ -29,7 +28,20 @@ def map_dynamo_type(value: Any):
2928
return {"S": value}
3029

3130

32-
def upload_to_s3(s3_client, bucket, filepath, dry_run=False):
31+
def load_json_lines(filepath: Union[str, Path]) -> Generator[Dict[str, Any], None, None]:
32+
with open(filepath) as f:
33+
for line in f:
34+
if line.strip():
35+
yield json.loads(line)
36+
37+
38+
def upload_to_s3(
39+
s3_client: Any,
40+
bucket: str,
41+
filepath: Union[str, Path],
42+
dry_run: bool = False
43+
) -> None:
44+
3345
filename = os.path.basename(filepath)
3446
print(f"Filepath: {filepath}")
3547
s3_key = f"manual-uploads/{filename}"
@@ -45,21 +57,23 @@ def upload_to_s3(s3_client, bucket, filepath, dry_run=False):
4557
print(f"Failed to upload {filepath}: {e}")
4658

4759

48-
def upload_to_dynamo(dynamo_client, table_name, filepath):
49-
with open(filepath) as f:
50-
item = json.load(f)
60+
def upload_to_dynamo(
61+
dynamo_client: Any,
62+
table_name: str,
63+
filepath: Union[str, Path],
64+
) -> None:
5165

52-
try:
53-
dynamo_client.put_item(
54-
TableName=table_name, Item={key: map_dynamo_type(value) for key, value in item.items()}
55-
)
56-
print(f"Uploaded {filepath} to DynamoDB table {table_name}")
57-
except Exception as e:
58-
print(f"Failed to upload {filepath}: {e}")
66+
for item in load_json_lines(filepath):
67+
try:
68+
dynamo_client.put_item(
69+
TableName=table_name, Item={key: map_dynamo_type(value) for key, value in item.items()}
70+
)
71+
print(f"Uploaded {filepath} to DynamoDB table {table_name}")
72+
except Exception as e:
73+
print(f"Failed to upload {filepath}: {e}")
5974

6075

61-
def run_upload(args=None):
62-
print("\n\n\n***** We are in main *****\n\n\n")
76+
def run_upload(args: Optional[List[str]] = None) -> None:
6377
parser = argparse.ArgumentParser()
6478
parser.add_argument("--env")
6579
parser.add_argument("--upload-s3", type=Path)

tests/utils/manual_s3_dynamo_upload_tests.py

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,26 @@
88
import boto3
99
from moto import mock_aws
1010

11-
from scripts.manual_uploads.manual_s3_dynamo_upload import run_upload
11+
from scripts.manual_uploads.manual_s3_dynamo_upload import run_upload, map_dynamo_type
1212

1313

1414
@pytest.fixture
1515
def test_data_dir(tmp_path):
16-
data = {"ID_NUMBER": "123", "ATTRIBUTE_TYPE": "Test", "value": 99}
16+
# {"NHS_NUMBER": "C456", "ATTRIBUTE_TYPE": "COHORTS", "COHORT_MEMBERSHIPS": [{"COHORT_LABEL": "under_75", "DATE_JOINED": "2025-01-01"},{"COHORT_LABEL": "over_75", "DATE_JOINED": "2025-01-01"}], "ACTION_FLAG": "ADD", "HASH": "fake_hash_delete_sim", "sync_time": "2025-05-01 12:00:00"}
17+
data = [
18+
{"NHS_NUMBER": "1234567890", "ATTRIBUTE_TYPE": "COHORTS", "COHORT_MEMBERSHIPS": [{"COHORT_LABEL": "under_75", "DATE_JOINED": "2025-01-01"}]},
19+
{"NHS_NUMBER": "2345678901", "ATTRIBUTE_TYPE": "COHORTS", "COHORT_MEMBERSHIPS": [{"COHORT_LABEL": "over_75", "DATE_JOINED": "2025-01-01"}]},
20+
{"NHS_NUMBER": "3456789012", "ATTRIBUTE_TYPE": "COHORTS", "COHORT_MEMBERSHIPS": [{"COHORT_LABEL": "16+_covid", "DATE_JOINED": "2025-01-01"}]}
21+
]
1722
file_path = tmp_path / "test.json"
1823
with open(file_path, "w") as f:
19-
json.dump(data, f)
24+
for item in data:
25+
f.write(json.dumps(item) + "\n")
2026
return tmp_path, data
2127

2228

2329
@mock_aws
24-
def test_script_cli_end_to_end(test_data_dir, capsys):
30+
def test_script_cli_end_to_end(test_data_dir):
2531
# Arrange
2632
data_dir, expected_data = test_data_dir
2733
env = "test"
@@ -39,41 +45,44 @@ def test_script_cli_end_to_end(test_data_dir, capsys):
3945
dynamodb.create_table(
4046
TableName=dynamo_table,
4147
KeySchema=[
42-
{"AttributeName": "ID_NUMBER", "KeyType": "HASH"},
48+
{"AttributeName": "NHS_NUMBER", "KeyType": "HASH"},
4349
{"AttributeName": "ATTRIBUTE_TYPE", "KeyType": "RANGE"}
4450
],
4551
AttributeDefinitions=[
46-
{"AttributeName": "ID_NUMBER", "AttributeType": "S"},
52+
{"AttributeName": "NHS_NUMBER", "AttributeType": "S"},
4753
{"AttributeName": "ATTRIBUTE_TYPE", "AttributeType": "S"}
4854
],
4955
BillingMode="PAY_PER_REQUEST"
5056
)
5157

5258
# Act
53-
return_code = run_upload([
59+
run_upload([
5460
"--env", env,
5561
"--upload-s3", str(data_dir),
5662
"--upload-dynamo", str(data_dir),
5763
"--region", region,
5864
"--s3-bucket", s3_bucket,
5965
"--dynamo-table", dynamo_table
6066
])
61-
captured = capsys.readouterr()
6267

63-
# Assert
6468
key = f"manual-uploads/test.json"
6569
obj = s3.get_object(Bucket=s3_bucket, Key=key)
66-
uploaded_s3_data = json.load(obj["Body"])
67-
assert uploaded_s3_data == expected_data
70+
body = obj["Body"].read().decode("utf-8")
71+
uploaded_s3_data = [json.loads(line) for line in body.splitlines() if line.strip()]
6872

69-
item = dynamodb.get_item(
70-
TableName=dynamo_table,
71-
Key={
72-
"ID_NUMBER": {"S": expected_data["ID_NUMBER"]},
73-
"ATTRIBUTE_TYPE": {"S": expected_data["ATTRIBUTE_TYPE"]}
73+
dynamo_items = []
74+
for expected_item in expected_data:
75+
key = {
76+
"NHS_NUMBER": {"S": expected_item["NHS_NUMBER"]},
77+
"ATTRIBUTE_TYPE": {"S": expected_item["ATTRIBUTE_TYPE"]}
7478
}
75-
)["Item"]
76-
assert item["value"]["N"] == "99"
79+
response = dynamodb.get_item(TableName=dynamo_table, Key=key)
80+
item = response.get("Item")
81+
assert item is not None, f"Missing item for key {key}"
82+
dynamo_items.append(item)
7783

78-
assert "Uploaded" in captured.out
79-
assert "Error" not in captured.err
84+
expected_dynamo_items = [{k: map_dynamo_type(v) for k, v in item.items()} for item in expected_data]
85+
86+
# Assert
87+
assert uploaded_s3_data == expected_data
88+
assert dynamo_items == expected_dynamo_items

0 commit comments

Comments
 (0)