Skip to content
This repository was archived by the owner on Aug 8, 2025. It is now read-only.

Commit 53a3710

Browse files
committed
Add in JSON schema for YAML validation of plan, tasks and validations
1 parent 3eaf43b commit 53a3710

12 files changed

Lines changed: 1383 additions & 14 deletions

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,24 @@ Want some YAML instead? Also, no worries. Check the example [plan](docker/data/c
3434
to [DocumentationPlanRun.scala](src/main/scala/io/github/datacatering/plan/DocumentationPlanRun.scala)
3535
1. Needs to extend `io.github.datacatering.datacaterer.api.PlanRun`
3636

37+
### YAML
38+
39+
1. Copy an existing plan file (such as [foreign-key.yaml](docker/data/custom/plan/foreign-key.yaml)) in the directory
40+
[docker/data/custom/plan](docker/data/custom/plan)
41+
2. Copy an existing task file (such as [json-account-task.yaml](docker/data/custom/task/file/json/json-account-task.yaml))
42+
in the directory [docker/data/custom/task](docker/data/custom/task)
43+
1. If you want to run data validations, copy the file [simple-validation.yaml](docker/data/custom/validation/simple-validation.yaml)
44+
and add the validation to the plan via:
45+
```yaml
46+
validations:
47+
- "<name of validation (e.g. account_checks)>"
48+
```
49+
3. [Use JSON schema to help create metadata for plan, tasks or validations](schema/data-caterer-latest.json).
50+
You can import this schema into your IDE to validate your YAML files. The links below show how to import the schema:
51+
- [IntelliJ](https://www.jetbrains.com/help/idea/json.html#ws_json_schema_add_custom)
52+
- [VS Code](https://code.visualstudio.com/docs/languages/json#_json-schemas-and-settings)
53+
54+
3755
## Run
3856
3957
Requires:
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: "foreign_key_example_plan"
2+
description: "Create account data in JSON and Postgres"
3+
tasks:
4+
- name: "json_account_file"
5+
dataSourceName: "json"
6+
enabled: true
7+
- name: "postgres_account"
8+
dataSourceName: "postgresCustomer"
9+
enabled: true
10+
11+
sinkOptions:
12+
foreignKeys:
13+
- source:
14+
dataSource: "postgresCustomer"
15+
step: "accounts"
16+
fields: [ "account_number" ]
17+
generate:
18+
- dataSource: "json"
19+
step: "account"
20+
fields: [ "account_id" ]
21+
22+
validations:
23+
- "json_account_checks"

docker/data/custom/task/file/csv/csv-transaction-task.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ steps:
2121
options:
2222
expression: "#{Name.name}"
2323
- name: "year"
24-
type: "int"
24+
type: "integer"
2525
options:
2626
min: 2021
2727
max: 2022

docker/data/custom/task/file/json/json-account-task.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ steps:
99
fields:
1010
- name: "account_id"
1111
- name: "year"
12-
type: "int"
12+
type: "integer"
1313
options:
1414
min: 2021
1515
max: 2022
@@ -40,6 +40,7 @@ steps:
4040
options:
4141
sql: "element_at(sort_array(transactions.txn_date), 1)"
4242
- name: "details"
43+
type: struct
4344
fields:
4445
- name: "name"
4546
- name: "txn_date"
@@ -48,6 +49,7 @@ steps:
4849
min: "2021-01-01"
4950
max: "2021-12-31"
5051
- name: "updated_by"
52+
type: struct
5153
fields:
5254
- name: "user"
5355
- name: "time"
@@ -60,4 +62,4 @@ steps:
6062
- name: "amount"
6163
type: "double"
6264
- name: "tags"
63-
type: "array<string>"
65+
type: "array"

docker/data/custom/task/http/http-account-task-simple.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,28 @@ steps:
55
records: 50
66
fields:
77
- name: "httpUrl"
8+
type: struct
89
fields:
910
- name: "url"
1011
static: "http://localhost:80/anything/{id}"
1112
- name: "method"
1213
static: "PUT"
1314
- name: "pathParam"
15+
type: array
1416
fields:
1517
- name: "id"
1618
options:
1719
sql: "body.account_id"
1820
- name: "queryParam"
21+
type: array
1922
fields:
2023
- name: "limit"
2124
type: "integer"
2225
options:
2326
min: 1
2427
max: 10
2528
- name: "httpHeaders"
29+
type: array
2630
fields:
2731
- name: "Content-Type"
2832
static: "application/json"
@@ -35,6 +39,7 @@ steps:
3539
options:
3640
sql: "body.details.updated_by.time"
3741
- name: "httpBody"
42+
type: struct
3843
fields:
3944
- name: "account_id"
4045
options:
@@ -50,6 +55,7 @@ steps:
5055
min: 10.0
5156
max: 100.0
5257
- name: "details"
58+
type: struct
5359
fields:
5460
- name: "name"
5561
- name: "txn_date"
@@ -58,6 +64,7 @@ steps:
5864
min: "2021-01-01"
5965
max: "2021-12-31"
6066
- name: "updated_by"
67+
type: struct
6168
fields:
6269
- name: "user"
6370
- name: "time"

docker/data/custom/task/http/http-account-task.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ steps:
2323
options:
2424
sql: "to_json(content)"
2525
- name: "content"
26+
type: struct
2627
fields:
2728
- name: "account_id"
2829
- name: "year"
@@ -36,6 +37,7 @@ steps:
3637
min: 10.0
3738
max: 100.0
3839
- name: "details"
40+
type: struct
3941
fields:
4042
- name: "name"
4143
- name: "txn_date"
@@ -44,6 +46,7 @@ steps:
4446
min: "2021-01-01"
4547
max: "2021-12-31"
4648
- name: "updated_by"
49+
type: struct
4750
fields:
4851
- name: "user"
4952
- name: "time"

docker/data/custom/task/jdbc/postgres/postgres-account-task.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ steps:
33
- name: "accounts"
44
type: "postgres"
55
count:
6-
records: "1000"
6+
records: 1000
77
options:
88
dbtable: "account.accounts"
99
fields:
@@ -32,7 +32,7 @@ steps:
3232
- name: "customer_id_bigint"
3333
type: "long"
3434
- name: "customer_id_decimal"
35-
type: "decimal(26,3)"
35+
type: "decimal"
3636
- name: "customer_id_real"
3737
type: "float"
3838
- name: "customer_id_double"

docker/data/custom/task/jms/solace/jms-account-task.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: "json_account_jms"
22
steps:
33
- name: "jms_account"
4-
type: "json"
4+
type: "solace"
55
count:
66
records: 50
77
options:
@@ -12,10 +12,11 @@ steps:
1212
options:
1313
sql: "to_json(content)"
1414
- name: "content"
15+
type: struct
1516
fields:
1617
- name: "account_id"
1718
- name: "year"
18-
type: "int"
19+
type: "integer"
1920
options:
2021
min: 2021
2122
max: 2022
@@ -25,6 +26,7 @@ steps:
2526
min: 10.0
2627
max: 100.0
2728
- name: "details"
29+
type: "struct"
2830
fields:
2931
- name: "name"
3032
- name: "txn_date"
@@ -33,6 +35,7 @@ steps:
3335
min: "2021-01-01"
3436
max: "2021-12-31"
3537
- name: "updated_by"
38+
type: "struct"
3639
fields:
3740
- name: "user"
3841
- name: "time"

docker/data/custom/task/kafka/kafka-account-task-simple.yaml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
name: "simple_kafka"
22
steps:
33
- name: "kafka_account"
4-
type: "json"
4+
type: "kafka"
55
count:
6-
records: "10"
6+
records: 10
77
options:
88
topic: "account-topic"
99
fields:
@@ -12,26 +12,29 @@ steps:
1212
options:
1313
sql: "body.account_id"
1414
- name: "messageBody"
15+
type: struct
1516
fields:
1617
- name: "account_id"
1718
- name: "year"
1819
type: "int"
1920
options:
20-
min: "2021"
21-
max: "2022"
21+
min: 2021
22+
max: 2022
2223
- name: "amount"
2324
type: "double"
2425
options:
25-
min: "10.0"
26-
max: "100.0"
26+
min: 10.0
27+
max: 100.0
2728
- name: "details"
29+
type: struct
2830
fields:
2931
- name: "name"
3032
- name: "first_txn_date"
3133
type: "date"
3234
options:
3335
sql: "ELEMENT_AT(SORT_ARRAY(body.transactions.txn_date), 1)"
3436
- name: "updated_by"
37+
type: struct
3538
fields:
3639
- name: "user"
3740
- name: "time"
@@ -44,6 +47,7 @@ steps:
4447
- name: "amount"
4548
type: "double"
4649
- name: "messageHeaders"
50+
type: struct
4751
fields:
4852
- name: "account-id"
4953
options:

docker/data/custom/task/kafka/kafka-account-task.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: "json_account_kafka"
22
steps:
33
- name: "kafka_account"
4-
type: "json"
4+
type: "kafka"
55
count:
66
records: 10
77
options:
@@ -26,6 +26,7 @@ steps:
2626
# options:
2727
# sql: "1"
2828
- name: "content"
29+
type: struct
2930
fields:
3031
- name: "account_id"
3132
- name: "year"
@@ -39,6 +40,7 @@ steps:
3940
min: 10.0
4041
max: 100.0
4142
- name: "details"
43+
type: struct
4244
fields:
4345
- name: "name"
4446
- name: "txn_date"
@@ -47,6 +49,7 @@ steps:
4749
min: "2021-01-01"
4850
max: "2021-12-31"
4951
- name: "updated_by"
52+
type: struct
5053
fields:
5154
- name: "user"
5255
- name: "time"

0 commit comments

Comments
 (0)