Skip to content

Commit 4b51187

Browse files
authored
Merge pull request #29 from dataiku/bug/sc-104770-error-message-w-non-json
Bug/sc 104770 error message with non json API
2 parents 9ec6f73 + ee2a320 commit 4b51187

9 files changed

Lines changed: 97 additions & 40 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [Version 1.1.2](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.2) - Bugfix release - 2022-10-19
4+
5+
- Fix for last page of RFC5988 pagination triggering loop condtion
6+
- Fix for RFC5988 pagination taking priority on other modes
7+
- Improved pagination logs
8+
- Improved error message upon receiving non JSON material
9+
310
## [Version 1.1.1](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.1) - Feature release - 2022-09-22
411

512
- Handling relative URLs in paginated APIs

custom-recipes/api-connect/recipe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def get_partitioning_keys(id_list, dku_flow_variables):
2424
partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
2525
return partitioning_keys
2626

27+
logger.info('API-Connect plugin recipe v1.1.2')
2728

2829
input_A_names = get_input_names_for_role('input_A_role')
2930
config = get_recipe_config()

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id": "api-connect",
3-
"version": "1.1.1",
3+
"version": "1.1.2",
44
"meta": {
55
"label": "API Connect",
66
"description": "Retrieve data from any REST API",

python-connectors/api-connect_dataset/connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class RestAPIConnector(Connector):
1414

1515
def __init__(self, config, plugin_config):
1616
Connector.__init__(self, config, plugin_config) # pass the parameters to the base class
17-
17+
logger.info('API-Connect plugin connector v1.1.2')
1818
logger.info("config={}".format(logger.filter_secrets(config)))
1919
endpoint_parameters = get_endpoint_parameters(config)
2020
credential = config.get("credential", {})

python-lib/dku_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def get_endpoint_parameters(configuration):
2727
"next_page_url_key", "is_next_page_url_relative", "next_page_url_base",
2828
"top_key", "skip_key", "maximum_number_rows"
2929
]
30-
parameters = {endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None}
30+
parameters = {
31+
endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None
32+
}
3133
return parameters
3234

3335

python-lib/pagination.py

Lines changed: 66 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,11 @@
77

88
class Pagination(object):
99

10-
def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None):
10+
def __init__(self):
1111
self.next_page_key = None
1212
self.next_page_url_base = None
1313
self.skip_key = None
14-
self.limit_key = None
15-
self.total_key = None
16-
self.total = None
1714
self.next_page_url = None
18-
self.remaining_records = None
1915
self.records_to_skip = None
2016
self.pagination_type = ""
2117
self.counting_key = None
@@ -26,10 +22,11 @@ def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, n
2622
self.next_page_number = None
2723
self.params_must_be_blanked = False
2824
self.data_is_list = None
25+
self.update_next_page = self.update_next_page_default
2926

30-
def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None, next_page_url_base=None, url=None, pagination_type="na"):
27+
def configure_paging(self, config=None, skip_key=None,
28+
next_page_key=None, next_page_url_base=None, pagination_type="na"):
3129
config = {} if config is None else config
32-
self.limit_key = config.get("limit_key", limit_key)
3330
self.pagination_type = config.get("pagination_type", pagination_type)
3431
if self.pagination_type == "next_page":
3532
self.next_page_key = config.get("next_page_key", next_page_key)
@@ -39,9 +36,19 @@ def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key
3936
self.next_page_url_base = next_page_url_base
4037
elif self.pagination_type in ["offset", "page"]:
4138
self.skip_key = config.get("skip_key", skip_key)
39+
logger.info("configure_paging: self.pagination_type='{}', self.next_page_key='{}', self.next_page_url_base='{}', self.skip_key='{}'".format(
40+
self.pagination_type, self.next_page_key, self.next_page_url_base, self.skip_key
41+
))
42+
if self.pagination_type == "next_page":
43+
self.update_next_page = self.update_next_page_link
44+
elif self.pagination_type == "offset":
45+
self.update_next_page = self.update_next_page_offset
46+
elif self.pagination_type == "page":
47+
self.update_next_page = self.update_next_page_per_page
48+
else:
49+
self.update_next_page = self.update_next_page_default
4250

4351
def reset_paging(self, counting_key=None, url=None):
44-
self.remaining_records = 0
4552
self.records_to_skip = 0
4653
self.counting_key = counting_key
4754
self.counter = 0
@@ -53,52 +60,75 @@ def reset_paging(self, counting_key=None, url=None):
5360
self.is_last_batch_empty = False
5461
self.is_first_batch = True
5562
self.is_paging_started = True
63+
logger.info("reset_paging:next_page_url={}, counting_key={}, next_page_number={}".format(self.next_page_url, self.counting_key, self.next_page_number))
5664

5765
def set_counting_key(self, counting_key):
5866
self.counting_key = counting_key
67+
logger.info("set_counting_key: counting_key set to {}".format(self.counting_key))
5968

60-
def update_next_page(self, data, response_links=None):
61-
response_links = response_links or {}
62-
next_link = response_links.get('next', {})
63-
next_page_url = next_link.get("url")
64-
self.is_first_batch = False
65-
self.counter += 1
66-
self.next_page_number = self.next_page_number + 1
67-
if next_page_url:
68-
self.next_page_url = next_page_url
69-
self.params_must_be_blanked = True
69+
def compute_batch_size(self, data):
7070
self.data_is_list = False
7171
if isinstance(data, list):
7272
self.data_is_list = True
7373
batch_size = len(data)
74-
self.records_to_skip = self.records_to_skip + batch_size
75-
if batch_size == 0:
76-
self.is_last_batch_empty = True
77-
return
7874
elif self.counting_key:
7975
extracted_data = get_value_from_path(data, self.counting_key.split("."), can_raise=False)
8076
if extracted_data:
8177
batch_size = len(extracted_data)
8278
else:
8379
batch_size = 0
84-
self.is_last_batch_empty = True
8580
else:
8681
batch_size = 1
82+
if batch_size == 0:
83+
self.is_last_batch_empty = True
84+
return batch_size
85+
86+
def update_next_page_offset(self, data, response_links=None):
87+
self.is_first_batch = False
88+
self.counter += 1
89+
batch_size = self.compute_batch_size(data)
90+
self.records_to_skip = self.records_to_skip + batch_size
91+
logger.info("update_next_page_offset:data_is_list={}, records_to_skip={}, batch_size={}, is_last_batch_empty={}".format(
92+
self.data_is_list, self.records_to_skip, batch_size, self.is_last_batch_empty
93+
))
94+
95+
def update_next_page_per_page(self, data, response_links=None):
96+
self.is_first_batch = False
97+
self.counter += 1
98+
self.next_page_number = self.next_page_number + 1
99+
batch_size = self.compute_batch_size(data)
100+
logger.info("update_next_page_per_page:data_is_list={}, next_page_number={}, batch_size={}, is_last_batch_empty={}".format(
101+
self.data_is_list, self.next_page_number, batch_size, self.is_last_batch_empty
102+
))
103+
104+
def update_next_page_link(self, data, response_links=None):
105+
self.is_first_batch = False
106+
self.counter += 1
107+
self.next_page_url = None
108+
self.data_is_list = False
109+
87110
if self.next_page_key:
88111
next_page_path = extract_key_using_json_path(data, self.next_page_key)
89112
if self.next_page_url_base and next_page_path:
90113
self.next_page_url = "/".join([self.next_page_url_base, next_page_path])
91114
else:
92115
self.next_page_url = next_page_path
93-
if self.skip_key:
94-
self.skip = data.get(self.skip_key)
95-
if self.limit_key:
96-
self.limit = data.get(self.limit_key)
97-
if self.total_key:
98-
self.total = data.get(self.total_key)
99-
self.records_to_skip = self.records_to_skip + batch_size
100-
if self.total:
101-
self.remaining_records = self.total - self.records_to_skip
116+
logger.info("update_next_page_link:next_page_url_base={}, next_page_path={}, next_page_url={}".format(
117+
self.next_page_url_base, next_page_path, self.next_page_url
118+
))
119+
else:
120+
response_links = response_links or {}
121+
next_link = response_links.get('next', {})
122+
next_page_url = next_link.get("url")
123+
if next_page_url:
124+
self.next_page_url = next_page_url
125+
self.params_must_be_blanked = True
126+
logger.info("update_next_page_link:next_link={}, next_page_url={}, params_must_be_blanked={}, next_page_number={}, counter={}".format(
127+
next_link, self.next_page_url, self.params_must_be_blanked, self.next_page_number, self.counter
128+
))
129+
130+
def update_next_page_default(self, data, response_links=None):
131+
self.is_first_batch = False
102132

103133
def has_next_page(self):
104134
if self.is_last_batch_empty:
@@ -118,14 +148,18 @@ def has_next_page(self):
118148
if self.pagination_type in ["page", "offset"]:
119149
if self.counting_key:
120150
# There is a counting key and we already know the last batch was not empty
151+
logger.info("has_next_page:pagination_type={}, counting_key={} -> True".format(self.pagination_type, self.counting_key))
121152
return True
122153
else:
123154
if self.data_is_list:
124155
# for lists is_last_batch_empty is set correctly and handled by the code above
156+
logger.info("has_next_page:pagination_type={}, data_is_list={} -> True".format(self.pagination_type, self.data_is_list))
125157
return True
126158
# Without a counting_key we have no mean to know if the last batch was empty.
127159
# To avoid infinite loop we stop pagination here
160+
logger.info("has_next_page:pagination_type={} -> False".format(self.pagination_type))
128161
return False
162+
logger.info("has_next_page:pagination_type={} -> False".format(self.pagination_type))
129163
return False
130164

131165
def get_params(self):

python-lib/rest_api_client.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,14 @@ def __init__(self, credential, endpoint, custom_key_values={}):
6464
is_next_page_url_relative = endpoint.get("is_next_page_url_relative", False)
6565
next_page_url_base = endpoint.get("next_page_url_base", None) if is_next_page_url_relative else None
6666
next_page_url_base = format_template(next_page_url_base, **self.presets_variables)
67-
top_key = endpoint.get("top_key")
6867
skip_key = endpoint.get("skip_key")
6968
pagination_type = endpoint.get("pagination_type", "na")
70-
if pagination_type=="next_page" and is_next_page_url_relative and not next_page_url_base:
69+
if pagination_type == "next_page" and is_next_page_url_relative and not next_page_url_base:
7170
raise RestAPIClientError("Pagination's 'Next page URL' is relative but no 'Base URL to next page' has been set")
7271
self.pagination.configure_paging(
7372
skip_key=skip_key,
74-
limit_key=top_key,
7573
next_page_key=next_page_url_key,
7674
next_page_url_base=next_page_url_base,
77-
url=self.endpoint_url,
7875
pagination_type=pagination_type
7976
)
8077
self.last_interaction = None
@@ -157,7 +154,17 @@ def request(self, method, url, can_raise_exeption=True, **kwargs):
157154
if response.status_code in [204]:
158155
self.pagination.update_next_page({}, response.links)
159156
return self.empty_json_response()
160-
json_response = response.json()
157+
try:
158+
json_response = response.json()
159+
except Exception as err:
160+
self.pagination.update_next_page({}, None)
161+
error_message = "Error '{}' when decoding JSON".format(str(err)[:100])
162+
logger.error(error_message)
163+
logger.error("response.content={}".format(response.content))
164+
if can_raise_exeption:
165+
raise RestAPIClientError("The API did not return JSON as expected. {}".format(error_message))
166+
return {"error": error_message}
167+
161168
self.pagination.update_next_page(json_response, response.links)
162169
return json_response
163170

python-lib/rest_api_recipe_session.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def format_page_rows(self, data_rows, is_raw_output, metadata=None):
124124

125125

126126
def is_error_message(jsons_response):
127+
if type(jsons_response) not in [dict, list]:
128+
return False
127129
if "error" in jsons_response and len(jsons_response) == 1:
128130
return True
129131
else:

tests/python/integration/test_scenario.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,7 @@ def test_run_api_connect_ntlm_authentication(user_dss_clients):
4141

4242
def test_run_api_connect_relative_url_pagination(user_dss_clients):
4343
dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="RELATIVEURLPAGINATION")
44+
45+
46+
def test_run_api_connect_check_sc_110446(user_dss_clients):
47+
dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="CHECKSC110446")

0 commit comments

Comments
 (0)