Skip to content

Commit ee2a320

Browse files
authored
Merge pull request #30 from dataiku/bug/sc-104935-rfc5988-takes-priority-on-offset-pagination
Spliting update_next_page method
2 parents be23c4e + 04769a1 commit ee2a320

9 files changed

Lines changed: 77 additions & 55 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [Version 1.1.2](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.2) - Bugfix release - 2022-10-19
4+
5+
- Fix for last page of RFC5988 pagination triggering loop condtion
6+
- Fix for RFC5988 pagination taking priority on other modes
7+
- Improved pagination logs
8+
- Improved error message upon receiving non JSON material
9+
310
## [Version 1.1.1](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.1) - Feature release - 2022-09-22
411

512
- Handling relative URLs in paginated APIs

custom-recipes/api-connect/recipe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def get_partitioning_keys(id_list, dku_flow_variables):
2424
partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
2525
return partitioning_keys
2626

27+
logger.info('API-Connect plugin recipe v1.1.2')
2728

2829
input_A_names = get_input_names_for_role('input_A_role')
2930
config = get_recipe_config()

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id": "api-connect",
3-
"version": "1.1.1",
3+
"version": "1.1.2",
44
"meta": {
55
"label": "API Connect",
66
"description": "Retrieve data from any REST API",

python-connectors/api-connect_dataset/connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class RestAPIConnector(Connector):
1414

1515
def __init__(self, config, plugin_config):
1616
Connector.__init__(self, config, plugin_config) # pass the parameters to the base class
17-
17+
logger.info('API-Connect plugin connector v1.1.2')
1818
logger.info("config={}".format(logger.filter_secrets(config)))
1919
endpoint_parameters = get_endpoint_parameters(config)
2020
credential = config.get("credential", {})

python-lib/dku_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def get_endpoint_parameters(configuration):
2727
"next_page_url_key", "is_next_page_url_relative", "next_page_url_base",
2828
"top_key", "skip_key", "maximum_number_rows"
2929
]
30-
parameters = {endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None}
30+
parameters = {
31+
endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None
32+
}
3133
return parameters
3234

3335

python-lib/pagination.py

Lines changed: 56 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,11 @@
77

88
class Pagination(object):
99

10-
def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None):
10+
def __init__(self):
1111
self.next_page_key = None
1212
self.next_page_url_base = None
1313
self.skip_key = None
14-
self.limit_key = None
15-
self.total_key = None
16-
self.total = None
1714
self.next_page_url = None
18-
self.remaining_records = None
1915
self.records_to_skip = None
2016
self.pagination_type = ""
2117
self.counting_key = None
@@ -26,10 +22,11 @@ def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, n
2622
self.next_page_number = None
2723
self.params_must_be_blanked = False
2824
self.data_is_list = None
25+
self.update_next_page = self.update_next_page_default
2926

30-
def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None, next_page_url_base=None, url=None, pagination_type="na"):
27+
def configure_paging(self, config=None, skip_key=None,
28+
next_page_key=None, next_page_url_base=None, pagination_type="na"):
3129
config = {} if config is None else config
32-
self.limit_key = config.get("limit_key", limit_key)
3330
self.pagination_type = config.get("pagination_type", pagination_type)
3431
if self.pagination_type == "next_page":
3532
self.next_page_key = config.get("next_page_key", next_page_key)
@@ -39,12 +36,19 @@ def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key
3936
self.next_page_url_base = next_page_url_base
4037
elif self.pagination_type in ["offset", "page"]:
4138
self.skip_key = config.get("skip_key", skip_key)
42-
logger.info("configure_paging: self.limit_key='{}', self.pagination_type='{}', self.next_page_key='{}', self.next_page_url_base='{}', self.skip_key='{}'".format(
43-
self.limit_key, self.pagination_type, self.next_page_key, self.next_page_url_base, self.skip_key
39+
logger.info("configure_paging: self.pagination_type='{}', self.next_page_key='{}', self.next_page_url_base='{}', self.skip_key='{}'".format(
40+
self.pagination_type, self.next_page_key, self.next_page_url_base, self.skip_key
4441
))
42+
if self.pagination_type == "next_page":
43+
self.update_next_page = self.update_next_page_link
44+
elif self.pagination_type == "offset":
45+
self.update_next_page = self.update_next_page_offset
46+
elif self.pagination_type == "page":
47+
self.update_next_page = self.update_next_page_per_page
48+
else:
49+
self.update_next_page = self.update_next_page_default
4550

4651
def reset_paging(self, counting_key=None, url=None):
47-
self.remaining_records = 0
4852
self.records_to_skip = 0
4953
self.counting_key = counting_key
5054
self.counter = 0
@@ -62,65 +66,70 @@ def set_counting_key(self, counting_key):
6266
self.counting_key = counting_key
6367
logger.info("set_counting_key: counting_key set to {}".format(self.counting_key))
6468

65-
def update_next_page(self, data, response_links=None):
66-
response_links = response_links or {}
67-
next_link = response_links.get('next', {})
68-
next_page_url = next_link.get("url")
69-
self.is_first_batch = False
70-
self.counter += 1
71-
self.next_page_number = self.next_page_number + 1
72-
if next_page_url:
73-
self.next_page_url = next_page_url
74-
self.params_must_be_blanked = True
75-
logger.info("update_next_page:next_link={}, next_page_url={}, params_must_be_blanked={}, next_page_number={}, counter={}".format(
76-
next_link, self.next_page_url, self.params_must_be_blanked, self.next_page_number, self.counter
77-
))
69+
def compute_batch_size(self, data):
7870
self.data_is_list = False
7971
if isinstance(data, list):
8072
self.data_is_list = True
8173
batch_size = len(data)
82-
self.records_to_skip = self.records_to_skip + batch_size
83-
if batch_size == 0:
84-
self.is_last_batch_empty = True
85-
logger.info("update_next_page:update_next_page:data is list:batch_size={}, records_to_skip={}, is_last_batch_empty={}".format(
86-
batch_size, self.records_to_skip, self.is_last_batch_empty
87-
))
88-
return
8974
elif self.counting_key:
9075
extracted_data = get_value_from_path(data, self.counting_key.split("."), can_raise=False)
9176
if extracted_data:
9277
batch_size = len(extracted_data)
9378
else:
9479
batch_size = 0
95-
self.is_last_batch_empty = True
9680
else:
9781
batch_size = 1
82+
if batch_size == 0:
83+
self.is_last_batch_empty = True
84+
return batch_size
85+
86+
def update_next_page_offset(self, data, response_links=None):
87+
self.is_first_batch = False
88+
self.counter += 1
89+
batch_size = self.compute_batch_size(data)
90+
self.records_to_skip = self.records_to_skip + batch_size
91+
logger.info("update_next_page_offset:data_is_list={}, records_to_skip={}, batch_size={}, is_last_batch_empty={}".format(
92+
self.data_is_list, self.records_to_skip, batch_size, self.is_last_batch_empty
93+
))
94+
95+
def update_next_page_per_page(self, data, response_links=None):
96+
self.is_first_batch = False
97+
self.counter += 1
98+
self.next_page_number = self.next_page_number + 1
99+
batch_size = self.compute_batch_size(data)
100+
logger.info("update_next_page_per_page:data_is_list={}, next_page_number={}, batch_size={}, is_last_batch_empty={}".format(
101+
self.data_is_list, self.next_page_number, batch_size, self.is_last_batch_empty
102+
))
103+
104+
def update_next_page_link(self, data, response_links=None):
105+
self.is_first_batch = False
106+
self.counter += 1
107+
self.next_page_url = None
108+
self.data_is_list = False
109+
98110
if self.next_page_key:
99111
next_page_path = extract_key_using_json_path(data, self.next_page_key)
100112
if self.next_page_url_base and next_page_path:
101113
self.next_page_url = "/".join([self.next_page_url_base, next_page_path])
102114
else:
103115
self.next_page_url = next_page_path
104-
logger.info("update_next_page:next_page_url_base={}, next_page_path={}, next_page_url={}".format(
116+
logger.info("update_next_page_link:next_page_url_base={}, next_page_path={}, next_page_url={}".format(
105117
self.next_page_url_base, next_page_path, self.next_page_url
106118
))
107-
if self.skip_key:
108-
self.skip = data.get(self.skip_key)
109-
logger.info("update_next_page:skip=data[{}]={}".format(self.skip_key, self.skip))
110-
if self.limit_key:
111-
self.limit = data.get(self.limit_key)
112-
logger.info("update_next_page:limit=data[{}]={}".format(self.limit_key, self.limit))
113-
if self.total_key:
114-
self.total = data.get(self.total_key)
115-
logger.info("update_next_page:total=data[{}]={}".format(self.total_key, self.total))
116-
self.records_to_skip = self.records_to_skip + batch_size
117-
logger.info("update_next_page:records_to_skip={}, batch_size={}".format(self.records_to_skip, batch_size))
118-
if self.total:
119-
self.remaining_records = self.total - self.records_to_skip
120-
logger.info("update_next_page:remaining_records={}, total={}, records_to_skip={}".format(
121-
self.remaining_records, self.total, self.records_to_skip
119+
else:
120+
response_links = response_links or {}
121+
next_link = response_links.get('next', {})
122+
next_page_url = next_link.get("url")
123+
if next_page_url:
124+
self.next_page_url = next_page_url
125+
self.params_must_be_blanked = True
126+
logger.info("update_next_page_link:next_link={}, next_page_url={}, params_must_be_blanked={}, next_page_number={}, counter={}".format(
127+
next_link, self.next_page_url, self.params_must_be_blanked, self.next_page_number, self.counter
122128
))
123129

130+
def update_next_page_default(self, data, response_links=None):
131+
self.is_first_batch = False
132+
124133
def has_next_page(self):
125134
if self.is_last_batch_empty:
126135
logger.info("has_next_page:last was batch empty -> False")

python-lib/rest_api_client.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,14 @@ def __init__(self, credential, endpoint, custom_key_values={}):
6464
is_next_page_url_relative = endpoint.get("is_next_page_url_relative", False)
6565
next_page_url_base = endpoint.get("next_page_url_base", None) if is_next_page_url_relative else None
6666
next_page_url_base = format_template(next_page_url_base, **self.presets_variables)
67-
top_key = endpoint.get("top_key")
6867
skip_key = endpoint.get("skip_key")
6968
pagination_type = endpoint.get("pagination_type", "na")
70-
if pagination_type=="next_page" and is_next_page_url_relative and not next_page_url_base:
69+
if pagination_type == "next_page" and is_next_page_url_relative and not next_page_url_base:
7170
raise RestAPIClientError("Pagination's 'Next page URL' is relative but no 'Base URL to next page' has been set")
7271
self.pagination.configure_paging(
7372
skip_key=skip_key,
74-
limit_key=top_key,
7573
next_page_key=next_page_url_key,
7674
next_page_url_base=next_page_url_base,
77-
url=self.endpoint_url,
7875
pagination_type=pagination_type
7976
)
8077
self.last_interaction = None
@@ -160,7 +157,7 @@ def request(self, method, url, can_raise_exeption=True, **kwargs):
160157
try:
161158
json_response = response.json()
162159
except Exception as err:
163-
self.pagination.update_next_page({}, response.links)
160+
self.pagination.update_next_page({}, None)
164161
error_message = "Error '{}' when decoding JSON".format(str(err)[:100])
165162
logger.error(error_message)
166163
logger.error("response.content={}".format(response.content))

python-lib/rest_api_recipe_session.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def format_page_rows(self, data_rows, is_raw_output, metadata=None):
124124

125125

126126
def is_error_message(jsons_response):
127+
if type(jsons_response) not in [dict, list]:
128+
return False
127129
if "error" in jsons_response and len(jsons_response) == 1:
128130
return True
129131
else:

tests/python/integration/test_scenario.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,7 @@ def test_run_api_connect_ntlm_authentication(user_dss_clients):
4141

4242
def test_run_api_connect_relative_url_pagination(user_dss_clients):
4343
dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="RELATIVEURLPAGINATION")
44+
45+
46+
def test_run_api_connect_check_sc_110446(user_dss_clients):
47+
dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="CHECKSC110446")

0 commit comments

Comments
 (0)