Skip to content

Commit ff7cf35

Browse files
authored
Merge pull request #24 from dataiku/bug/sc-98256-offset-pagination-on-lists
[sc-98256] Handling offset pagination on APIs returning an array
2 parents 3c0a89b + 143499b commit ff7cf35

9 files changed

Lines changed: 60 additions & 28 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
# Changelog
22

33

4+
## [Version 1.1.0](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.0) - Feature and bugfix release - 2022-09-15
5+
6+
- Handling Offset pagination on APIs returning an array
7+
- Fix throttling calculation
8+
- Allow dots in `Key to next request URL`
9+
- Add NTLM authentication
10+
411
## [Version 1.0.6](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.0.6) - Feature and bugfix release - 2022-05-19
512

613
- Add "Follow authorization header" option
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
jsonpath-ng==1.5.3
2+
requests_ntlm==1.1.0

parameter-sets/credential/parameter-set.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
{
3939
"value": "bearer_token",
4040
"label": "Bearer token"
41+
},
42+
{
43+
"value": "ntlm",
44+
"label": "NTLM"
4145
}
4246
]
4347
},
@@ -46,14 +50,14 @@
4650
"label": "User name",
4751
"description": "Can be reused as {{username}}",
4852
"type": "STRING",
49-
"visibilityCondition": "model.login_type == 'basic_login'"
53+
"visibilityCondition": "['basic_login', 'ntlm'].includes(model.login_type)"
5054
},
5155
{
5256
"name": "password",
5357
"label": "Password",
5458
"description": "Can be reused as {{password}}",
5559
"type": "PASSWORD",
56-
"visibilityCondition": "model.login_type == 'basic_login'"
60+
"visibilityCondition": "['basic_login', 'ntlm'].includes(model.login_type)"
5761
},
5862
{
5963
"name": "token",

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id": "api-connect",
3-
"version": "1.0.6",
3+
"version": "1.1.0",
44
"meta": {
55
"label": "API Connect",
66
"description": "Retrieve data from any REST API",

python-lib/dku_utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import copy
3+
from jsonpath_ng.ext import parse
34

45

56
def get_dku_key_values(endpoint_query_string):
@@ -46,7 +47,7 @@ def parse_keys_for_json(items):
4647
def get_value_from_path(dictionary, path, default=None, can_raise=True):
4748
ret = copy.deepcopy(dictionary)
4849
for key in path:
49-
if key in ret and isinstance(ret, dict):
50+
if isinstance(ret, dict) and (key in ret):
5051
ret = ret.get(key)
5152
else:
5253
error_message = "The extraction path {} was not found in the incoming data".format(path)
@@ -84,3 +85,12 @@ def format_template(template, **kwargs):
8485
def is_string(data):
8586
data_type = type(data).__name__
8687
return data_type in ["str", "unicode"]
88+
89+
90+
def extract_key_using_json_path(json_dictionary, json_path):
    """Return the value of the first JSONPath match found in the data.

    :param json_dictionary: data searched with the parsed expression
    :param json_path: JSONPath expression string handed to ``parse``
    :return: ``.value`` of the first match, or None when nothing matches
    """
    found = parse(json_path).find(json_dictionary)
    # Only the first match is used; any further matches are ignored.
    return found[0].value if found else None

python-lib/pagination.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from safe_logger import SafeLogger
2-
from dku_utils import get_value_from_path
2+
from dku_utils import get_value_from_path, extract_key_using_json_path
33

44

55
logger = SafeLogger("api-connect plugin Pagination")
@@ -24,14 +24,15 @@ def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, n
2424
self.is_paging_started = None
2525
self.next_page_number = None
2626
self.params_must_be_blanked = False
27+
self.data_is_list = None
2728

2829
def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None, url=None, pagination_type="na"):
2930
config = {} if config is None else config
3031
self.limit_key = config.get("limit_key", limit_key)
3132
self.pagination_type = config.get("pagination_type", pagination_type)
3233
if self.pagination_type == "next_page":
3334
self.next_page_key = config.get("next_page_key", next_page_key)
34-
self.next_page_key = None if self.next_page_key == [''] else self.next_page_key
35+
self.next_page_key = None if self.next_page_key == '' else self.next_page_key
3536
elif self.pagination_type in ["offset", "page"]:
3637
self.skip_key = config.get("skip_key", skip_key)
3738

@@ -62,7 +63,9 @@ def update_next_page(self, data, response_links=None):
6263
if next_page_url:
6364
self.next_page_url = next_page_url
6465
self.params_must_be_blanked = True
66+
self.data_is_list = False
6567
if isinstance(data, list):
68+
self.data_is_list = True
6669
batch_size = len(data)
6770
self.records_to_skip = self.records_to_skip + batch_size
6871
if batch_size == 0:
@@ -77,8 +80,8 @@ def update_next_page(self, data, response_links=None):
7780
self.is_last_batch_empty = True
7881
else:
7982
batch_size = 1
80-
if self.next_page_key and (len(self.next_page_key) > 0):
81-
self.next_page_url = self.get_from_path(data, self.next_page_key)
83+
if self.next_page_key:
84+
self.next_page_url = extract_key_using_json_path(data, self.next_page_key)
8285
if self.skip_key:
8386
self.skip = data.get(self.skip_key)
8487
if self.limit_key:
@@ -89,17 +92,6 @@ def update_next_page(self, data, response_links=None):
8992
if self.total:
9093
self.remaining_records = self.total - self.records_to_skip
9194

92-
def get_from_path(self, dictionary, path):
93-
if isinstance(path, list):
94-
edge = dictionary
95-
for key in path:
96-
edge = edge.get(key)
97-
if edge is None:
98-
return None
99-
return edge
100-
else:
101-
return dictionary.get(path)
102-
10395
def has_next_page(self):
10496
if self.is_last_batch_empty:
10597
logger.info("has_next_page:last was batch empty -> False")
@@ -120,7 +112,11 @@ def has_next_page(self):
120112
# There is a counting key and we already know the last batch was not empty
121113
return True
122114
else:
123-
# No way to know if the last batch was empty so we stop here
115+
if self.data_is_list:
116+
# for lists is_last_batch_empty is set correctly and handled by the code above
117+
return True
118+
# Without a counting_key we have no means of knowing whether the last batch was empty.
119+
# To avoid an infinite loop, we stop pagination here
124120
return False
125121
return False
126122

python-lib/rest_api_client.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def __init__(self, credential, endpoint, custom_key_values={}):
6060

6161
self.requests_kwargs.update({"params": self.params})
6262
self.pagination = Pagination()
63-
next_page_url_key = endpoint.get("next_page_url_key", "").split('.')
63+
next_page_url_key = endpoint.get("next_page_url_key", "")
6464
top_key = endpoint.get("top_key")
6565
skip_key = endpoint.get("skip_key")
6666
pagination_type = endpoint.get("pagination_type", "na")
@@ -87,14 +87,21 @@ def __init__(self, credential, endpoint, custom_key_values={}):
8787
key_value_body = endpoint.get("key_value_body", {})
8888
self.requests_kwargs.update({"json": get_dku_key_values(key_value_body)})
8989
self.metadata = {}
90+
self.call_number = 0
9091

9192
def set_login(self, credential):
9293
login_type = credential.get("login_type", "no_auth")
9394
if login_type == "basic_login":
94-
self.username = credential.get("username", "")
95-
self.password = credential.get("password", "")
96-
self.auth = (self.username, self.password)
97-
self.requests_kwargs.update({"auth": self.auth})
95+
username = credential.get("username", "")
96+
password = credential.get("password", "")
97+
auth = (username, password)
98+
self.requests_kwargs.update({"auth": auth})
99+
if login_type == "ntlm":
100+
from requests_ntlm import HttpNtlmAuth
101+
username = credential.get("username", "")
102+
password = credential.get("password", "")
103+
auth = HttpNtlmAuth(username, password)
104+
self.requests_kwargs.update({"auth": auth})
98105
if login_type == "bearer_token":
99106
token = credential.get("token", "")
100107
bearer_template = credential.get("bearer_template", "Bearer {{token}}")
@@ -119,8 +126,9 @@ def request(self, method, url, can_raise_exeption=True, **kwargs):
119126
kwargs = template_dict(kwargs, **self.presets_variables)
120127
if self.loop_detector.is_stuck_in_loop(url, kwargs.get("params", {}), kwargs.get("headers", {})):
121128
raise RestAPIClientError("The api-connect plugin is stuck in a loop. Please check the pagination parameters.")
129+
request_start_time = time.time()
130+
self.time_last_request = request_start_time
122131
try:
123-
request_start_time = time.time()
124132
response = self.request_with_redirect_retry(method, url, **kwargs)
125133
request_finish_time = time.time()
126134
except Exception as err:
@@ -131,7 +139,6 @@ def request(self, method, url, can_raise_exeption=True, **kwargs):
131139
else:
132140
return {"error": error_message}
133141
self.set_metadata("request_duration", request_finish_time - request_start_time)
134-
self.time_last_request = time.time()
135142
self.set_metadata("status_code", response.status_code)
136143
self.set_metadata("response_headers", "{}".format(response.headers))
137144
if response.status_code >= 400:
@@ -167,6 +174,8 @@ def paginated_api_call(self, can_raise_exeption=True):
167174
params = self.requests_kwargs.get("params")
168175
params.update(pagination_params)
169176
self.requests_kwargs.update({"params": params})
177+
self.call_number = self.call_number + 1
178+
logger.info("API call number #{}".format(self.call_number))
170179
return self.request(self.http_method, self.pagination.get_next_page_url(), can_raise_exeption, **self.requests_kwargs)
171180

172181
def empty_json_response(self):

python-lib/rest_api_recipe_session.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ def process_dataframe(self, input_parameters_dataframe, is_raw_output):
4646
self.initial_parameter_columns.update({parameter_name: input_parameters_row.get(column_name)})
4747
updated_endpoint_parameters = copy.deepcopy(self.endpoint_parameters)
4848
updated_endpoint_parameters.update(self.initial_parameter_columns)
49-
logger.info("Creating client with credential={}, updated_endpoint={}, custom_key_values={}".format(
49+
logger.info("Processing row #{}, creating client with credential={}, updated_endpoint={}, custom_key_values={}".format(
50+
index + 1,
5051
logger.filter_secrets(self.credential_parameters),
5152
updated_endpoint_parameters,
5253
self.custom_key_values

tests/python/integration/test_scenario.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ def test_run_api_connect_redirection(user_dss_clients):
3333

3434
def test_run_api_connect_check_sc_84465(user_dss_clients):
3535
dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="CHECKSC84465")
36+
37+
def test_run_api_connect_ntlm_authentication(user_dss_clients):
    """Run the NTLMAUTHENTICATION scenario of the test project."""
    dss_scenario.run(
        user_dss_clients,
        project_key=TEST_PROJECT_KEY,
        scenario_id="NTLMAUTHENTICATION",
    )

0 commit comments

Comments
 (0)