Skip to content

Commit 356583b

Browse files
nithisubtimm4205
authored and committed
chore: Raise the lxml upper bound from <6.0.0 to <=6.0.2 to unblock Python 3.14 support, and bump the beautifulsoup4 minimum version from 4.7.0 to 4.13.5 to fix an lxml 6.0 parsing bug with curly braces
1 parent a0527a8 commit 356583b

3 files changed

Lines changed: 82 additions & 4 deletions

File tree

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
scramp>=1.2.0,<1.5.0
22
pytz>=2020.1
3-
beautifulsoup4>=4.7.0,<5.0.0
3+
beautifulsoup4>=4.13.5,<5.0.0
44
boto3>=1.42.22,<2.0.0
55
requests>=2.23.0,<3.0.0
6-
lxml>=4.6.5,<6.0.0
6+
lxml>=4.6.5,<=6.0.2
77
botocore>=1.12.201,<2.0.0
88
packaging
99
setuptools

test/integration/datatype/_generate_test_datatype_tables.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from redshift_connector import Connection
1212

1313
SCHEMA_NAME: str = "datatype_integration"
14+
# Toggle precision between 18 and 38 to test the 8-byte and 16-byte cases. Scale must be >= 8.
15+
NUMERIC_PRECISION: str = "(18, 8)"
1416
CREATE_FILE_PATH: str = "{}/datatype_test_stmts.sql".format(pathlib.Path().absolute())
1517
TEARDOWN_FILE_PATH: str = "{}/datatype_teardown_stmts.sql".format(pathlib.Path().absolute())
1618
"""
@@ -200,7 +202,7 @@ class Datatypes(Enum):
200202
int2 = auto()
201203
int4 = auto()
202204
int8 = auto()
203-
numeric = "(18, 8)" # toggle precision between 18 and 38 to test 8 & 16 byte. scale must be >= 8
205+
numeric = auto()
204206
float4 = auto()
205207
float8 = auto()
206208
bool = auto()
@@ -460,7 +462,7 @@ def _build_table_stmts(dt: Datatypes) -> None:
460462

461463
col_type: str = dt.name
462464
if dt.name == Datatypes.numeric.name:
463-
col_type += dt.value
465+
col_type += NUMERIC_PRECISION
464466

465467
create_stmt: str = "create table {schema}.test_{datatype} (c1 varchar, c2 {col_type});".format(
466468
schema=SCHEMA_NAME, datatype=dt.name, col_type=col_type

test/unit/plugin/test_adfs_credentials_provider.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import typing
22
from unittest.mock import MagicMock, Mock, patch
33

4+
import bs4 # type: ignore
45
import pytest # type: ignore
56
import requests
67

@@ -219,3 +220,78 @@ def test_form_based_authentication_empty_saml_response_should_fail(mocker) -> No
219220

220221
with pytest.raises(InterfaceError, match="Failed to find ADFS access_token in authentication response payload"):
221222
acp.form_based_authentication()
223+
224+
225+
class TestLxmlCurlyBraceCompatibility:
226+
"""
227+
Regression tests for lxml 6.0 + bs4 compatibility.
228+
"""
229+
230+
# This HTML triggers the actual lxml 6.0 + bs4 < 4.13.5 bug.
231+
# lxml 6.0 recovers {currency: as an HTML attribute, and bs4 < 4.13.5
232+
# fails to handle it in _getNsTag(), raising ValueError.
233+
MALFORMED_HTML_WITH_CURLY_BRACE_ATTR = (
234+
'<!DOCTYPE html><html lang="id"><head><noscript>'
235+
'<img height="1" width="1" style="display:none" '
236+
'src="https://www.example.com/tr?id=123&ev=PageView&noscript=1" '
237+
'fbq("track", "Purchase" , {currency: "IDR" , value: 20000.00}); />'
238+
'</noscript></head><body></body>'
239+
)
240+
241+
ADFS_HTML_WITH_CURLY_BRACES = """
242+
<html>
243+
<head>
244+
<style>
245+
.illustrationClass {background-image:url(/adfs/portal/illustration/illustration.png);}
246+
.f_circleG { position: absolute; height: 22px; width: 22px; }
247+
</style>
248+
</head>
249+
<body>
250+
<form method="post" id="loginForm" action="/adfs/ls/IdpInitiatedSignOn.aspx?loginToRp=urn:amazon:webservices">
251+
<input id="userNameInput" name="UserName" type="email" value="" />
252+
<input id="passwordInput" name="Password" type="password" />
253+
<input id="optionForms" type="hidden" name="AuthMethod" value="FormsAuthentication"/>
254+
</form>
255+
</body>
256+
</html>
257+
"""
258+
259+
ADFS_SAML_RESPONSE_WITH_CURLY_BRACES = """
260+
<html>
261+
<head><title>Working...</title></head>
262+
<body>
263+
<form method="POST" name="hiddenform" action="https://signin.aws.amazon.com:443/saml">
264+
<input type="hidden" name="SAMLResponse" value="dummy_value"/>
265+
</form>
266+
</body>
267+
</html>
268+
"""
269+
270+
def test_lxml6_curly_brace_attribute_bug(self):
271+
# This must not raise. With the old bs4, it throws ValueError.
272+
soup = bs4.BeautifulSoup(self.MALFORMED_HTML_WITH_CURLY_BRACE_ATTR, features="lxml")
273+
assert soup is not None
274+
275+
def test_parse_adfs_sign_in_page_with_curly_braces(self):
276+
soup = bs4.BeautifulSoup(self.ADFS_HTML_WITH_CURLY_BRACES, features="lxml")
277+
username_input = soup.find("input", {"name": "UserName"})
278+
assert username_input is not None
279+
assert username_input.get("type") == "email"
280+
281+
password_input = soup.find("input", {"name": "Password"})
282+
assert password_input is not None
283+
284+
def test_form_based_auth_with_curly_brace_html(self, mocker):
285+
acp, _ = make_valid_adfs_credentials_provider()
286+
287+
mock_auth_form = MagicMock()
288+
mock_auth_form.text = self.ADFS_HTML_WITH_CURLY_BRACES
289+
290+
mock_saml_response = MagicMock()
291+
mock_saml_response.text = self.ADFS_SAML_RESPONSE_WITH_CURLY_BRACES
292+
293+
mocker.patch("requests.get", return_value=mock_auth_form)
294+
mocker.patch("requests.post", return_value=mock_saml_response)
295+
296+
result = acp.form_based_authentication()
297+
assert result == "dummy_value"

0 commit comments

Comments
 (0)