|
1 | 1 | import typing |
2 | 2 | from unittest.mock import MagicMock, Mock, patch |
3 | 3 |
|
| 4 | +import bs4 # type: ignore |
4 | 5 | import pytest # type: ignore |
5 | 6 | import requests |
6 | 7 |
|
@@ -219,3 +220,78 @@ def test_form_based_authentication_empty_saml_response_should_fail(mocker) -> No |
219 | 220 |
|
220 | 221 | with pytest.raises(InterfaceError, match="Failed to find ADFS access_token in authentication response payload"): |
221 | 222 | acp.form_based_authentication() |
| 223 | + |
| 224 | + |
class TestLxmlCurlyBraceCompatibility:
    """
    Regression tests for lxml 6.0 + bs4 compatibility.

    lxml 6.0 can recover stray ``{...}`` text inside a tag as an HTML
    attribute, and bs4 < 4.13.5 raises ValueError when it meets such an
    attribute. The tests below parse brace-bearing HTML directly with bs4
    and also run it through ``form_based_authentication()``.
    """

    # This HTML triggers the actual lxml 6.0 + bs4 < 4.13.5 bug.
    # lxml 6.0 recovers {currency: as an HTML attribute, and bs4 < 4.13.5
    # fails to handle it in _getNsTag(), raising ValueError.
    MALFORMED_HTML_WITH_CURLY_BRACE_ATTR = (
        '<!DOCTYPE html><html lang="id"><head><noscript>'
        '<img height="1" width="1" style="display:none" '
        'src="https://www.example.com/tr?id=123&ev=PageView&noscript=1" '
        'fbq("track", "Purchase" , {currency: "IDR" , value: 20000.00}); />'
        '</noscript></head><body></body>'
    )

    # Sign-in page fixture: well-formed HTML whose only curly braces are the
    # CSS rules inside <style>. It carries the UserName / Password /
    # AuthMethod inputs and the login form that the tests look up.
    ADFS_HTML_WITH_CURLY_BRACES = """
    <html>
    <head>
    <style>
    .illustrationClass {background-image:url(/adfs/portal/illustration/illustration.png);}
    .f_circleG { position: absolute; height: 22px; width: 22px; }
    </style>
    </head>
    <body>
    <form method="post" id="loginForm" action="/adfs/ls/IdpInitiatedSignOn.aspx?loginToRp=urn:amazon:webservices">
    <input id="userNameInput" name="UserName" type="email" value="" />
    <input id="passwordInput" name="Password" type="password" />
    <input id="optionForms" type="hidden" name="AuthMethod" value="FormsAuthentication"/>
    </form>
    </body>
    </html>
    """

    # SAML response fixture. The <style> rule is what puts curly braces into
    # this page, so the fixture matches its name; the SAMLResponse input
    # holds the value the authentication test expects back.
    ADFS_SAML_RESPONSE_WITH_CURLY_BRACES = """
    <html>
    <head>
    <title>Working...</title>
    <style>
    .hiddenFormClass { display: none; }
    </style>
    </head>
    <body>
    <form method="POST" name="hiddenform" action="https://signin.aws.amazon.com:443/saml">
    <input type="hidden" name="SAMLResponse" value="dummy_value"/>
    </form>
    </body>
    </html>
    """

    def test_lxml6_curly_brace_attribute_bug(self) -> None:
        """Parsing the malformed brace-attribute HTML must not raise."""
        # This must not raise. With the old bs4, it throws ValueError.
        soup = bs4.BeautifulSoup(self.MALFORMED_HTML_WITH_CURLY_BRACE_ATTR, features="lxml")
        assert soup is not None

    def test_parse_adfs_sign_in_page_with_curly_braces(self) -> None:
        """The login inputs stay discoverable when the page's CSS contains braces."""
        soup = bs4.BeautifulSoup(self.ADFS_HTML_WITH_CURLY_BRACES, features="lxml")
        username_input = soup.find("input", {"name": "UserName"})
        assert username_input is not None
        assert username_input.get("type") == "email"

        password_input = soup.find("input", {"name": "Password"})
        assert password_input is not None

    def test_form_based_auth_with_curly_brace_html(self, mocker) -> None:
        """form_based_authentication() returns the SAMLResponse value from brace-bearing pages."""
        acp, _ = make_valid_adfs_credentials_provider()

        # Stand-in for the sign-in page response; only .text is set explicitly.
        mock_auth_form = MagicMock()
        mock_auth_form.text = self.ADFS_HTML_WITH_CURLY_BRACES

        # Stand-in for the response that carries the SAMLResponse input.
        mock_saml_response = MagicMock()
        mock_saml_response.text = self.ADFS_SAML_RESPONSE_WITH_CURLY_BRACES

        # No real HTTP traffic: GET yields the sign-in page, POST the SAML page.
        mocker.patch("requests.get", return_value=mock_auth_form)
        mocker.patch("requests.post", return_value=mock_saml_response)

        result = acp.form_based_authentication()
        # "dummy_value" is the SAMLResponse input value in the fixture above.
        assert result == "dummy_value"
0 commit comments