Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions ext/standard/tests/url/gh12703.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
--TEST--
GH-12703 (parse_url mishandles colon inside path)
--FILE--
<?php
// Regression test for GH-12703: a ':' that appears inside the path must stay
// part of the path and must not be interpreted as a host/port separator.

// Case 1 (issue report): an absolute path with a colon and no query string
// used to return false instead of the path. When a single component is
// requested, the path is expected back as a string and the absent scheme
// as NULL.
var_dump(parse_url('/page:1'));
var_dump(parse_url('/page:1', PHP_URL_PATH));
var_dump(parse_url('/page:1', PHP_URL_SCHEME));

// The same path followed by a query string already worked via a different
// branch; keep it as a regression guard.
var_dump(parse_url('/page:1?foo=bar'));

// Degenerate single-slash inputs (a bare colon, and a colon followed by
// digits that look like a port) that exercise the new branch. Both are
// expected to come back as a plain path.
var_dump(parse_url('/:'));
var_dump(parse_url('/:80'));

// Case 2: a relative-scheme URL (//host/path) whose path contains a colon
// followed by digits used to have those digits stripped out as a phantom
// port. The host has no explicit port in these inputs, so parse_url should
// report only host and path. The digit values 1, 65535 and 65536 presumably
// cover a small value, the largest valid port number and the first value
// past it; the last input has a colon in more than one path segment.
var_dump(parse_url('//www.example.com/foo:65535/'));
var_dump(parse_url('//www.example.com/foo:1/'));
var_dump(parse_url('//www.example.com/foo:65536/'));
var_dump(parse_url('//host/a:1/b:2/'));

// An explicit port on the host must still be extracted, and the colon inside
// the path must stay inside the path.
var_dump(parse_url('//www.example.com:8080/foo:65535/'));

// A full URL with scheme, user, password, host, port, a path containing
// colons, query and fragment must still be split into all eight components.
var_dump(parse_url('scheme://user:pass@host:8080/a:1/b:2?q=1#f'));
?>
--EXPECT--
array(1) {
["path"]=>
string(7) "/page:1"
}
string(7) "/page:1"
NULL
array(2) {
["path"]=>
string(7) "/page:1"
["query"]=>
string(7) "foo=bar"
}
array(1) {
["path"]=>
string(2) "/:"
}
array(1) {
["path"]=>
string(4) "/:80"
}
array(2) {
["host"]=>
string(15) "www.example.com"
["path"]=>
string(11) "/foo:65535/"
}
array(2) {
["host"]=>
string(15) "www.example.com"
["path"]=>
string(7) "/foo:1/"
}
array(2) {
["host"]=>
string(15) "www.example.com"
["path"]=>
string(11) "/foo:65536/"
}
array(2) {
["host"]=>
string(4) "host"
["path"]=>
string(9) "/a:1/b:2/"
}
array(3) {
["host"]=>
string(15) "www.example.com"
["port"]=>
int(8080)
["path"]=>
string(11) "/foo:65535/"
}
array(8) {
["scheme"]=>
string(6) "scheme"
["host"]=>
string(4) "host"
["port"]=>
int(8080)
["user"]=>
string(4) "user"
["pass"]=>
string(4) "pass"
["path"]=>
string(8) "/a:1/b:2"
["query"]=>
string(3) "q=1"
["fragment"]=>
string(1) "f"
}
12 changes: 9 additions & 3 deletions ext/standard/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ static void php_replace_controlchars(char *str, size_t len)

ZEND_ASSERT(str != NULL);

/* Replace ASCII C0 control characters (0x00..0x1F) and DEL (0x7F) with '_'.
* An inline comparison is used instead of iscntrl() because (a) it avoids
* the per-byte, locale-dependent <ctype.h> lookup (on glibc this goes
* through __ctype_b_loc()), and (b) URL components are bytes, not
* locale-dependent text, so the C-locale semantics are what we want
* regardless of the process locale. The simple form also gives the
* compiler a chance to auto-vectorize the loop. */
while (s < e) {
if (iscntrl(*s)) {
*s='_';
if (UNEXPECTED(*s < 0x20 || *s == 0x7f)) {
*s = '_';
}
s++;
}
Expand Down Expand Up @@ -104,7 +110,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
while (p < e) {
/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
if (e + 1 < ue && e < binary_strcspn(s, ue, "?#")) {
if (*s != '/' && e + 1 < ue && e < binary_strcspn(s, ue, "?#")) {
goto parse_port;
} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
s += 2;
Expand Down
Loading