Skip to content

Commit fa96c3c

Browse files
committed
Add internal URI handling API
1 parent 02b9455 commit fa96c3c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+865
-208
lines changed

ext/filter/logical_filters.c

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "filter_private.h"
2121
#include "ext/standard/url.h"
2222
#include "ext/pcre/php_pcre.h"
23+
#include "ext/uri/php_uri.h"
2324

2425
#include "zend_multiply.h"
2526

@@ -89,6 +90,8 @@
8990
#define FORMAT_IPV4 4
9091
#define FORMAT_IPV6 6
9192

93+
#define URL_OPTION_URI_PARSER_CLASS "uri_parser_class"
94+
9295
static bool _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]);
9396

9497
static bool php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
@@ -591,7 +594,6 @@ static bool php_filter_is_valid_ipv6_hostname(const zend_string *s)
591594

592595
void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
593596
{
594-
php_url *url;
595597
size_t old_len = Z_STRLEN_P(value);
596598

597599
php_filter_url(value, flags, option_array, charset);
@@ -600,52 +602,69 @@ void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
600602
RETURN_VALIDATION_FAILED
601603
}
602604

603-
/* Use parse_url - if it returns false, we return NULL */
604-
url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
605+
/* Parse options */
606+
zval *option_val;
607+
zend_string *parser_name;
608+
int parser_name_set;
609+
FETCH_STR_OPTION(parser_name, URL_OPTION_URI_PARSER_CLASS);
610+
611+
uri_handler_t *uri_handler = php_uri_get_handler(parser_name_set ? parser_name : NULL);
612+
if (uri_handler == NULL) {
613+
zend_throw_error(NULL, "Invalid URI parser used");
614+
RETURN_VALIDATION_FAILED
615+
}
605616

606-
if (url == NULL) {
617+
/* Parse the URI - if it fails, we return NULL */
618+
php_uri *uri = php_uri_parse_to_struct(uri_handler, Z_STR_P(value), URI_COMPONENT_READ_NORMALIZED_ASCII, true);
619+
if (uri == NULL) {
607620
RETURN_VALIDATION_FAILED
608621
}
609622

610-
if (url->scheme != NULL &&
611-
(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
623+
if (uri->scheme != NULL &&
624+
(zend_string_equals_literal_ci(uri->scheme, "http") || zend_string_equals_literal_ci(uri->scheme, "https"))) {
612625

613-
if (url->host == NULL) {
614-
goto bad_url;
626+
if (uri->host == NULL) {
627+
php_uri_struct_free(uri);
628+
RETURN_VALIDATION_FAILED
615629
}
616630

617631
if (
632+
/* @todo Find a better solution than hardcoding the uri handler name. Skipping these checks is needed because
633+
* both uriparser and lexbor perform comprehensive validations. Also, the [] pair is removed by these
634+
* libraries in case of ipv6 URIs, therefore php_filter_is_valid_ipv6_hostname() would cause false positive
635+
* failures. */
636+
strcmp(uri_handler->name, URI_PARSER_PHP) == 0 &&
618637
/* An IPv6 enclosed by square brackets is a valid hostname.*/
619-
!php_filter_is_valid_ipv6_hostname(url->host) &&
638+
!php_filter_is_valid_ipv6_hostname(uri->host) &&
620639
/* Validate domain.
621640
* This includes a loose check for an IPv4 address. */
622-
!php_filter_validate_domain_ex(url->host, FILTER_FLAG_HOSTNAME)
641+
!php_filter_validate_domain_ex(uri->host, FILTER_FLAG_HOSTNAME)
623642
) {
624-
php_url_free(url);
643+
php_uri_struct_free(uri);
625644
RETURN_VALIDATION_FAILED
626645
}
627646
}
628647

629-
if (
630-
url->scheme == NULL ||
631-
/* some schemas allow the host to be empty */
632-
(url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
633-
((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
648+
if (uri->scheme == NULL ||
649+
/* some schemes allow the host to be empty */
650+
(uri->host == NULL && (!zend_string_equals_literal(uri->scheme, "mailto") && !zend_string_equals_literal(uri->scheme, "news") && !zend_string_equals_literal(uri->scheme, "file"))) ||
651+
((flags & FILTER_FLAG_PATH_REQUIRED) && uri->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && uri->query == NULL)
634652
) {
635-
bad_url:
636-
php_url_free(url);
653+
php_uri_struct_free(uri);
637654
RETURN_VALIDATION_FAILED
638655
}
639656

640-
if ((url->user != NULL && !is_userinfo_valid(url->user))
641-
|| (url->pass != NULL && !is_userinfo_valid(url->pass))
657+
if (strcmp(uri_handler->name, "parse_url") == 0 &&
658+
(
659+
(uri->user != NULL && !is_userinfo_valid(uri->user)) ||
660+
(uri->password != NULL && !is_userinfo_valid(uri->password))
661+
)
642662
) {
643-
php_url_free(url);
663+
php_uri_struct_free(uri);
644664
RETURN_VALIDATION_FAILED
645-
646665
}
647666

648-
php_url_free(url);
667+
php_uri_struct_free(uri);
649668
}
650669
/* }}} */
651670

ext/filter/tests/062.phpt

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
--TEST--
2+
filter_var() and FILTER_VALIDATE_URL with different URI parsers
3+
--EXTENSIONS--
4+
filter
5+
--FILE--
6+
<?php
7+
8+
function validateUrls(string $parserName)
9+
{
10+
$values = [
11+
'http://example.com/index.html',
12+
'http://www.example.com/index.php',
13+
'http://www.example/img/test.png',
14+
'http://www.example/img/dir/',
15+
'http://www.example/img/dir',
16+
'http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/',
17+
'http://toolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolong.com',
18+
'http://eauBcFReEmjLcoZwI0RuONNnwU4H9r151juCaqTI5VeIP5jcYIqhx1lh5vV00l2rTs6y7hOp7rYw42QZiq6VIzjcYrRm8gFRMk9U9Wi1grL8Mr5kLVloYLthHgyA94QK3SaXCATklxgo6XvcbXIqAGG7U0KxTr8hJJU1p2ZQ2mXHmp4DhYP8N9SRuEKzaCPcSIcW7uj21jZqBigsLsNAXEzU8SPXZjmVQVtwQATPWeWyGW4GuJhjP4Q8o0.com',
19+
'http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com.',
20+
'http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58R.example.com',
21+
'http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]',
22+
'http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html',
23+
'http://[2001:db8:0:85a3::ac1f:8001]/',
24+
'http://[::1]',
25+
'http://cont-ains.h-yph-en-s.com',
26+
'http://..com',
27+
'http://a.-bc.com',
28+
'http://ab.cd-.com',
29+
'http://-.abc.com',
30+
'http://abc.-.abc.com',
31+
'http://underscore_.example.com',
32+
'http//www.example/wrong/url/',
33+
'http:/www.example',
34+
'file:///tmp/test.c',
35+
'ftp://ftp.example.com/tmp/',
36+
'/tmp/test.c',
37+
'/',
38+
'http://',
39+
'http:/',
40+
'http:',
41+
'http',
42+
'',
43+
-1,
44+
[],
45+
46+
'news:news.php.net',
47+
'file://foo/bar',
48+
"http://\r\n/bar",
49+
"http://example.com:qq",
50+
"http://example.com:-2",
51+
"http://example.com:65536",
52+
"http://example.com:65537",
53+
];
54+
55+
foreach ($values as $value) {
56+
var_dump(filter_var($value, FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
57+
}
58+
59+
var_dump(filter_var("qwe", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
60+
var_dump(filter_var("http://qwe", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
61+
var_dump(filter_var("http://", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
62+
var_dump(filter_var("/tmp/test", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
63+
var_dump(filter_var("http://www.example.com", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
64+
var_dump(filter_var("http://www.example.com", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_PATH_REQUIRED]));
65+
var_dump(filter_var("http://www.example.com/path/at/the/server/", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_PATH_REQUIRED]));
66+
var_dump(filter_var("http://www.example.com/index.html", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_QUERY_REQUIRED]));
67+
var_dump(filter_var("http://www.example.com/index.php?a=b&c=d", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_QUERY_REQUIRED]));
68+
}
69+
70+
echo "RFC3986:\n";
71+
validateUrls(Uri\Rfc3986Uri::class);
72+
73+
echo "\nWHATWG:\n";
74+
validateUrls(Uri\WhatWgUri::class);
75+
76+
echo "Done\n";
77+
?>
78+
--EXPECT--
79+
RFC3986:
80+
string(29) "http://example.com/index.html"
81+
string(32) "http://www.example.com/index.php"
82+
string(31) "http://www.example/img/test.png"
83+
string(27) "http://www.example/img/dir/"
84+
string(26) "http://www.example/img/dir"
85+
string(79) "http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/"
86+
string(81) "http://toolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolong.com"
87+
string(261) "http://eauBcFReEmjLcoZwI0RuONNnwU4H9r151juCaqTI5VeIP5jcYIqhx1lh5vV00l2rTs6y7hOp7rYw42QZiq6VIzjcYrRm8gFRMk9U9Wi1grL8Mr5kLVloYLthHgyA94QK3SaXCATklxgo6XvcbXIqAGG7U0KxTr8hJJU1p2ZQ2mXHmp4DhYP8N9SRuEKzaCPcSIcW7uj21jZqBigsLsNAXEzU8SPXZjmVQVtwQATPWeWyGW4GuJhjP4Q8o0.com"
88+
string(261) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com."
89+
string(83) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58R.example.com"
90+
string(48) "http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]"
91+
string(50) "http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html"
92+
string(36) "http://[2001:db8:0:85a3::ac1f:8001]/"
93+
string(12) "http://[::1]"
94+
string(31) "http://cont-ains.h-yph-en-s.com"
95+
string(12) "http://..com"
96+
string(16) "http://a.-bc.com"
97+
string(17) "http://ab.cd-.com"
98+
string(16) "http://-.abc.com"
99+
string(20) "http://abc.-.abc.com"
100+
string(30) "http://underscore_.example.com"
101+
bool(false)
102+
bool(false)
103+
string(18) "file:///tmp/test.c"
104+
string(26) "ftp://ftp.example.com/tmp/"
105+
bool(false)
106+
bool(false)
107+
string(7) "http://"
108+
bool(false)
109+
bool(false)
110+
bool(false)
111+
bool(false)
112+
bool(false)
113+
bool(false)
114+
string(18) "mailto:foo@bar.com"
115+
string(17) "news:news.php.net"
116+
string(14) "file://foo/bar"
117+
bool(false)
118+
bool(false)
119+
bool(false)
120+
string(24) "http://example.com:65536"
121+
string(24) "http://example.com:65537"
122+
bool(false)
123+
string(10) "http://qwe"
124+
string(7) "http://"
125+
bool(false)
126+
string(22) "http://www.example.com"
127+
bool(false)
128+
string(42) "http://www.example.com/path/at/the/server/"
129+
bool(false)
130+
string(40) "http://www.example.com/index.php?a=b&c=d"
131+
132+
WHATWG:
133+
string(29) "http://example.com/index.html"
134+
string(32) "http://www.example.com/index.php"
135+
string(31) "http://www.example/img/test.png"
136+
string(27) "http://www.example/img/dir/"
137+
string(26) "http://www.example/img/dir"
138+
string(79) "http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/"
139+
string(81) "http://toolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolong.com"
140+
string(261) "http://eauBcFReEmjLcoZwI0RuONNnwU4H9r151juCaqTI5VeIP5jcYIqhx1lh5vV00l2rTs6y7hOp7rYw42QZiq6VIzjcYrRm8gFRMk9U9Wi1grL8Mr5kLVloYLthHgyA94QK3SaXCATklxgo6XvcbXIqAGG7U0KxTr8hJJU1p2ZQ2mXHmp4DhYP8N9SRuEKzaCPcSIcW7uj21jZqBigsLsNAXEzU8SPXZjmVQVtwQATPWeWyGW4GuJhjP4Q8o0.com"
141+
string(261) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com."
142+
string(83) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58R.example.com"
143+
string(48) "http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]"
144+
string(50) "http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html"
145+
string(36) "http://[2001:db8:0:85a3::ac1f:8001]/"
146+
string(12) "http://[::1]"
147+
string(31) "http://cont-ains.h-yph-en-s.com"
148+
string(12) "http://..com"
149+
string(16) "http://a.-bc.com"
150+
string(17) "http://ab.cd-.com"
151+
string(16) "http://-.abc.com"
152+
string(20) "http://abc.-.abc.com"
153+
string(30) "http://underscore_.example.com"
154+
bool(false)
155+
string(17) "http:/www.example"
156+
string(18) "file:///tmp/test.c"
157+
string(26) "ftp://ftp.example.com/tmp/"
158+
bool(false)
159+
bool(false)
160+
bool(false)
161+
bool(false)
162+
bool(false)
163+
bool(false)
164+
bool(false)
165+
bool(false)
166+
bool(false)
167+
string(18) "mailto:foo@bar.com"
168+
string(17) "news:news.php.net"
169+
string(14) "file://foo/bar"
170+
bool(false)
171+
bool(false)
172+
bool(false)
173+
bool(false)
174+
bool(false)
175+
bool(false)
176+
string(10) "http://qwe"
177+
bool(false)
178+
bool(false)
179+
string(22) "http://www.example.com"
180+
bool(false)
181+
string(42) "http://www.example.com/path/at/the/server/"
182+
bool(false)
183+
string(40) "http://www.example.com/index.php?a=b&c=d"
184+
Done

ext/openssl/xp_ssl.c

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "php.h"
2525
#include "ext/standard/file.h"
2626
#include "ext/standard/url.h"
27+
#include "ext/uri/php_uri.h"
2728
#include "streams/php_streams_int.h"
2829
#include "zend_smart_str.h"
2930
#include "php_openssl.h"
@@ -2622,21 +2623,25 @@ static zend_long php_openssl_get_crypto_method(
26222623
static char *php_openssl_get_url_name(const char *resourcename,
26232624
size_t resourcenamelen, int is_persistent) /* {{{ */
26242625
{
2625-
php_url *url;
2626-
26272626
if (!resourcename) {
26282627
return NULL;
26292628
}
26302629

2631-
url = php_url_parse_ex(resourcename, resourcenamelen);
2632-
if (!url) {
2630+
uri_handler_t *uri_handler = php_uri_get_handler(NULL);
2631+
2632+
zend_string *resource = zend_string_init(resourcename, resourcenamelen, false);
2633+
uri_internal_t *internal_uri = php_uri_parse(uri_handler, resource, NULL);
2634+
if (internal_uri == NULL) {
2635+
zend_string_release(resource);
26332636
return NULL;
26342637
}
26352638

2636-
if (url->host) {
2637-
const char * host = ZSTR_VAL(url->host);
2639+
zval host_zv;
2640+
zend_result result = php_uri_get_host(internal_uri, URI_COMPONENT_READ_RAW, &host_zv);
2641+
if (result == SUCCESS && Z_TYPE(host_zv) == IS_STRING) {
2642+
const char * host = Z_STRVAL(host_zv);
26382643
char * url_name = NULL;
2639-
size_t len = ZSTR_LEN(url->host);
2644+
size_t len = Z_STRLEN(host_zv);
26402645

26412646
/* skip trailing dots */
26422647
while (len && host[len-1] == '.') {
@@ -2647,11 +2652,15 @@ static char *php_openssl_get_url_name(const char *resourcename,
26472652
url_name = pestrndup(host, len, is_persistent);
26482653
}
26492654

2650-
php_url_free(url);
2655+
zend_string_release(resource);
2656+
php_uri_free(internal_uri);
2657+
zval_ptr_dtor(&host_zv);
26512658
return url_name;
26522659
}
26532660

2654-
php_url_free(url);
2661+
zend_string_release(resource);
2662+
php_uri_free(internal_uri);
2663+
zval_ptr_dtor(&host_zv);
26552664
return NULL;
26562665
}
26572666
/* }}} */

0 commit comments

Comments
 (0)