Skip to content

Add the Uri\Rfc3986\Uri class to ext/uri without wither support #18836

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Review round 1 fixes
  • Loading branch information
kocsismate committed Jul 1, 2025
commit ca857794be6e8094678f2ba2058ffbbc18fe641c
2 changes: 1 addition & 1 deletion Zend/zend_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -597,8 +597,8 @@ EMPTY_SWITCH_DEFAULT_CASE()
_(ZEND_STR_SCHEME, "scheme") \
_(ZEND_STR_HOST, "host") \
_(ZEND_STR_PORT, "port") \
_(ZEND_STR_USERINFO, "userinfo") \
_(ZEND_STR_USER, "user") \
_(ZEND_STR_USERINFO, "userinfo") \
_(ZEND_STR_USERNAME, "username") \
_(ZEND_STR_PASS, "pass") \
_(ZEND_STR_PASSWORD, "password") \
Expand Down
12 changes: 5 additions & 7 deletions ext/uri/php_uri.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,9 +488,9 @@ PHP_METHOD(Uri_Rfc3986_Uri, resolve)
{
zend_string *uri_str;

ZEND_PARSE_PARAMETERS_START(1, 1) \
Z_PARAM_PATH_STR(uri_str) \
ZEND_PARSE_PARAMETERS_END(); \
ZEND_PARSE_PARAMETERS_START(1, 1)
Z_PARAM_PATH_STR(uri_str)
ZEND_PARSE_PARAMETERS_END();

zend_object *this_object = Z_OBJ_P(ZEND_THIS);
uri_internal_t *internal_uri = uri_internal_from_obj(this_object);
Expand Down Expand Up @@ -850,6 +850,8 @@ static PHP_MINIT_FUNCTION(uri)
return FAILURE;
}

uriparser_module_init();

if (uri_handler_register(&lexbor_uri_handler) == FAILURE) {
return FAILURE;
}
Expand All @@ -874,10 +876,6 @@ static PHP_MSHUTDOWN_FUNCTION(uri)

PHP_RINIT_FUNCTION(uri)
{
if (uriparser_request_init() == FAILURE) {
return FAILURE;
}

if (lexbor_request_init() == FAILURE) {
return FAILURE;
}
Expand Down
179 changes: 87 additions & 92 deletions ext/uri/php_uriparser.c
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something I'm only noticing now: I think this should rather be called ext/uri/parser_rfc3986.c and php_lexbor.c would be ext/uri/parser_whatwg.c. The php_uriparser.c name is confusing to me, because uriparser is an extremely generic term.

To give another comparison with ext/random, since it is architecturally similar: Each engine has its own engine_enginename.c file, e.g. engine_xoshiro256starstar.c.

Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ void *uriparser_parse_uri(const zend_string *uri_str, const void *base_url, zval
static void uriparser_free_uri(void *uri);
static void throw_invalid_uri_exception(void);

HashTable uriparser_property_handlers;
UriMemoryManager uriparser_memory_manager;

static void uriparser_copy_text_range(UriTextRangeA *text_range, UriTextRangeA *new_text_range, bool use_safe)
{
if (text_range->first == NULL || text_range->afterLast == NULL || (text_range->first > text_range->afterLast && !use_safe)) {
Expand Down Expand Up @@ -99,34 +96,43 @@ static UriUriA *uriparser_copy_uri(UriUriA *uriparser_uri) // TODO add to uripar

static zend_result uriparser_normalize_uri(UriUriA *uriparser_uri)
{
if (uriNormalizeSyntaxExMmA(uriparser_uri, (unsigned int)-1, &uriparser_memory_manager) != URI_SUCCESS) {
if (uriNormalizeSyntaxExA(uriparser_uri, (unsigned int)-1) != URI_SUCCESS) {
return FAILURE;
}

return SUCCESS;
}

#define URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode) do { \
if (read_mode == URI_COMPONENT_READ_RAW) { \
uriparser_uri = (UriUriA *) uriparser_uris->uri; \
} else if (read_mode == URI_COMPONENT_READ_NORMALIZED_UNICODE || read_mode == URI_COMPONENT_READ_NORMALIZED_ASCII) { \
if (uriparser_uris->normalized_uri == NULL) { \
uriparser_uris->normalized_uri = uriparser_copy_uri(uriparser_uris->uri); \
if (uriparser_normalize_uri(uriparser_uris->normalized_uri) == FAILURE) { \
return FAILURE; \
} \
} \
uriparser_uri = uriparser_uris->normalized_uri; \
} else { \
ZEND_UNREACHABLE(); \
} \
} while (0)
/* Picks the UriUriA that matches the requested read mode.
 *
 * URI_COMPONENT_READ_RAW yields the URI stored in uriparser_uris->uri.
 * Both normalized modes yield uriparser_uris->normalized_uri, which is built on
 * first use by copying the raw URI and normalizing the copy, and is then kept
 * on the struct for subsequent calls.
 *
 * Returns NULL when normalization fails; the copy is released and the cached
 * pointer is reset to NULL, so the next call attempts the normalization again. */
static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_component_read_mode_t read_mode)
{
	switch (read_mode) {
		case URI_COMPONENT_READ_RAW:
			return uriparser_uris->uri;
		case URI_COMPONENT_READ_NORMALIZED_ASCII:
			ZEND_FALLTHROUGH;
		case URI_COMPONENT_READ_NORMALIZED_UNICODE: {
			/* Fast path: a normalized copy was already produced earlier. */
			if (uriparser_uris->normalized_uri != NULL) {
				return uriparser_uris->normalized_uri;
			}

			UriUriA *normalized = uriparser_copy_uri(uriparser_uris->uri);
			uriparser_uris->normalized_uri = normalized;

			if (uriparser_normalize_uri(normalized) != FAILURE) {
				return normalized;
			}

			/* Normalization failed: drop the copy and clear the cache slot. */
			uriFreeUriMembersA(normalized);
			efree(normalized);
			uriparser_uris->normalized_uri = NULL;

			return NULL;
		}
		EMPTY_SWITCH_DEFAULT_CASE()
	}
}

static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->scheme.first != NULL && uriparser_uri->scheme.afterLast != NULL) {
zend_string *str = zend_string_init(uriparser_uri->scheme.first, uriparser_uri->scheme.afterLast - uriparser_uri->scheme.first, false);
Expand All @@ -140,9 +146,10 @@ static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri

zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, uriparser_uri->userInfo.afterLast - uriparser_uri->userInfo.first);
Expand All @@ -155,14 +162,17 @@ zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_comp

static zend_result uriparser_read_username(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
char *c = strchr(uriparser_uri->userInfo.first, ':');
if (c == NULL && uriparser_uri->userInfo.afterLast - uriparser_uri->userInfo.first > 0) {
ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, uriparser_uri->userInfo.afterLast - uriparser_uri->userInfo.first);
size_t length = uriparser_uri->userInfo.afterLast - uriparser_uri->userInfo.first;
char *c = memchr(uriparser_uri->userInfo.first, ':', length);

if (c == NULL && length > 0) {
ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, length);
} else if (c != NULL && c - uriparser_uri->userInfo.first > 0) {
ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, c - uriparser_uri->userInfo.first);
} else {
Expand All @@ -177,12 +187,14 @@ static zend_result uriparser_read_username(const uri_internal_t *internal_uri, u

static zend_result uriparser_read_password(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
char *c = strchr(uriparser_uri->userInfo.first, ':');
const char *c = memchr(uriparser_uri->userInfo.first, ':', uriparser_uri->userInfo.afterLast - uriparser_uri->userInfo.first);

if (c != NULL && uriparser_uri->userInfo.afterLast - c - 1 > 0) {
ZVAL_STRINGL(retval, c + 1, uriparser_uri->userInfo.afterLast - c - 1);
} else {
Expand All @@ -197,9 +209,10 @@ static zend_result uriparser_read_password(const uri_internal_t *internal_uri, u

static zend_result uriparser_read_host(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL && uriparser_uri->hostText.afterLast - uriparser_uri->hostText.first > 0) {
if (uriparser_uri->hostData.ip6 != NULL) {
Expand All @@ -209,7 +222,7 @@ static zend_result uriparser_read_host(const uri_internal_t *internal_uri, uri_c
smart_str_appendl(&host_str, uriparser_uri->hostText.first, uriparser_uri->hostText.afterLast - uriparser_uri->hostText.first);
smart_str_appendc(&host_str, ']');

ZVAL_STR(retval, smart_str_extract(&host_str));
ZVAL_NEW_STR(retval, smart_str_extract(&host_str));
} else {
ZVAL_STRINGL(retval, uriparser_uri->hostText.first, uriparser_uri->hostText.afterLast - uriparser_uri->hostText.first);
}
Expand All @@ -236,26 +249,27 @@ static zend_result uriparser_read_port(const uri_internal_t *internal_uri, uri_c

static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->pathHead != NULL) {
const UriPathSegmentA *p;
smart_str str = {0};

if (uriparser_uri->absolutePath || uriIsHostSetA(uriparser_uri)) {
smart_str_appends(&str, "/");
smart_str_appendc(&str, '/');
}

smart_str_appendl(&str, uriparser_uri->pathHead->text.first, (int) ((uriparser_uri->pathHead->text).afterLast - (uriparser_uri->pathHead->text).first));
smart_str_appendl(&str, uriparser_uri->pathHead->text.first, (size_t) ((uriparser_uri->pathHead->text).afterLast - (uriparser_uri->pathHead->text).first));

for (p = uriparser_uri->pathHead->next; p != NULL; p = p->next) {
smart_str_appends(&str, "/");
smart_str_appendc(&str, '/');
smart_str_appendl(&str, p->text.first, (int) ((p->text).afterLast - (p->text).first));
}

ZVAL_STR(retval, smart_str_extract(&str));
ZVAL_NEW_STR(retval, smart_str_extract(&str));
} else if (uriparser_uri->absolutePath) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be:

Suggested change
} else if (uriparser_uri->absolutePath) {
} else if (uriparser_uri->absolutePath || uriHasHostA(uriparser_uri)) {

?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, this would make a few tests fail:

$uri = new Uri\Rfc3986\Uri("https://example.com");
var_dump($uri->getPath());

Currently, this outputs string(0) "", but with that change it would output string(1) "/". Handling of the leading slash is a bit convoluted with uriparser. I filed a ticket with my question (uriparser/uriparser#215), and the table in uriparser/uriparser#8 (comment) should clear up all the questions.

ZVAL_CHAR(retval, '/');
} else {
Expand All @@ -267,9 +281,10 @@ static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_c

static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->query.first != NULL && uriparser_uri->query.afterLast != NULL) {
ZVAL_STRINGL(retval, uriparser_uri->query.first, uriparser_uri->query.afterLast - uriparser_uri->query.first);
Expand All @@ -282,9 +297,10 @@ static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_

static zend_result uriparser_read_fragment(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
{
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) internal_uri->uri;
UriUriA *uriparser_uri;
URIPARSER_READ_URI(uriparser_uri, uriparser_uris, read_mode);
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
if (UNEXPECTED(uriparser_uri == NULL)) {
return FAILURE;
}

if (uriparser_uri->fragment.first != NULL && uriparser_uri->fragment.afterLast != NULL) {
ZVAL_STRINGL(retval, uriparser_uri->fragment.first, uriparser_uri->fragment.afterLast - uriparser_uri->fragment.first);
Expand All @@ -307,38 +323,26 @@ static void *uriparser_calloc(UriMemoryManager *memory_manager, size_t nmemb, si

static void *uriparser_realloc(UriMemoryManager *memory_manager, void *ptr, size_t size)
{
return realloc(ptr, size);
return erealloc(ptr, size);
}

static void *uriparser_reallocarray(UriMemoryManager *memory_manager, void *ptr, size_t nmemb, size_t size)
{
const size_t total_size = nmemb * size;

/* check for unsigned overflow */
if ((nmemb != 0) && (total_size / nmemb != size)) {
errno = ENOMEM;
return NULL;
}

return erealloc(ptr, total_size);
return safe_erealloc(ptr, nmemb, size, 0);
}

/* Deallocation hook with the UriMemoryManager callback signature: releases ptr
 * with efree(). The memory_manager argument is not used. */
static void uriparser_free(UriMemoryManager *memory_manager, void *ptr)
{
efree(ptr);
}

zend_result uriparser_request_init(void)
void uriparser_module_init(void)
{
uriparser_memory_manager.calloc = uriparser_calloc;
uriparser_memory_manager.malloc = uriparser_malloc;
uriparser_memory_manager.realloc = uriparser_realloc;
uriparser_memory_manager.reallocarray = uriparser_reallocarray;
uriparser_memory_manager.free = uriparser_free;

zend_hash_init(&uriparser_property_handlers, 8, NULL, NULL, true);

return SUCCESS;
defaultMemoryManager.malloc = uriparser_malloc;
defaultMemoryManager.calloc = uriparser_calloc;
defaultMemoryManager.realloc = uriparser_realloc;
defaultMemoryManager.reallocarray = uriparser_reallocarray;
defaultMemoryManager.free = uriparser_free;
}

static uriparser_uris_t *uriparser_create_uris(
Expand All @@ -357,16 +361,7 @@ static uriparser_uris_t *uriparser_create_uris(

static void throw_invalid_uri_exception(void)
{
zval exception;

object_init_ex(&exception, uri_invalid_uri_exception_ce);

zval value;
ZVAL_STRING(&value, "URI parsing failed");
zend_update_property_ex(uri_whatwg_invalid_url_exception_ce, Z_OBJ(exception), ZSTR_KNOWN(ZEND_STR_MESSAGE), &value);
zval_ptr_dtor_str(&value);

zend_throw_exception_object(&exception);
zend_throw_exception(uri_invalid_uri_exception_ce, "URI parsing failed", 0);
}

void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t *uriparser_base_urls, bool silent)
Expand All @@ -376,7 +371,7 @@ void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t
/* uriparser keeps the originally passed in string, while lexbor may allocate a new one. */
zend_string *original_uri_str = zend_string_init(ZSTR_VAL(uri_str), ZSTR_LEN(uri_str), false);
if (ZSTR_LEN(original_uri_str) == 0 ||
uriParseSingleUriExMmA(uriparser_uri, ZSTR_VAL(original_uri_str), ZSTR_VAL(original_uri_str) + ZSTR_LEN(original_uri_str), NULL, &uriparser_memory_manager) != URI_SUCCESS
uriParseSingleUriExA(uriparser_uri, ZSTR_VAL(original_uri_str), ZSTR_VAL(original_uri_str) + ZSTR_LEN(original_uri_str), NULL) != URI_SUCCESS
) {
efree(uriparser_uri);
zend_string_release_ex(original_uri_str, false);
Expand All @@ -395,11 +390,11 @@ void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t

UriUriA *absolute_uri = emalloc(sizeof(UriUriA));

if (uriAddBaseUriExMmA(absolute_uri, uriparser_uri, uriparser_base_url, URI_RESOLVE_STRICTLY, &uriparser_memory_manager) != URI_SUCCESS) {
if (uriAddBaseUriExA(absolute_uri, uriparser_uri, uriparser_base_url, URI_RESOLVE_STRICTLY) != URI_SUCCESS) {
zend_string_release_ex(original_uri_str, false);
uriFreeUriMembersMmA(uriparser_uri, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_uri);
efree(uriparser_uri);
uriFreeUriMembersMmA(uriparser_base_url, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_base_url);
efree(uriparser_base_url);
efree(absolute_uri);

Expand All @@ -412,9 +407,9 @@ void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t

/* TODO fix freeing: if the following code runs, then we'll have use-after-free-s because uriparser doesn't
copy the input. If we don't run the following code, then we'll have memory leaks...
uriFreeUriMembersMmA(uriparser_base_url, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_base_url);
efree(uriparser_base_url);
uriFreeUriMembersMmA(uriparser_uri, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_uri);
efree(uriparser_uri);
*/

Expand Down Expand Up @@ -462,12 +457,12 @@ static zend_string *uriparser_uri_to_string(void *uri, uri_recomposition_mode_t

zend_string *uri_string = zend_string_alloc(charsRequired - 1, false);
if (uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL) != URI_SUCCESS) {
zend_string_release(uri_string);
zend_string_efree(uri_string);
return NULL;
}

if (exclude_fragment) {
char *pos = strrchr(ZSTR_VAL(uri_string), '#');
const char *pos = zend_memrchr(ZSTR_VAL(uri_string), '#', ZSTR_LEN(uri_string));
if (pos != NULL) {
uri_string = zend_string_truncate(uri_string, (pos - ZSTR_VAL(uri_string)), false);
}
Expand All @@ -486,7 +481,7 @@ static void uriparser_free_uri(void *uri)
uriparser_uris->uri_str = NULL;
}

uriFreeUriMembersMmA(uriparser_uris->uri, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_uris->uri);
efree(uriparser_uris->uri);
uriparser_uris->uri = NULL;
}
Expand All @@ -498,7 +493,7 @@ static void uriparser_free_uri(void *uri)
uriparser_uris->normalized_uri_str = NULL;
}

uriFreeUriMembersMmA(uriparser_uris->normalized_uri, &uriparser_memory_manager);
uriFreeUriMembersA(uriparser_uris->normalized_uri);
efree(uriparser_uris->normalized_uri);
uriparser_uris->normalized_uri = NULL;
}
Expand Down
Loading