srctree

Robin Linden parent 43c2f998 3ab18413
url: Fix hang when fed invalid UTF-8

inlinesplit
url/url.cpp added: 13, removed: 6, total 7
@@ -1659,13 +1659,10 @@ std::optional<std::string> UrlParser::parse_opaque_host(std::string_view input)
}
 
std::string_view tmp = input;
int len = 0;
 
while (!tmp.empty()) {
std::uint32_t cp = util::utf8_to_utf32(tmp);
 
len = util::unicode_utf8_byte_count(cp);
 
if (!is_url_codepoint(cp)) {
validation_error(ValidationError::InvalidUrlUnit);
}
@@ -1675,7 +1672,11 @@ std::optional<std::string> UrlParser::parse_opaque_host(std::string_view input)
}
 
// I don't *think* this can remove > size(), but maybe i should clamp it anyway
tmp.remove_prefix(len);
 
// len is 0 if the codepoint is larger than the maximum valid code
// point, 0x10ffff, meaning it'll have to take up at least 4 bytes.
int len = util::unicode_utf8_byte_count(cp);
tmp.remove_prefix(len == 0 ? 4 : len);
}
 
return util::percent_encode(input, PercentEncodeSet::c0_control);
 
url/url_test.cpp added: 13, removed: 6, total 7
@@ -16,6 +16,7 @@
#include <regex>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>
@@ -661,6 +662,11 @@ int main() {
etest::expect_eq(*url, url::Url{.scheme = "a", .host = url::Host{.type = url::HostType::Opaque}});
});
 
// Regression test for parsing input that contains invalid UTF-8. The input is
// "o://" followed by malformed sequences: 0xe1 (a 3-byte lead) followed by
// 0xd2 instead of a continuation byte, the never-valid byte 0xf5, and a
// trailing 0xc9 lead byte with nothing after it.
etest::test("URL parsing: invalid utf-8", [] {
url::UrlParser p;
// The result is discarded on purpose: no expectation is checked, so this
// test only exercises that parse() returns for this input.
std::ignore = p.parse("\x6f\x3a\x2f\x2f\x26\xe1\xd2\x2e\x3b\xf5\x26\xe1\xd2\x0b\x0a\x26\xe1\xd2\xc9");
});
 
etest::test("URL parsing: file url with base", [] {
url::UrlParser p;