srctree

David Zero parent db3fc4a4 2d77b44e
url: Implement WPT url tests

Also fixes the bugs that the tests uncovered.

inlinesplit
WORKSPACE added: 199, removed: 14, total 185
@@ -156,6 +156,14 @@ http_archive(
url = "https://github.com/eliasdaler/imgui-sfml/archive/v2.6.tar.gz",
)
 
# simdjson v3.2.2 source archive from GitHub, pinned by sha256.
# The archive is built with the BUILD file at //third_party:simdjson.BUILD.
http_archive(
name = "simdjson", # Apache-2.0
build_file = "//third_party:simdjson.BUILD",
sha256 = "13a702536e051db612cdca82bf8585f2c69d9c6fd156ef291b170f13202c1b4c",
strip_prefix = "simdjson-3.2.2",
url = "https://github.com/simdjson/simdjson/archive/refs/tags/v3.2.2.tar.gz",
)
 
# https://github.com/glennrp/libpng
http_archive(
name = "libpng", # Libpng
@@ -234,6 +242,14 @@ http_archive(
url = "https://github.com/KhronosGroup/Vulkan-Hpp/archive/v%s.tar.gz" % VULKAN_TAG,
)
 
# Web Platform Tests archive at tag merge_pr_41277, pinned by sha256.
# The inline BUILD content exports only url/resources/urltestdata.json.
http_archive(
name = "wpt", # BSD-3-Clause
build_file_content = "exports_files([\"url/resources/urltestdata.json\"])",
sha256 = "0bec4b1c5bbae0f30b83049d61109186b69ce98cb2af32a0f95eb44be96228c8",
strip_prefix = "wpt-merge_pr_41277",
url = "https://github.com/web-platform-tests/wpt/archive/refs/tags/merge_pr_41277.tar.gz",
)
 
# The freedesktop GitLab goes down too often to be trusted.
http_archive(
name = "xext", # MIT
 
third_party/simdjson.BUILD added: 199, removed: 14, total 185
@@ -0,0 +1,9 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
 
# Publicly visible simdjson library built from the sources under singleheader/;
# that directory is also listed in `includes`.
cc_library(
name = "simdjson",
srcs = ["singleheader/simdjson.cpp"],
hdrs = ["singleheader/simdjson.h"],
includes = ["singleheader/"],
visibility = ["//visibility:public"],
)
 
url/BUILD added: 199, removed: 14, total 185
@@ -35,11 +35,19 @@ cc_test(
name = "url_test",
size = "small",
srcs = ["url_test.cpp"],
copts = HASTUR_COPTS,
copts = HASTUR_COPTS + select({
"@platforms//os:windows": [
"/wd4100",
"/wd4706",
],
"//conditions:default": [],
}),
data = ["@wpt//:url/resources/urltestdata.json"],
deps = [
":url",
"//etest",
"@icu//:common",
"@simdjson",
],
)
 
 
url/url.cpp added: 199, removed: 14, total 185
@@ -329,7 +329,7 @@ Origin Url::origin() const {
}
 
void UrlParser::validation_error(ValidationError err) const {
spdlog::debug("url: InputPos: {}, ParserState: {}, Validation Error: {} {}",
spdlog::warn("url: InputPos: {}, ParserState: {}, Validation Error: {} {}",
current_pos(),
std::to_underlying(state_),
std::to_underlying(err),
@@ -571,6 +571,9 @@ void UrlParser::state_scheme() {
state_ = ParserState::NoScheme;
 
reset();
 
// This can underflow pos_; that's ok, because it's incremented again before it's ever used.
back(1);
} else {
state_ = ParserState::Failure;
 
@@ -932,7 +935,7 @@ void UrlParser::state_file() {
} else if (!is_eof()) {
url_.query = std::nullopt;
 
if (!starts_with_windows_drive_letter(remaining_from(1))) {
if (!starts_with_windows_drive_letter(remaining_from(0))) {
shorten_url_path(url_);
} else {
validation_error(ValidationError::FileInvalidWindowsDriveLetter);
@@ -963,7 +966,7 @@ void UrlParser::state_file_slash() {
if (base_.has_value() && base_->scheme == "file") {
url_.host = base_->host;
 
if (!starts_with_windows_drive_letter(remaining_from(1))
if (!starts_with_windows_drive_letter(remaining_from(0))
&& is_normal_windows_drive_letter(std::get<1>(base_->path)[0])) {
std::get<1>(url_.path).push_back(std::get<1>(base_->path)[0]);
}
@@ -1223,12 +1226,13 @@ std::optional<std::string> UrlParser::domain_to_ascii(std::string_view domain, b
proc_err &= ~UIDNA_ERROR_HYPHEN_3_4;
 
if (!be_strict) {
proc_err &= ~UIDNA_ERROR_EMPTY_LABEL;
proc_err &= ~UIDNA_ERROR_LABEL_TOO_LONG;
proc_err &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
}
 
// If domain or any label is empty, proc_err should contain UIDNA_ERROR_EMPTY_LABEL
if (U_FAILURE(err) || proc_err != 0) {
if (U_FAILURE(err) || proc_err != 0 || ascii_domain.empty()) {
validation_error(ValidationError::DomainToAscii);
 
return std::nullopt;
@@ -1460,16 +1464,25 @@ std::optional<std::tuple<std::uint64_t, bool>> UrlParser::parse_ipv4_number(std:
}
}
 
// TODO(zero-one): Differ width based on largest integer value supported by platform?
std::uint64_t out;
 
auto res = std::from_chars(input.data(), input.data() + input.size(), out, r);
 
if (res.ec == std::errc::invalid_argument || res.ec == std::errc::result_out_of_range) {
spdlog::info("Invalid ipv4 number");
if (res.ec == std::errc::invalid_argument) {
spdlog::info("Invalid IPv4 number");
 
return std::nullopt;
}
 
// This deviation from the spec is necessary, because the spec assumes arbitrary precision
if (res.ec == std::errc::result_out_of_range) {
spdlog::info("IPv4 number > 2^64");
 
// The number returned here is an error value
return {{-1, true}};
}
 
return {{out, v_err}};
}
 
 
url/url_test.cpp added: 199, removed: 14, total 185
@@ -7,8 +7,11 @@
 
#include "etest/etest.h"
 
#include <simdjson.h>
 
#include <array>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <optional>
#include <regex>
@@ -634,6 +637,142 @@ int main() {
etest::expect_eq(*url, url::Url{.scheme = "a", .host = url::Host{.type = url::HostType::Opaque}});
});
 
// A scheme-relative file URL ("file:usr/...") is resolved against the base:
// the base's last path segment is dropped and the relative path is appended.
etest::test("URL parsing: file url with base", [] {
    url::UrlParser parser;

    // Absolute file URL that serves as the base for the parse below.
    std::optional<url::Url> base = parser.parse("file:///usr/bin/vim");
    etest::require(base.has_value());

    std::optional<url::Url> resolved = parser.parse("file:usr/bin/emacs", base);
    etest::require(resolved.has_value());

    url::Url &result = *resolved;

    etest::expect_eq(result.scheme, "file");
    etest::expect_eq(result.serialize(), "file:///usr/bin/usr/bin/emacs");
    etest::expect_eq(result.host->serialize(), "");
    etest::expect_eq(result.serialize_path(), "/usr/bin/usr/bin/emacs");
});
 
// With a leading backslash after "file:", the expected result is an absolute
// path: nothing from the base's path ends up in the parsed URL.
etest::test("URL parsing: file url backslash with base", [] {
    url::UrlParser parser;

    // Absolute file URL that serves as the base for the parse below.
    std::optional<url::Url> base = parser.parse("file:///usr/bin/vim");
    etest::require(base.has_value());

    std::optional<url::Url> resolved = parser.parse("file:\\usr/bin/emacs", base);
    etest::require(resolved.has_value());

    url::Url &result = *resolved;

    etest::expect_eq(result.scheme, "file");
    etest::expect_eq(result.serialize(), "file:///usr/bin/emacs");
    etest::expect_eq(result.host->serialize(), "");
    etest::expect_eq(result.serialize_path(), "/usr/bin/emacs");
});
 
// Runs the URL parser against the Web Platform Tests corpus (urltestdata.json).
//
// The file is a JSON array. String elements are comments and are skipped; every
// other element is a test case with an "input", a "base" (possibly null) and
// either a "failure" field (parsing must fail) or the expected URL components,
// which are compared one by one against the parsed result.
etest::test("Web Platform Tests", [] {
    url::UrlParser p;

    simdjson::ondemand::parser parser;

    auto json = simdjson::padded_string::load("external/wpt/url/resources/urltestdata.json");

    // Report a missing/unreadable data file explicitly rather than letting the
    // load error surface later from iterate().
    etest::require(json.error() == simdjson::SUCCESS, "Unable to load url/resources/urltestdata.json");

    simdjson::ondemand::document doc = parser.iterate(json);

    simdjson::ondemand::array arr = doc.get_array();

    for (auto obj : arr) {
        // Skip strings, those are just comments
        if (obj.type() == simdjson::ondemand::json_type::string) {
            continue;
        }

        // A test case expects a parse failure iff it carries a "failure" field
        bool const should_fail = obj.find_field("failure").error() != simdjson::error_code::NO_SUCH_FIELD;

        // Get input URL
        std::string_view input = obj["input"].get_string(true);

        // Parse base URL if it exists
        std::optional<url::Url> base_test;

        if (!obj["base"].is_null()) {
            std::string_view base_str = obj["base"].get_string(true);

            base_test = p.parse(std::string{base_str});

            if (!should_fail) {
                etest::expect(base_test.has_value(), "Parsing base URL:(" + std::string{base_str} + ") failed");

                // Only give up on this case when the base is unusable; a valid
                // base must not prevent the input URL from being tested.
                if (!base_test.has_value()) {
                    continue;
                }
            }
        }

        // Parse input URL
        std::optional<url::Url> url = p.parse(std::string{input}, base_test);

        if (!should_fail) {
            etest::expect(url.has_value(), "Parsing input URL:(" + std::string{input} + ") failed");

            // Nothing to compare against if parsing failed
            if (!url.has_value()) {
                continue;
            }
        } else {
            etest::require(!url.has_value(),
                    "Parsing input URL:(" + std::string{input} + ") succeeded when it was supposed to fail");

            // If this test was an expected failure, test ends here
            continue;
        }

        // Check URL fields against test

        std::string_view href = obj["href"];
        etest::expect_eq(url->serialize(), href);

        // "origin" is optional in the test data; only compare it when present
        auto origin_field = obj["origin"];
        if (origin_field.error() != simdjson::error_code::NO_SUCH_FIELD) {
            std::string_view origin = origin_field.get_string();

            etest::expect_eq(url->origin().serialize(), origin);
        }

        std::string_view protocol = obj["protocol"];
        etest::expect_eq(url->scheme + ":", protocol);

        std::string_view username = obj["username"];
        etest::expect_eq(url->user, username);

        std::string_view password = obj["password"];
        etest::expect_eq(url->passwd, password);

        std::string_view hostname = obj["hostname"];
        etest::expect_eq(url->host.has_value() ? url->host->serialize() : "", hostname);

        // "host" is the hostname followed by ":<port>" when a port is set
        std::string_view host = obj["host"];
        std::string host_serialized = url->host.has_value() ? url->host->serialize() : "";
        std::string host_port = url->port.has_value() ? std::string{":"} + std::to_string(*url->port) : "";
        etest::expect_eq(host_serialized + host_port, host);

        std::string_view port = obj["port"];
        etest::expect_eq(url->port.has_value() ? std::to_string(*url->port) : "", port);

        std::string_view pathname = obj["pathname"];
        etest::expect_eq(url->serialize_path(), pathname);

        // An absent or empty query/fragment is expected to compare equal to ""
        std::string_view search = obj["search"];
        etest::expect_eq(url->query.has_value() && *url->query != "" ? std::string{"?"} + *url->query : "", search);

        std::string_view hash = obj["hash"];
        etest::expect_eq(
                url->fragment.has_value() && *url->fragment != "" ? std::string{"#"} + *url->fragment : "", hash);
    }
});
 
int ret = etest::run_all_tests();
 
url::icu_cleanup();
 
util/string.h added: 199, removed: 14, total 185
@@ -214,7 +214,7 @@ inline std::string percent_encode(
if (space_as_plus && i == ' ') {
out << '+';
} else if (in_encode_set(i)) {
out << '%' << std::setw(2) << std::uppercase << std::hex
out << '%' << std::setfill('0') << std::setw(2) << std::uppercase << std::hex
<< static_cast<unsigned int>(static_cast<unsigned char>(i));
} else {
out << i;