srctree

David Zero parent 3b857850 456968a1
url: Add URL serialization

inlinesplit
url/url.cpp added: 139, removed: 11, total 128
@@ -191,6 +191,74 @@ std::string blob_url_create(Origin const &origin) {
return result;
}
 
// https://url.spec.whatwg.org/#concept-host-serializer
//
// Returns the textual form of this host, chosen by `type`:
//  - Ip6Addr: the util::ipv6_serialize() output wrapped in "[" and "]".
//  - Ip4Addr: the util::ipv4_serialize() output for the stored 32-bit value.
//  - anything else: the stored string, returned unchanged.
std::string Host::serialize() const {
    if (type == HostType::Ip6Addr) {
        auto const &pieces = std::get<2>(data);
        return "[" + util::ipv6_serialize(pieces) + "]";
    }

    if (type == HostType::Ip4Addr) {
        auto const address = std::get<std::uint32_t>(data);
        return util::ipv4_serialize(address);
    }

    return std::get<std::string>(data);
}
 
// https://url.spec.whatwg.org/#url-path-serializer
//
// Returns the path as a string. An opaque path (the std::string alternative)
// is returned verbatim; a list path is rendered as "/" followed by each
// segment in order, so an empty list yields an empty string.
std::string Url::serialize_path() const {
    if (auto const *opaque = std::get_if<0>(&path); opaque != nullptr) {
        return *opaque;
    }

    std::string joined;

    for (auto const &segment : std::get<1>(path)) {
        joined += "/";
        joined += segment;
    }

    return joined;
}
 
// https://url.spec.whatwg.org/#concept-url-serializer
std::string Url::serialize(bool exclude_fragment) const {
std::string output = scheme + ":";
 
if (host.has_value()) {
output += "//";
 
if (includes_credentials()) {
output += user;
 
if (!passwd.empty()) {
output += ":" + passwd;
}
 
output += "@";
}
 
output += host->serialize();
 
if (port.has_value()) {
output += ":" + std::to_string(*port);
}
}
 
if (!host.has_value() && std::holds_alternative<std::vector<std::string>>(path) && std::get<1>(path).size() > 1
&& std::get<1>(path)[0].empty()) {
output += "/.";
}
 
output += serialize_path();
 
if (query.has_value()) {
output += "?" + *query;
}
 
if (!exclude_fragment && fragment.has_value()) {
output += "#" + *fragment;
}
 
return output;
}
 
void UrlParser::validation_error(ValidationError err) const {
spdlog::debug("url: InputPos: {}, ParserState: {}, Validation Error: {} {}",
current_pos(),
@@ -381,7 +449,7 @@ void UrlParser::state_scheme() {
 
return;
}
if ((includes_credentials(url_) || url_.port.has_value()) && buffer_ == "file") {
if ((url_.includes_credentials() || url_.port.has_value()) && buffer_ == "file") {
state_ = ParserState::Terminate;
 
return;
@@ -443,13 +511,13 @@ void UrlParser::state_scheme() {
 
// https://url.spec.whatwg.org/#no-scheme-state
void UrlParser::state_no_scheme() {
if (auto c = peek(); !base_.has_value() || (has_opaque_path(*base_) && c != '#')) {
if (auto c = peek(); !base_.has_value() || (base_->has_opaque_path() && c != '#')) {
validation_error(ValidationError::MissingSchemeNonRelativeUrl);
 
state_ = ParserState::Failure;
 
return;
} else if (has_opaque_path(*base_) && c == '#') {
} else if (base_->has_opaque_path() && c == '#') {
url_.scheme = base_->scheme;
url_.path = base_->path;
url_.query = base_->query;
@@ -678,7 +746,7 @@ void UrlParser::state_host() {
 
return;
} else if (state_override_.has_value() && buffer_.empty()
&& (includes_credentials(url_) || url_.port.has_value())) {
&& (url_.includes_credentials() || url_.port.has_value())) {
state_ = ParserState::Terminate;
 
return;
 
url/url.h added: 139, removed: 11, total 128
@@ -26,6 +26,8 @@ struct Host {
HostType type;
 
std::variant<std::string, std::uint32_t, std::array<std::uint16_t, 8>> data;
 
std::string serialize() const;
};
 
struct Origin {
@@ -51,6 +53,15 @@ struct Url {
std::variant<std::string, std::vector<std::string>> path;
std::optional<std::string> query;
std::optional<std::string> fragment;
 
std::string serialize(bool exclude_fragment = false) const;
std::string serialize_path() const;
 
// True when at least one of `user` and `passwd` is non-empty.
constexpr bool includes_credentials() const {
    return !(user.empty() && passwd.empty());
}
// True when `path` currently holds its std::string alternative rather than the list of segments.
constexpr bool has_opaque_path() const {
    return std::get_if<std::string>(&path) != nullptr;
}
 
// https://url.spec.whatwg.org/#url-equivalence
//
// Two URLs compare equal when their serializations are identical strings.
// serialize() is called with its default argument, so fragments take part in
// the comparison.
bool operator==(Url const &b) const {
    auto const lhs = serialize();
    auto const rhs = b.serialize();
    return lhs == rhs;
}
};
 
// This parser is current with the WHATWG URL specification as of 1 March 2023
@@ -167,10 +178,6 @@ private:
bool starts_with_windows_drive_letter(std::string_view) const;
void shorten_url_path(Url &) const;
 
constexpr bool includes_credentials(Url &url) const { return !url.user.empty() || !url.passwd.empty(); }
 
constexpr bool has_opaque_path(Url &url) const { return std::holds_alternative<std::string>(url.path); }
 
// True when `input` is exactly two characters long: one that util::is_alpha()
// accepts, followed by either ':' or '|'.
constexpr bool is_windows_drive_letter(std::string_view input) const {
    if (input.size() != 2 || !util::is_alpha(input[0])) {
        return false;
    }

    auto const separator = input[1];
    return separator == ':' || separator == '|';
}
 
url/url_test.cpp added: 139, removed: 11, total 128
@@ -64,6 +64,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "index.html");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://example.com:8080/index.html");
});
 
etest::test("URL parsing: 1 unicode char", [] {
@@ -79,6 +81,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "http://xn--bcher-kva.de/");
});
 
etest::test("URL parsing: 1 unicode char with path", [] {
@@ -95,6 +99,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[1], "itunes.gif");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://xn--19g.com/i/itunes.gif");
});
 
etest::test("URL parsing: unicode path", [] {
@@ -111,6 +117,9 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[1], "%D9%86%D8%AC%D9%8A%D8%A8_%D9%85%D8%AD%D9%81%D9%88%D8%B8");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(),
"https://ar.wikipedia.org/wiki/%D9%86%D8%AC%D9%8A%D8%A8_%D9%85%D8%AD%D9%81%D9%88%D8%B8");
});
 
etest::test("URL parsing: tel URI", [] {
@@ -126,6 +135,8 @@ int main() {
etest::expect_eq(std::get<0>(url->path), "+1-555-555-5555");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "tel:+1-555-555-5555");
});
 
etest::test("URL parsing: username and passwd in authority", [] {
@@ -143,6 +154,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "login.php");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://zero-one:testpass123@example.com/login.php");
});
 
etest::test("URL parsing: query", [] {
@@ -159,6 +172,9 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "watch");
etest::expect_eq(url->query, "v=2g5xkLqIElUlist=PLHwvDXmNUa92NlFPooY1P5tfDo4T85ORzindex=3");
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(),
"https://www.youtube.com/watch?v=2g5xkLqIElUlist=PLHwvDXmNUa92NlFPooY1P5tfDo4T85ORzindex=3");
});
 
etest::test("URL parsing: Welsh", [] {
@@ -177,6 +193,9 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[1], "platformticket.gif");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(),
"https://llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch.co.uk/images/platformticket.gif");
});
 
// This domain exceeds the maximum length of both a domain component/label and a FQDN
@@ -202,6 +221,13 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(),
"https://"
"llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochobwllllantysiliogogogochanfairpwllgw"
"yngyllgogerychgogerychwyrndrobwllllantysiliogogogochobwllllantysiliogogogochllanfairpwllgwyngy"
"llgogerychwyrndrobwllllantysiliogogogochobwllllantysiliogogogochanfairpwllgwyngyllgogerychgoge"
"rychwyrndrobwllllantysiliogogogochobwllllantysiliogogogoch.co.uk/");
});
 
etest::test("URL parsing: path, query, and fragment", [] {
@@ -224,6 +250,9 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[6], "7795829478");
etest::expect_eq(url->query, "pr=476");
etest::expect_eq(url->fragment, "step:7:31");
 
etest::expect_eq(url->serialize(),
"https://github.com/robinlinden/hastur/actions/runs/4441133331/jobs/7795829478?pr=476#step:7:31");
});
 
etest::test("URL parsing: ipv4 and port", [] {
@@ -239,6 +268,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://127.0.0.1:631/");
});
 
etest::test("URL parsing: ipv6 and port", [] {
@@ -256,6 +287,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://[2001:db8:85a3::8a2e:370:7334]:631/");
});
 
etest::test("URL parsing: ipv6 v4-mapped with port", [] {
@@ -273,6 +306,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://[::ffff:4ccb:8c22]:631/");
});
 
etest::test("URL parsing: ipv6 v4-mapped compressed with dot-decimal", [] {
@@ -290,6 +325,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[0], "");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://[::ffff:4ccb:8c22]:631/");
});
 
etest::test("URL parsing: empty input", [] {
@@ -314,6 +351,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[1], "index.php");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://example.com:8080/test/index.php");
});
 
etest::test("URL parsing: query input with base URL", [&base] {
@@ -330,6 +369,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[1], "index.php");
etest::expect_eq(url->query, "view=table");
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "https://example.com:8080/test/index.php?view=table");
});
 
etest::test("URL parsing: file URL", [] {
@@ -349,6 +390,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[4], "README.md");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "file:///home/zero-one/repos/hastur/README.md");
});
 
etest::test("URL parsing: file URL with double-dot", [] {
@@ -367,6 +410,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[3], "README.md");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "file:///home/zero-one/hastur/README.md");
});
 
etest::test("URL parsing: file URL with double-dot 2", [] {
@@ -384,6 +429,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[2], "README.md");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "file:///home/zero-one/README.md");
});
 
etest::test("URL parsing: file URL with double-dot 3", [] {
@@ -401,6 +448,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[2], "repos");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "file:///home/zero-one/repos/");
});
 
etest::test("URL parsing: file URL with single-dot", [] {
@@ -420,6 +469,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[4], "README.md");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), "file:///home/zero-one/repos/hastur/README.md");
});
 
etest::test("URL parsing: file URL with windows path", [] {
@@ -440,6 +491,8 @@ int main() {
etest::expect_eq(std::get<1>(url->path)[5], "README.md");
etest::expect(!url->query.has_value());
etest::expect(!url->fragment.has_value());
 
etest::expect_eq(url->serialize(), R"(file:///C:/Users/zero-one/repos/hastur/README.md)");
});
 
int ret = etest::run_all_tests();