srctree

Robin Linden parent bd2079c4 3d474dec
html2: Finish the BeforeHtml parser state

inlinesplit
html2/parser_states.cpp added: 35, removed: 7, total 28
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -223,6 +223,11 @@ std::optional<InsertionMode> Initial::process(IActions &a, html2::Token const &t
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
std::optional<InsertionMode> BeforeHtml::process(IActions &a, html2::Token const &token) {
if (std::holds_alternative<html2::DoctypeToken>(token)) {
// Parse error.
return {};
}
 
if (std::holds_alternative<html2::CommentToken>(token)) {
// TODO(robinlinden): Insert as last child.
return {};
@@ -237,6 +242,15 @@ std::optional<InsertionMode> BeforeHtml::process(IActions &a, html2::Token const
return BeforeHead{};
}
 
static constexpr auto kAcceptableEndTags = std::to_array<std::string_view>({"head", "body", "html", "br"});
if (auto const *end = std::get_if<html2::EndTagToken>(&token);
end != nullptr && (is_in_array<kAcceptableEndTags>(end->tag_name))) {
// Fall through to "anything else."
} else if (end != nullptr) {
// Parse error.
return {};
}
 
a.insert_element_for(html2::StartTagToken{.tag_name = "html"});
auto mode_override = current_insertion_mode_override(a, BeforeHead{});
return BeforeHead{}.process(mode_override, token).value_or(BeforeHead{});
 
html2/parser_states.h added: 35, removed: 7, total 28
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -88,7 +88,6 @@ struct Initial {
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
// Incomplete.
// Parser state for the "before html" insertion mode.
//
// process() is handed the parser actions interface and the token to handle.
// It returns the insertion mode to switch to, or an empty optional when no
// mode change is requested (e.g. when the token is ignored).
struct BeforeHtml {
    std::optional<InsertionMode> process(IActions &actions, html2::Token const &token);
};
 
html2/parser_states_test.cpp added: 35, removed: 7, total 28
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -117,6 +117,11 @@ void initial_tests() {
}
 
void before_html_tests() {
etest::test("BeforeHtml: doctype", [] {
auto res = parse("<!DOCTYPE html>", {.initial_insertion_mode = html2::BeforeHtml{}});
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
});
 
etest::test("BeforeHtml: comment", [] {
auto res = parse("<!DOCTYPE html><!-- hello --><html foo='bar'>", {});
expect_eq(res.document.html(), dom::Element{"html", {{"foo", "bar"}}, {dom::Element{"head"}}});
@@ -131,6 +136,16 @@ void before_html_tests() {
auto res = parse("<!DOCTYPE asdf>\t\n\f\r <html foo='bar'>", {});
expect_eq(res.document.html(), dom::Element{"html", {{"foo", "bar"}}, {dom::Element{"head"}}});
});
 
etest::test("BeforeHtml: head end-tag", [] {
auto res = parse("</head>", {.initial_insertion_mode = html2::BeforeHtml{}});
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
});
 
etest::test("BeforeHtml: dropped end-tag", [] {
auto res = parse("</img>", {.initial_insertion_mode = html2::BeforeHtml{}});
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
});
}
 
void before_head_tests() {