srctree

Robin Linden parent 9b55bcfe cb558918
html: Support omission of the html element's start and end tags

inlinesplit
html/parser.cpp added: 35, removed: 8, total 27
@@ -114,16 +114,18 @@ void Parser::operator()(html2::StartTagToken const &start_tag) {
return;
}
 
// https://html.spec.whatwg.org/multipage/semantics.html#the-html-element
if (open_elements_.empty() && !seen_html_tag_) {
doc_.html().name = "html"s;
open_elements_.push(&doc_.html());
seen_html_tag_ = true;
}
 
if (start_tag.tag_name == "script"sv) {
tokenizer_.set_state(html2::State::ScriptData);
}
 
if (open_elements_.empty() && !seen_html_tag_) {
spdlog::warn("Start tag [{}] encountered before html element was opened", start_tag.tag_name);
doc_.html().name = "html"s;
open_elements_.push(&doc_.html());
seen_html_tag_ = true;
} else if (open_elements_.empty()) {
if (open_elements_.empty()) {
spdlog::warn("Start tag [{}] encountered with no open elements", start_tag.tag_name);
return;
}
@@ -186,6 +188,11 @@ void Parser::operator()(html2::CharacterToken const &character) {
}
 
void Parser::operator()(html2::EndOfFileToken const &) {
// https://html.spec.whatwg.org/multipage/semantics.html#the-html-element
if (!open_elements_.empty() && open_elements_.top()->name == "html") {
open_elements_.pop();
}
 
if (!open_elements_.empty()) {
spdlog::warn("EOF reached with [{}] elements still open", open_elements_.size());
}
 
html/parser_test.cpp added: 35, removed: 8, total 27
@@ -205,5 +205,25 @@ int main() {
expect_eq(p2_text, dom::Text{"world"});
});
 
// The input never spells out <html>...</html>; the parsed document's html()
// element is still expected to have <head> and <body> as its two children.
etest::test("special rules, html tag omission", [] {
    auto root = html::parse("<head></head><body>hello</body>"sv).html();
    auto const &kids = root.children;
    require_eq(kids.size(), std::size_t{2});

    // First child: the <head> element written in the input.
    auto const &head_element = std::get<dom::Element>(kids[0]);
    expect_eq(head_element.name, "head");

    // Second child: <body>, which must hold exactly one text node.
    auto const &body_element = std::get<dom::Element>(kids[1]);
    expect_eq(body_element.name, "body");
    require_eq(body_element.children.size(), std::size_t{1});

    auto const &hello_text = std::get<dom::Text>(body_element.children[0]);
    expect_eq(hello_text, dom::Text{"hello"});
});
 
// Parsing empty input: the resulting document's html() element is expected to
// exist and to have no children.
etest::test("special rules, an empty string still parses as html", [] {
    auto root = html::parse("").html();
    auto const child_count = root.children.size();
    expect_eq(child_count, std::size_t{0});
});
 
return etest::run_all_tests();
}