srctree

Robin Linden parent 830ab9a4 5e7e959c
html: Fix doctypes not being ignored often enough

The correct doctype-handling is in the new parser, and the spec says any doctype tokens after the 'Initial' state should be dropped.

inlinesplit
html/parser.cpp added: 12, removed: 16, total 0
@@ -129,12 +129,6 @@ void Parser::on_token(html2::Tokenizer &, html2::Token &&token) {
}
}
 
// Stores the doctype name on the document when the token carries one;
// a token without a name leaves the document untouched.
void Parser::operator()(html2::DoctypeToken const &doctype) {
    if (!doctype.name.has_value()) {
        return;
    }

    doc_.doctype = *doctype.name;
}
 
void Parser::operator()(html2::StartTagToken const &start_tag) {
if (start_tag.tag_name == "script"sv) {
tokenizer_.set_state(html2::State::ScriptData);
@@ -220,10 +214,6 @@ void Parser::operator()(html2::EndTagToken const &end_tag) {
open_elements_.pop_back();
}
 
// Visitor overload for comment tokens: this parser deliberately drops them.
void Parser::operator()(html2::CommentToken const &) {
// Do nothing.
}
 
// Streams the character token's data into the text accumulated so far.
void Parser::operator()(html2::CharacterToken const &character) {
    auto const &data = character.data;
    current_text_ << data;
}
 
html/parser.h added: 12, removed: 16, total 0
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -32,12 +32,13 @@ public:
}
 
// These must be public for std::visit to be happy with Parser as a visitor.
void operator()(html2::DoctypeToken const &);
void operator()(html2::StartTagToken const &);
void operator()(html2::EndTagToken const &);
void operator()(html2::CommentToken const &);
void operator()(html2::CharacterToken const &);
void operator()(html2::EndOfFileToken const &);
// Catch-all visitor overload: a token type with no dedicated overload
// declared alongside this one resolves here and is dropped.
void operator()(auto const &) {
// We're ignoring doctypes and comments in the old parser.
}
 
private:
Parser(std::string_view input, ParserOptions const &opts)
 
html/parser_test.cpp added: 12, removed: 16, total 0
@@ -415,5 +415,10 @@ int main() {
expect_eq(doc.doctype, "abcd");
});
 
// A second doctype that shows up after </head> must not replace the first.
etest::test("doctype, but too late!", [] {
auto doc = html::parse("<!doctype abcd></head><!doctype html>");
expect_eq(doc.doctype, "abcd");
});
 
return etest::run_all_tests();
}