srctree

Robin Linden parent 4c2f55e8 64a2ecd7
html2: Drop unnecessary self-closing flag from EndTagToken

This doesn't really have any purpose outside of telling the tokenizer to emit a warning, and we can do that with a flag in the tokenizer. Outside of the tokenizer, a self-closing end tag should be treated as a regular end tag.
html2/token.cpp added: 19, removed: 19, total 0
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -23,7 +23,7 @@ public:
t.system_identifier.value_or(R"("")"));
}
// Renders a start tag as "StartTag <tag_name> <self_closing>".
std::string operator()(StartTagToken const &t) { return fmt::format("StartTag {} {}", t.tag_name, t.self_closing); }
// Renders an end tag as "EndTag <tag_name> <self_closing>".
std::string operator()(EndTagToken const &t) { return fmt::format("EndTag {} {}", t.tag_name, t.self_closing); }
// Renders an end tag as "EndTag <tag_name>"; only the tag name is printed.
std::string operator()(EndTagToken const &t) { return fmt::format("EndTag {}", t.tag_name); }
// Renders a comment token as "Comment <data>".
std::string operator()(CommentToken const &t) { return fmt::format("Comment {}", t.data); }
// Renders a character token as "Character <data>".
std::string operator()(CharacterToken const &t) { return fmt::format("Character {}", t.data); }
// Renders the end-of-file token as the fixed string "EndOfFile"; the token itself is not inspected.
std::string operator()(EndOfFileToken const &) { return "EndOfFile"; }
 
html2/token.h added: 19, removed: 19, total 0
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2022 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -35,7 +35,6 @@ struct StartTagToken {
 
// Token describing an end tag: its tag name plus any attributes attached to it.
struct EndTagToken {
std::string tag_name{};
// NOTE(review): the hunk header (-35,7 +35,6) together with the commit title suggests
// this is the member being dropped by this change -- confirm against the real file.
bool self_closing{false};
std::vector<Attribute> attributes{};
// Defaulted equality operator; [[nodiscard]] so the result of a comparison is not silently dropped.
[[nodiscard]] bool operator==(EndTagToken const &) const = default;
};
 
html2/token_test.cpp added: 19, removed: 19, total 0
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -27,8 +27,8 @@ int main() {
});
 
etest::test("to_string(EndTag)", [] {
expect_eq(to_string(EndTagToken{.tag_name = "p", .self_closing = false}), "EndTag p false");
expect_eq(to_string(EndTagToken{.tag_name = "img", .self_closing = true}), "EndTag img true");
expect_eq(to_string(EndTagToken{.tag_name = "p"}), "EndTag p");
expect_eq(to_string(EndTagToken{.tag_name = "img"}), "EndTag img");
});
 
etest::test("to_string(Comment)", [] {
 
html2/tokenizer.cpp added: 19, removed: 19, total 0
@@ -1244,7 +1244,7 @@ void Tokenizer::run() {
if (auto *start_tag = std::get_if<StartTagToken>(&current_token_)) {
start_tag->self_closing = true;
} else {
std::get<EndTagToken>(current_token_).self_closing = true;
self_closing_end_tag_detected_ = true;
}
state_ = State::Data;
emit(std::move(current_token_));
@@ -2485,7 +2485,7 @@ void Tokenizer::emit(Token &&token) {
} else if (auto *end_tag = std::get_if<EndTagToken>(&token)) {
deduplicate(end_tag->attributes);
// https://html.spec.whatwg.org/multipage/parsing.html#tokenization:parse-error-end-tag-with-trailing-solidus
if (end_tag->self_closing) {
if (std::exchange(self_closing_end_tag_detected_, false)) {
emit(ParseError::EndTagWithTrailingSolidus);
}
}
 
html2/tokenizer.h added: 19, removed: 19, total 0
@@ -186,6 +186,7 @@ private:
 
std::uint32_t character_reference_code_{};
bool adjusted_current_node_in_html_namespace_{true};
// Set in Tokenizer::run() when the token being marked self-closing is not a start tag.
// Tokenizer::emit() reads and resets it via std::exchange when an end tag is emitted,
// reporting ParseError::EndTagWithTrailingSolidus if it was set.
bool self_closing_end_tag_detected_{false};
 
std::function<void(Tokenizer &, Token &&)> on_emit_{};
std::function<void(Tokenizer &, ParseError)> on_error_{};
 
html2/tokenizer_test.cpp added: 19, removed: 19, total 0
@@ -277,7 +277,7 @@ void rawtext_tests() {
auto tokens = run_tokenizer("<style><div></style/>");
expect_token(tokens, StartTagToken{.tag_name = "style"});
expect_text(tokens, "<div>");
expect_token(tokens, EndTagToken{.tag_name = "style", .self_closing = true});
expect_token(tokens, EndTagToken{.tag_name = "style"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithTrailingSolidus);
});
@@ -343,7 +343,7 @@ void rcdata_tests() {
auto tokens = run_tokenizer("<title><div></title/>");
expect_token(tokens, StartTagToken{.tag_name = "title"});
expect_text(tokens, "<div>");
expect_token(tokens, EndTagToken{.tag_name = "title", .self_closing = true});
expect_token(tokens, EndTagToken{.tag_name = "title"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithTrailingSolidus);
});
@@ -926,7 +926,7 @@ int main() {
auto tokens = run_tokenizer("<script></script/>");
 
expect_token(tokens, StartTagToken{.tag_name = "script"});
expect_token(tokens, EndTagToken{.tag_name = "script", .self_closing = true});
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithTrailingSolidus);
});
@@ -973,7 +973,7 @@ int main() {
 
expect_token(tokens, StartTagToken{.tag_name = "script"});
expect_text(tokens, "<!--"sv);
expect_token(tokens, EndTagToken{.tag_name = "script", .self_closing = true});
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_text(tokens, "-->"sv);
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_token(tokens, EndOfFileToken{});