srctree

Robin Linden parent f33aacf6 7790b128
html2: Drop unnecessary attributes from EndTagToken

See 64a2ecd7f7bb6e0c36e86d66a96b80f2addfb405.

inline split
html2/token.h added: 16, removed: 13, total 3
@@ -35,7 +35,6 @@ struct StartTagToken {
 
struct EndTagToken {
std::string tag_name{};
std::vector<Attribute> attributes{};
[[nodiscard]] bool operator==(EndTagToken const &) const = default;
};
 
 
html2/tokenizer.cpp added: 16, removed: 13, total 3
@@ -2482,11 +2482,11 @@ void Tokenizer::emit(Token &&token) {
if (auto *start_tag = std::get_if<StartTagToken>(&token)) {
last_start_tag_name_ = start_tag->tag_name;
deduplicate(start_tag->attributes);
} else if (auto *end_tag = std::get_if<EndTagToken>(&token)) {
deduplicate(end_tag->attributes);
} else if (std::holds_alternative<EndTagToken>(token)) {
// https://html.spec.whatwg.org/multipage/parsing.html#tokenization:parse-error-end-tag-with-attributes
if (!end_tag->attributes.empty()) {
if (!end_tag_attributes_.empty()) {
emit(ParseError::EndTagWithAttributes);
end_tag_attributes_.clear();
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#tokenization:parse-error-end-tag-with-trailing-solidus
@@ -2523,7 +2523,7 @@ std::vector<Attribute> &Tokenizer::attributes_for_current_element() {
if (auto *start_tag = std::get_if<StartTagToken>(&current_token_)) {
return start_tag->attributes;
}
return std::get<EndTagToken>(current_token_).attributes;
return end_tag_attributes_;
}
 
void Tokenizer::start_attribute_in_current_tag_token(Attribute attr) {
 
html2/tokenizer.h added: 16, removed: 13, total 3
@@ -187,7 +187,11 @@ private:
 
std::uint32_t character_reference_code_{};
bool adjusted_current_node_in_html_namespace_{true};
 
// These end-tag bits aren't allowed to leave the tokenizer, but we need to
// keep them around internally to emit warnings when reasonable.
bool self_closing_end_tag_detected_{false};
std::vector<Attribute> end_tag_attributes_{};
 
std::function<void(Tokenizer &, Token &&)> on_emit_{};
std::function<void(Tokenizer &, ParseError)> on_error_{};
 
html2/tokenizer_test.cpp added: 16, removed: 13, total 3
@@ -269,7 +269,7 @@ void rawtext_tests() {
auto tokens = run_tokenizer("<style><div></style hello='1'>");
expect_token(tokens, StartTagToken{.tag_name = "style"});
expect_text(tokens, "<div>");
expect_token(tokens, EndTagToken{.tag_name = "style", .attributes{{"hello", "1"}}});
expect_token(tokens, EndTagToken{.tag_name = "style"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithAttributes);
});
@@ -336,7 +336,7 @@ void rcdata_tests() {
auto tokens = run_tokenizer("<title><div></title hello='1'>");
expect_token(tokens, StartTagToken{.tag_name = "title"});
expect_text(tokens, "<div>");
expect_token(tokens, EndTagToken{.tag_name = "title", .attributes{{"hello", "1"}}});
expect_token(tokens, EndTagToken{.tag_name = "title"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithAttributes);
});
@@ -912,7 +912,7 @@ int main() {
auto tokens = run_tokenizer(R"(<script></script src="/foo.js">)");
 
expect_token(tokens, StartTagToken{.tag_name = "script"});
expect_token(tokens, EndTagToken{.tag_name = "script", .attributes = {{"src", "/foo.js"}}});
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_token(tokens, EndOfFileToken{});
expect_error(tokens, ParseError::EndTagWithAttributes);
});
@@ -956,7 +956,7 @@ int main() {
 
expect_token(tokens, StartTagToken{.tag_name = "script"});
expect_text(tokens, "<!--"sv);
expect_token(tokens, EndTagToken{.tag_name = "script", .attributes = {{"src", "/bar.js"}}});
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_text(tokens, "-->"sv);
expect_token(tokens, EndTagToken{.tag_name = "script"});
expect_token(tokens, EndOfFileToken{});