srctree

Robin Linden parent 490ff755 005c421e
html2: Tokenize self-closing start tags in a spec-compliant way

inlinesplit
html2/tokenizer.cpp added: 23, removed: 5, total 18
@@ -1226,10 +1226,11 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#self-closing-start-tag-state
case State::SelfClosingStartTag: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-tag parse error.
emit(ParseError::EofInTag);
emit(EndOfFileToken{});
return;
}
@@ -1245,7 +1246,7 @@ void Tokenizer::run() {
emit(std::move(current_token_));
continue;
default:
// This is an unexpected-solidus-in-tag parse error.
emit(ParseError::UnexpectedSolidusInTag);
reconsume_in(State::BeforeAttributeName);
continue;
}
 
html2/tokenizer.h added: 23, removed: 5, total 18
@@ -142,6 +142,7 @@ enum class ParseError {
UnexpectedEqualsSignBeforeAttributeName,
UnexpectedNullCharacter,
UnexpectedQuestionMarkInsteadOfTagName,
UnexpectedSolidusInTag,
UnknownNamedCharacterReference,
};
 
 
html2/tokenizer_test.cpp added: 23, removed: 5, total 18
@@ -585,6 +585,21 @@ void after_attribute_value_quoted_tests() {
});
}
 
void self_closing_start_tag_tests() {
etest::test("self-closing start tag: eof", [] {
auto tokens = run_tokenizer("<p/");
expect_error(tokens, ParseError::EofInTag);
expect_token(tokens, EndOfFileToken{});
});
 
etest::test("self-closing start tag: unexpected solidus", [] {
auto tokens = run_tokenizer("<p/ >");
expect_error(tokens, ParseError::UnexpectedSolidusInTag);
expect_token(tokens, StartTagToken{"p"});
expect_token(tokens, EndOfFileToken{});
});
}
 
} // namespace
 
int main() {
@@ -611,6 +626,7 @@ int main() {
attribute_value_double_quoted_tests();
attribute_value_single_quoted_tests();
after_attribute_value_quoted_tests();
self_closing_start_tag_tests();
 
etest::test("script, empty", [] {
auto tokens = run_tokenizer("<script></script>");