srctree

Robin Linden parent 005c421e c2dcb088
html2: Improve spec compliance of comment tokenization

inlinesplit
html2/tokenizer.cpp added: 57, removed: 6, total 51
@@ -1252,6 +1252,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#bogus-comment-state
case State::BogusComment: {
auto c = consume_next_input_character();
if (!c) {
@@ -1308,6 +1309,7 @@ void Tokenizer::run() {
state_ = State::BogusComment;
continue;
 
// https://html.spec.whatwg.org/#comment-start-state
case State::CommentStart: {
auto c = consume_next_input_character();
if (!c) {
@@ -1330,10 +1332,11 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-start-dash-state
case State::CommentStartDash: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-comment parse error.
emit(ParseError::EofInComment);
emit(std::move(current_token_));
emit(EndOfFileToken{});
return;
@@ -1355,6 +1358,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-state
case State::Comment: {
auto c = consume_next_input_character();
if (!c) {
@@ -1382,6 +1386,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-less-than-sign-state
case State::CommentLessThanSign: {
auto c = consume_next_input_character();
if (!c) {
@@ -1403,6 +1408,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-less-than-sign-bang-state
case State::CommentLessThanSignBang: {
auto c = consume_next_input_character();
if (!c) {
@@ -1420,6 +1426,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-less-than-sign-bang-dash-state
case State::CommentLessThanSignBangDash: {
auto c = consume_next_input_character();
if (!c) {
@@ -1437,6 +1444,7 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-less-than-sign-bang-dash-dash-state
case State::CommentLessThanSignBangDashDash: {
auto c = consume_next_input_character();
if (!c) {
@@ -1455,10 +1463,11 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-end-dash-state
case State::CommentEndDash: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-comment parse error.
emit(ParseError::EofInComment);
emit(std::move(current_token_));
emit(EndOfFileToken{});
return;
@@ -1475,10 +1484,11 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-end-state
case State::CommentEnd: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-comment parse error.
emit(ParseError::EofInComment);
emit(std::move(current_token_));
emit(EndOfFileToken{});
return;
@@ -1502,10 +1512,11 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/#comment-end-bang-state
case State::CommentEndBang: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-comment parse error.
emit(ParseError::EofInComment);
emit(std::move(current_token_));
emit(EndOfFileToken{});
return;
 
html2/tokenizer_test.cpp added: 57, removed: 6, total 51
@@ -600,6 +600,42 @@ void self_closing_start_tag_tests() {
});
}
 
// Registers the test covering input that ends right after "<!---".
void comment_start_dash_tests() {
    // Expects an eof-in-comment parse error, a CommentToken built with empty
    // braces, and an end-of-file token.
    auto const eof_case = [] {
        auto output = run_tokenizer("<!---");
        expect_error(output, ParseError::EofInComment);
        expect_token(output, CommentToken{});
        expect_token(output, EndOfFileToken{});
    };

    etest::test("comment start dash: eof", eof_case);
}
 
// Registers the test covering input that ends right after "<!-- -".
void comment_end_dash_tests() {
    // Expects an eof-in-comment parse error, a CommentToken holding a single
    // space, and an end-of-file token.
    auto const eof_case = [] {
        auto output = run_tokenizer("<!-- -");
        expect_error(output, ParseError::EofInComment);
        expect_token(output, CommentToken{" "});
        expect_token(output, EndOfFileToken{});
    };

    etest::test("comment end dash: eof", eof_case);
}
 
// Registers the test covering input that ends right after "<!-- --".
void comment_end_tests() {
    // Expects an eof-in-comment parse error, a CommentToken holding a single
    // space, and an end-of-file token.
    auto const eof_case = [] {
        auto output = run_tokenizer("<!-- --");
        expect_error(output, ParseError::EofInComment);
        expect_token(output, CommentToken{" "});
        expect_token(output, EndOfFileToken{});
    };

    etest::test("comment end: eof", eof_case);
}
 
// Registers the test covering input that ends right after "<!-- --!".
void comment_end_bang_tests() {
    // Expects an eof-in-comment parse error, a CommentToken holding a single
    // space, and an end-of-file token.
    auto const eof_case = [] {
        auto output = run_tokenizer("<!-- --!");
        expect_error(output, ParseError::EofInComment);
        expect_token(output, CommentToken{" "});
        expect_token(output, EndOfFileToken{});
    };

    etest::test("comment end bang: eof", eof_case);
}
 
} // namespace
 
int main() {
@@ -627,6 +663,10 @@ int main() {
attribute_value_single_quoted_tests();
after_attribute_value_quoted_tests();
self_closing_start_tag_tests();
comment_start_dash_tests();
comment_end_dash_tests();
comment_end_tests();
comment_end_bang_tests();
 
etest::test("script, empty", [] {
auto tokens = run_tokenizer("<script></script>");