srctree

Robin Linden parent b5a84976 5ce9a007
html2: Update comment tokenizing to emit errors in more places

inlinesplit
html2/tokenizer.cpp added: 19, removed: 8, total 11
@@ -1060,7 +1060,7 @@ void Tokenizer::run() {
state_ = State::CommentStartDash;
continue;
case '>':
// This is an abrupt closing of empty comment parse error.
emit(ParseError::AbruptClosingOfEmptyComment);
state_ = State::Data;
emit(std::move(current_token_));
continue;
@@ -1084,7 +1084,7 @@ void Tokenizer::run() {
state_ = State::CommentEnd;
continue;
case '>':
// This is an abrupt closing of empty comment parse error.
emit(ParseError::AbruptClosingOfEmptyComment);
state_ = State::Data;
emit(std::move(current_token_));
continue;
@@ -1098,7 +1098,7 @@ void Tokenizer::run() {
case State::Comment: {
auto c = consume_next_input_character();
if (!c) {
// This is an eof-in-comment parse error.
emit(ParseError::EofInComment);
emit(std::move(current_token_));
emit(EndOfFileToken{});
return;
@@ -1189,7 +1189,7 @@ void Tokenizer::run() {
reconsume_in(State::CommentEnd);
continue;
default:
// This is a nested-comment parse error.
emit(ParseError::NestedComment);
reconsume_in(State::CommentEnd);
continue;
}
@@ -1257,7 +1257,7 @@ void Tokenizer::run() {
state_ = State::CommentEndDash;
continue;
case '>':
// This is an incorrectly-closed-comment parse error.
emit(ParseError::IncorrectlyClosedComment);
state_ = State::Data;
emit(std::move(current_token_));
continue;
 
html2/tokenizer.h added: 19, removed: 8, total 11
@@ -101,9 +101,12 @@ enum class State {
};
 
enum class ParseError {
AbruptClosingOfEmptyComment,
AbruptDoctypePublicIdentifier,
EofInComment,
EofInDoctype,
EofInTag,
IncorrectlyClosedComment,
InvalidCharacterSequenceAfterDoctypeName,
InvalidFirstCharacterOfTagName,
MissingDoctypePublicIdentifier,
@@ -111,6 +114,7 @@ enum class ParseError {
MissingQuoteBeforeDoctypeSystemIdentifier,
MissingWhitespaceAfterDoctypePublicKeyword,
MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers,
NestedComment,
UnexpectedCharacterInUnquotedAttributeValue,
UnexpectedNullCharacter,
};
 
html2/tokenizer_test.cpp added: 19, removed: 8, total 11
@@ -455,12 +455,14 @@ int main() {
 
// Input: a comment that contains a second "<!--" right before the closing
// "-->". The checks below look for a NestedComment parse error, then a comment
// token whose data is "<!--", then the end-of-file token.
etest::test("comment, nested comment", [] {
auto tokens = run_tokenizer("<!--<!---->");
expect_error(tokens, ParseError::NestedComment);
expect_token(tokens, CommentToken{.data = "<!--"});
expect_token(tokens, EndOfFileToken{});
});
 
etest::test("comment, nested comment closed", [] {
auto tokens = run_tokenizer("<!-- <!-- nested --> -->");
expect_error(tokens, ParseError::NestedComment);
expect_token(tokens, CommentToken{.data = " <!-- nested "});
expect_text(tokens, " -->");
expect_token(tokens, EndOfFileToken{});
@@ -468,30 +470,35 @@ int main() {
 
// Input: "<!--" followed immediately by '>'. The checks below look for an
// AbruptClosingOfEmptyComment parse error, then a comment token with empty
// data, then the end-of-file token.
etest::test("comment, abrupt closing in comment start", [] {
auto tokens = run_tokenizer("<!-->");
expect_error(tokens, ParseError::AbruptClosingOfEmptyComment);
expect_token(tokens, CommentToken{.data = ""});
expect_token(tokens, EndOfFileToken{});
});
 
// Input: "<!--" followed by a single '-' and then '>'. Same expectations as the
// "<!-->" case: an AbruptClosingOfEmptyComment parse error, a comment token
// with empty data, then the end-of-file token.
etest::test("comment, abrupt closing in comment start dash", [] {
auto tokens = run_tokenizer("<!--->");
expect_error(tokens, ParseError::AbruptClosingOfEmptyComment);
expect_token(tokens, CommentToken{.data = ""});
expect_token(tokens, EndOfFileToken{});
});
 
// Input: a comment terminated with "--!>" rather than "-->". The checks below
// look for an IncorrectlyClosedComment parse error, then a comment token whose
// data is "abc" (the "--!" is not part of the data), then the end-of-file token.
etest::test("comment, incorrectly closed comment", [] {
auto tokens = run_tokenizer("<!--abc--!>");
expect_error(tokens, ParseError::IncorrectlyClosedComment);
expect_token(tokens, CommentToken{.data = "abc"});
expect_token(tokens, EndOfFileToken{});
});
 
// Input: only the comment opener "<!--", after which the input ends. The checks
// below look for an EofInComment parse error, then a comment token with empty
// data, then the end-of-file token.
etest::test("comment, end before comment", [] {
auto tokens = run_tokenizer("<!--");
expect_error(tokens, ParseError::EofInComment);
expect_token(tokens, CommentToken{.data = ""});
expect_token(tokens, EndOfFileToken{});
});
 
// Input: a comment with some content ("abc") that is never closed before the
// input ends. The checks below look for an EofInComment parse error, then a
// comment token carrying the text seen so far ("abc"), then the end-of-file
// token.
etest::test("comment, eof before comment is closed", [] {
auto tokens = run_tokenizer("<!--abc");
expect_error(tokens, ParseError::EofInComment);
expect_token(tokens, CommentToken{.data = "abc"});
expect_token(tokens, EndOfFileToken{});
});