srctree

Robin Linden parent c0462913 8467d647
html+html2: Move spec-compliant parsing bits from //html to //html2

inlinesplit
html/parser.cpp added: 57, removed: 44, total 13
@@ -97,7 +97,7 @@ constexpr std::array kDisallowsParagraphEndTagOmissionWhenClosed{
 
void Parser::on_token(html2::Tokenizer &, html2::Token &&token) {
// Everything in <head> and earlier is handled by the new parser.
if (!std::holds_alternative<AfterHead>(insertion_mode_)) {
if (!std::holds_alternative<html2::AfterHead>(insertion_mode_)) {
insertion_mode_ = std::visit([&](auto &mode) { return mode.process(actions_, token); }, insertion_mode_)
.value_or(insertion_mode_);
if (auto const *end = std::get_if<html2::EndTagToken>(&token); end != nullptr && end->tag_name == "head") {
@@ -105,7 +105,7 @@ void Parser::on_token(html2::Tokenizer &, html2::Token &&token) {
}
}
 
if (std::holds_alternative<AfterHead>(insertion_mode_)) {
if (std::holds_alternative<html2::AfterHead>(insertion_mode_)) {
std::visit(*this, token);
}
}
 
html/parser.h added: 57, removed: 44, total 13
@@ -6,9 +6,9 @@
#define HTML_PARSER_H_
 
#include "html/parser_actions.h"
#include "html/parser_states.h"
 
#include "dom/dom.h"
#include "html2/parser_states.h"
#include "html2/tokenizer.h"
 
#include <functional>
@@ -56,7 +56,7 @@ private:
std::stack<dom::Element *> open_elements_{};
std::stringstream current_text_{};
bool scripting_{false};
InsertionMode insertion_mode_{};
html2::InsertionMode insertion_mode_{};
Actions actions_{doc_, tokenizer_, scripting_, open_elements_};
};
 
 
html/parser_actions.h added: 57, removed: 44, total 13
@@ -5,10 +5,9 @@
#ifndef HTML_PARSER_ACTIONS_H_
#define HTML_PARSER_ACTIONS_H_
 
#include "html/iparser_actions.h"
#include "html/parser_states.h"
 
#include "dom/dom.h"
#include "html2/iparser_actions.h"
#include "html2/parser_states.h"
#include "html2/tokenizer.h"
 
#include <algorithm>
@@ -21,7 +20,7 @@
 
namespace html {
 
class Actions : public IActions {
class Actions : public html2::IActions {
public:
Actions(dom::Document &document,
html2::Tokenizer &tokenizer,
@@ -31,14 +30,14 @@ public:
 
void set_doctype_name(std::string name) override { document_.doctype = std::move(name); }
 
void set_quirks_mode(QuirksMode mode) override {
void set_quirks_mode(html2::QuirksMode mode) override {
document_.mode = [=] {
switch (mode) {
case QuirksMode::NoQuirks:
case html2::QuirksMode::NoQuirks:
return dom::Document::Mode::NoQuirks;
case QuirksMode::Quirks:
case html2::QuirksMode::Quirks:
return dom::Document::Mode::Quirks;
case QuirksMode::LimitedQuirks:
case html2::QuirksMode::LimitedQuirks:
break;
}
return dom::Document::Mode::LimitedQuirks;
@@ -85,8 +84,11 @@ public:
 
void set_tokenizer_state(html2::State state) override { tokenizer_.set_state(state); }
 
void store_original_insertion_mode(InsertionMode mode) override { original_insertion_mode_ = std::move(mode); }
InsertionMode original_insertion_mode() override { return std::move(original_insertion_mode_); }
void store_original_insertion_mode(html2::InsertionMode mode) override {
original_insertion_mode_ = std::move(mode);
}
 
html2::InsertionMode original_insertion_mode() override { return std::move(original_insertion_mode_); }
 
private:
void insert(dom::Element element) {
@@ -105,7 +107,7 @@ private:
dom::Document &document_;
html2::Tokenizer &tokenizer_;
bool scripting_;
InsertionMode original_insertion_mode_;
html2::InsertionMode original_insertion_mode_;
std::stack<dom::Element *> &open_elements_;
};
 
 
html2/BUILD added: 57, removed: 44, total 13
@@ -18,6 +18,14 @@ cc_library(
],
)
 
# Additional deps for individual tests, on top of the deps every test in this
# package gets. Keys are the test source file name with the trailing
# "_test.cpp" removed (the cc_test comprehension below looks entries up with
# src[:-9]); tests without an entry get no extra deps.
#
# NOTE(review): "parser_states" presumably needs //dom and //html because
# parser_states_test.cpp still includes html/parser_actions.h and uses dom
# types -- confirm before removing.
# TODO(robinlinden): Remove.
extra_deps = {
"parser_states": [
"//dom",
"//html",
],
}
 
[cc_test(
name = src[:-4],
size = "small",
@@ -27,7 +35,10 @@ cc_library(
":html2",
"//etest",
"@fmt",
],
] + extra_deps.get(
src[:-9],
[],
),
) for src in glob(
include = ["*_test.cpp"],
exclude = ["*_fuzz_test.cpp"],
 
html/iparser_actions.h added: 57, removed: 44, total 13
@@ -2,18 +2,17 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef HTML_IPARSER_ACTIONS_H_
#define HTML_IPARSER_ACTIONS_H_
 
#include "html/parser_states.h"
#ifndef HTML2_IPARSER_ACTIONS_H_
#define HTML2_IPARSER_ACTIONS_H_
 
#include "html2/parser_states.h"
#include "html2/tokenizer.h"
 
#include <span>
#include <string>
#include <string_view>
 
namespace html {
namespace html2 {
 
enum class QuirksMode {
NoQuirks,
@@ -38,6 +37,6 @@ public:
virtual InsertionMode original_insertion_mode() = 0;
};
 
} // namespace html
} // namespace html2
 
#endif
 
html/parser_states.cpp added: 57, removed: 44, total 13
@@ -2,11 +2,11 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "html/parser_states.h"
 
#include "html/iparser_actions.h"
#include "html2/parser_states.h"
 
#include "html2/iparser_actions.h"
#include "html2/tokenizer.h"
 
#include "util/string.h"
 
#include <array>
@@ -18,7 +18,7 @@
 
using namespace std::literals;
 
namespace html {
namespace html2 {
namespace {
 
// A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
@@ -352,4 +352,4 @@ std::optional<InsertionMode> Text::process(IActions &a, html2::Token const &toke
return {};
}
 
} // namespace html
} // namespace html2
 
html/parser_states.h added: 57, removed: 44, total 13
@@ -2,14 +2,14 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef HTML_PARSER_STATES_H_
#define HTML_PARSER_STATES_H_
#ifndef HTML2_PARSER_STATES_H_
#define HTML2_PARSER_STATES_H_
 
#include "html2/tokenizer.h"
 
#include <variant>
 
namespace html {
namespace html2 {
 
class IActions;
 
@@ -127,6 +127,6 @@ struct Text {
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
} // namespace html
} // namespace html2
 
#endif
 
html/parser_states_test.cpp added: 57, removed: 44, total 13
@@ -2,11 +2,12 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "html/parser_actions.h"
#include "html/parser_states.h"
#include "html2/parser_states.h"
 
#include "html2/tokenizer.h"
 
#include "etest/etest.h"
#include "html2/tokenizer.h"
#include "html/parser_actions.h"
 
using etest::expect_eq;
 
@@ -18,7 +19,7 @@ struct ParseResult {
};
 
struct ParseOptions {
html::InsertionMode initial_insertion_mode{};
html2::InsertionMode initial_insertion_mode{};
bool scripting{false};
};
 
@@ -28,7 +29,7 @@ ParseResult parse(std::string_view html, ParseOptions opts) {
}};
 
ParseResult res{};
html::InsertionMode mode{opts.initial_insertion_mode};
html2::InsertionMode mode{opts.initial_insertion_mode};
std::stack<dom::Element *> open_elements{};
html::Actions actions{res.document, tokenizer, opts.scripting, open_elements};