srctree

Robin Linden parent f72a8e94 c0462913
html: Set up an interface for the parser actions

inlinesplit
filename was Deleted added: 78, removed: 40, total 38
@@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef HTML_IPARSER_ACTIONS_H_
#define HTML_IPARSER_ACTIONS_H_
 
#include "html/parser_states.h"
 
#include "html2/tokenizer.h"
 
#include <span>
#include <string>
#include <string_view>
 
namespace html {
 
// Document mode chosen by the parser and handed to
// IActions::set_quirks_mode(). The enumerator names follow the
// no-quirks / quirks / limited-quirks terminology of the HTML standard.
enum class QuirksMode {
    NoQuirks, // Default enumerator (value 0).
    Quirks,
    LimitedQuirks,
};
 
class IActions {
public:
virtual ~IActions() = default;
 
virtual void set_doctype_name(std::string) = 0;
virtual void set_quirks_mode(QuirksMode) = 0;
virtual bool scripting() const = 0;
virtual void insert_element_for(html2::StartTagToken const &) = 0;
virtual void pop_current_node() = 0;
virtual std::string_view current_node_name() const = 0;
virtual void merge_into_html_node(std::span<html2::Attribute const>) = 0;
virtual void insert_character(html2::CharacterToken const &) = 0;
virtual void set_tokenizer_state(html2::State) = 0;
virtual void store_original_insertion_mode(InsertionMode) = 0;
virtual InsertionMode original_insertion_mode() = 0;
};
 
} // namespace html
 
#endif
 
html/parser_actions.h added: 78, removed: 40, total 38
@@ -5,6 +5,7 @@
#ifndef HTML_PARSER_ACTIONS_H_
#define HTML_PARSER_ACTIONS_H_
 
#include "html/iparser_actions.h"
#include "html/parser_states.h"
 
#include "dom/dom.h"
@@ -20,13 +21,7 @@
 
namespace html {
 
enum class QuirksMode {
NoQuirks,
Quirks,
LimitedQuirks,
};
 
class Actions {
class Actions : public IActions {
public:
Actions(dom::Document &document,
html2::Tokenizer &tokenizer,
@@ -34,9 +29,9 @@ public:
std::stack<dom::Element *> &open_elements)
: document_{document}, tokenizer_{tokenizer}, scripting_{scripting}, open_elements_{open_elements} {}
 
void set_doctype_name(std::string name) { document_.doctype = std::move(name); }
void set_doctype_name(std::string name) override { document_.doctype = std::move(name); }
 
void set_quirks_mode(QuirksMode mode) {
void set_quirks_mode(QuirksMode mode) override {
document_.mode = [=] {
switch (mode) {
case QuirksMode::NoQuirks:
@@ -50,9 +45,9 @@ public:
}();
}
 
bool scripting() const { return scripting_; }
bool scripting() const override { return scripting_; }
 
void insert_element_for(html2::StartTagToken const &token) {
void insert_element_for(html2::StartTagToken const &token) override {
auto into_dom_attributes = [](std::vector<html2::Attribute> const &attributes) -> dom::AttrMap {
dom::AttrMap attrs{};
for (auto const &[name, value] : attributes) {
@@ -65,10 +60,10 @@ public:
insert({token.tag_name, into_dom_attributes(token.attributes)});
}
 
void pop_current_node() { open_elements_.pop(); }
std::string_view current_node_name() const { return open_elements_.top()->name; }
void pop_current_node() override { open_elements_.pop(); }
std::string_view current_node_name() const override { return open_elements_.top()->name; }
 
void merge_into_html_node(std::span<html2::Attribute const> attrs) {
void merge_into_html_node(std::span<html2::Attribute const> attrs) override {
auto &html = document_.html();
for (auto const &attr : attrs) {
if (html.attributes.contains(attr.name)) {
@@ -79,7 +74,7 @@ public:
}
}
 
void insert_character(html2::CharacterToken const &character) {
void insert_character(html2::CharacterToken const &character) override {
auto &current_element = open_elements_.top();
if (current_element->children.empty() || !std::holds_alternative<dom::Text>(current_element->children.back())) {
current_element->children.emplace_back(dom::Text{});
@@ -88,10 +83,10 @@ public:
std::get<dom::Text>(current_element->children.back()).text += character.data;
}
 
void set_tokenizer_state(html2::State state) { tokenizer_.set_state(state); }
void set_tokenizer_state(html2::State state) override { tokenizer_.set_state(state); }
 
void store_original_insertion_mode(InsertionMode mode) { original_insertion_mode_ = std::move(mode); }
InsertionMode original_insertion_mode() { return std::move(original_insertion_mode_); }
void store_original_insertion_mode(InsertionMode mode) override { original_insertion_mode_ = std::move(mode); }
InsertionMode original_insertion_mode() override { return std::move(original_insertion_mode_); }
 
private:
void insert(dom::Element element) {
 
html/parser_states.cpp added: 78, removed: 40, total 38
@@ -4,7 +4,7 @@
 
#include "html/parser_states.h"
 
#include "html/parser_actions.h"
#include "html/iparser_actions.h"
 
#include "html2/tokenizer.h"
#include "util/string.h"
@@ -132,7 +132,7 @@ constexpr bool is_quirky_when_system_identifier_is_empty(std::string_view public
} // namespace
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
std::optional<InsertionMode> Initial::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> Initial::process(IActions &a, html2::Token const &token) {
if (is_boring_whitespace(token)) {
return {};
}
@@ -171,7 +171,7 @@ std::optional<InsertionMode> Initial::process(Actions &a, html2::Token const &to
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
std::optional<InsertionMode> BeforeHtml::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> BeforeHtml::process(IActions &a, html2::Token const &token) {
if (std::holds_alternative<html2::CommentToken>(token)) {
// TODO(robinlinden): Insert as last child.
return {};
@@ -191,7 +191,7 @@ std::optional<InsertionMode> BeforeHtml::process(Actions &a, html2::Token const
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
std::optional<InsertionMode> BeforeHead::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> BeforeHead::process(IActions &a, html2::Token const &token) {
if (is_boring_whitespace(token)) {
return {};
}
@@ -213,7 +213,7 @@ std::optional<InsertionMode> BeforeHead::process(Actions &a, html2::Token const
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
std::optional<InsertionMode> InHead::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> InHead::process(IActions &a, html2::Token const &token) {
if (is_boring_whitespace(token)) {
// TODO(robinlinden): Should be inserting characters, but our last
// parser didn't do that so it will require rewriting tests.
@@ -280,7 +280,7 @@ std::optional<InsertionMode> InHead::process(Actions &a, html2::Token const &tok
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
std::optional<InsertionMode> InHeadNoscript::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> InHeadNoscript::process(IActions &a, html2::Token const &token) {
if (std::holds_alternative<html2::DoctypeToken>(token)) {
// Parse error.
return {};
@@ -319,12 +319,12 @@ std::optional<InsertionMode> InHeadNoscript::process(Actions &a, html2::Token co
return InHead{}.process(a, token).value_or(InHead{});
}
 
std::optional<InsertionMode> AfterHead::process(Actions &, html2::Token const &) {
std::optional<InsertionMode> AfterHead::process(IActions &, html2::Token const &) {
return {};
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
std::optional<InsertionMode> InBody::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> InBody::process(IActions &a, html2::Token const &token) {
if (auto const *start = std::get_if<html2::StartTagToken>(&token); start && start->tag_name == "html") {
// Parse error.
// TODO(robinlinden): If there is a template element on the stack of open elements, then ignore the token.
@@ -337,7 +337,7 @@ std::optional<InsertionMode> InBody::process(Actions &a, html2::Token const &tok
return {};
}
 
std::optional<InsertionMode> Text::process(Actions &a, html2::Token const &token) {
std::optional<InsertionMode> Text::process(IActions &a, html2::Token const &token) {
if (auto const *character = std::get_if<html2::CharacterToken>(&token)) {
assert(character->data != '\0');
a.insert_character(*character);
 
html/parser_states.h added: 78, removed: 40, total 38
@@ -11,7 +11,7 @@
 
namespace html {
 
class Actions;
class IActions;
 
struct Initial;
struct BeforeHtml;
@@ -83,48 +83,48 @@ struct AfterAfterFrameset {};
// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
// Incomplete.
struct Initial {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
// Incomplete.
struct BeforeHtml {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
// Incomplete.
struct BeforeHead {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
// Incomplete.
struct InHead {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
struct InHeadNoscript {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
// Incomplete.
struct AfterHead {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
// Incomplete.
struct InBody {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
// Incomplete.
struct Text {
std::optional<InsertionMode> process(Actions &, html2::Token const &);
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};
 
} // namespace html