srctree

Robin Linden parent 12bc83b6 116935f2
dom: Split out xpath into its own header

inlinesplit
browser/gui/app.cpp added: 371, removed: 331, total 40
@@ -7,6 +7,7 @@
#include "css/rule.h"
#include "css/style_sheet.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "engine/engine.h"
#include "geom/geom.h"
#include "gfx/color.h"
 
dom/BUILD added: 371, removed: 331, total 40
@@ -3,19 +3,22 @@ load("//bzl:copts.bzl", "HASTUR_COPTS")
 
cc_library(
name = "dom",
srcs = ["dom.cpp"],
hdrs = ["dom.h"],
srcs = glob(
include = ["*.cpp"],
exclude = ["*_test.cpp"],
),
hdrs = glob(["*.h"]),
copts = HASTUR_COPTS,
visibility = ["//visibility:public"],
)
 
cc_test(
name = "dom_test",
[cc_test(
name = src[:-4],
size = "small",
srcs = ["dom_test.cpp"],
srcs = [src],
copts = HASTUR_COPTS,
deps = [
":dom",
"//etest",
],
)
) for src in glob(["*_test.cpp"])]
 
dom/dom.h added: 371, removed: 331, total 40
@@ -5,11 +5,8 @@
#ifndef DOM_DOM_H_
#define DOM_DOM_H_
 
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <map>
#include <string>
#include <string_view>
@@ -66,89 +63,6 @@ inline std::vector<Element const *> dom_children(Element const &e) {
return children;
}
 
// https://developer.mozilla.org/en-US/docs/Web/XPath
// https://en.wikipedia.org/wiki/XPath
template<typename T>
inline std::vector<T const *> nodes_by_xpath(T const &root, std::string_view xpath) {
std::vector<T const *> next_search{&root};
std::vector<T const *> searching{};
std::vector<T const *> goal_nodes{};
 
// We only support xpaths in the form /a/b/c right now.
if (!xpath.starts_with('/')) {
return {};
}
 
static constexpr std::string_view kSeparators{"|/"};
auto is_separator = [](char c) {
return kSeparators.contains(c);
};
 
auto remove_name_segment = [&] {
std::size_t separator_position{xpath.find_first_of(kSeparators)};
if (separator_position == std::string_view::npos) {
xpath = std::string_view{};
return;
}
xpath.remove_prefix(separator_position);
};
 
auto search_children = [&] {
xpath.remove_prefix(1);
for (auto const *node : searching) {
auto name = dom_name(*node);
if (xpath.substr(0, xpath.find_first_of('|')) == name) {
goal_nodes.push_back(node);
continue;
}
 
if (xpath.starts_with(name) && xpath.size() >= name.size() + 1 && is_separator(xpath[name.size()])) {
for (auto const *child : dom_children(*node)) {
next_search.push_back(child);
}
}
}
};
 
auto search_descendants = [&] {
xpath.remove_prefix(2);
for (std::size_t i = 0; i < searching.size(); ++i) {
auto const *node = searching[i];
 
auto name = dom_name(*node);
if (xpath.substr(0, xpath.find_first_of('|')) == name) {
// TODO(robinlinden): Less terrible way of deduplicating goal nodes.
if (std::ranges::find(goal_nodes, node) == end(goal_nodes)) {
goal_nodes.push_back(node);
}
} else if (xpath.starts_with(name) && xpath.size() >= name.size() + 1 && is_separator(xpath[name.size()])) {
std::ranges::move(dom_children(*node), std::back_inserter(next_search));
}
 
// Pretty gross, but we want to perform the search in tree order.
std::ranges::move(dom_children(*node), std::insert_iterator(searching, next(begin(searching), i + 1)));
}
};
 
while (!next_search.empty() && !xpath.empty()) {
searching.swap(next_search);
next_search.clear();
if (xpath.starts_with("//")) {
search_descendants();
} else if (xpath.starts_with('/')) {
search_children();
}
remove_name_segment();
 
if (xpath.starts_with('|')) {
next_search = {&root};
xpath.remove_prefix(1);
}
}
 
return goal_nodes;
}
 
std::string to_string(Document const &);
 
} // namespace dom
 
dom/dom_test.cpp added: 371, removed: 331, total 40
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -6,111 +6,14 @@
 
#include "etest/etest.h"
 
#include <string>
#include <string_view>
#include <vector>
 
using namespace std::literals;
 
using dom::Element;
using dom::Text;
using etest::expect;
using etest::expect_eq;
using etest::require;
 
namespace {
// Test convenience: takes a dom::Node that is expected to hold an Element and
// runs the xpath query against that element. std::get fails if the node holds
// something else.
std::vector<dom::Element const *> nodes_by_xpath(dom::Node const &root, std::string_view xpath) {
    auto const &element = std::get<dom::Element>(root);
    return nodes_by_xpath(element, xpath);
}
 
void descendant_axis_tests() {
etest::test("descendant axis, root node match", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
 
nodes = nodes_by_xpath(dom, "//div");
expect_eq(*nodes.at(0), dom);
});
 
etest::test("descendant axis, nested matches", [] {
dom::Element const &first{"div", {}, {dom::Element{"div", {}, {dom::Element{"div"}}}}};
auto const &second = std::get<dom::Element>(first.children[0]);
auto const &third = std::get<dom::Element>(second.children[0]);
 
auto nodes = nodes_by_xpath(first, "//div");
expect_eq(nodes, std::vector{&first, &second, &third});
 
nodes = nodes_by_xpath(first, "//div/div");
expect_eq(nodes, std::vector{&second, &third});
 
nodes = nodes_by_xpath(first, "//div//div");
expect_eq(nodes, std::vector{&second, &third});
});
 
etest::test("descendant axis, no matches", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "//p");
expect(nodes.empty());
});
 
etest::test("descendant axis, mixed child and descendant axes", [] {
dom::Element div{
.name{"div"},
.children{
dom::Element{"span", {}, {dom::Text{"oh no"}}},
dom::Element{"p", {}, {dom::Element{"span", {}, {dom::Element{"a"}}}}},
dom::Element{"span"},
},
};
 
auto const &div_first_span = std::get<dom::Element>(div.children[0]);
auto const &p = std::get<dom::Element>(div.children[1]);
auto const &p_span = std::get<dom::Element>(p.children[0]);
auto const &p_span_a = std::get<dom::Element>(p_span.children[0]);
auto const &div_last_span = std::get<dom::Element>(div.children[2]);
 
auto nodes = nodes_by_xpath(div, "//p");
expect_eq(nodes, std::vector{&p});
 
nodes = nodes_by_xpath(div, "//p/span");
expect_eq(nodes, std::vector{&p_span});
 
nodes = nodes_by_xpath(div, "/div/p//a");
expect_eq(nodes, std::vector{&p_span_a});
 
nodes = nodes_by_xpath(div, "//span");
expect_eq(nodes, std::vector{&div_first_span, &p_span, &div_last_span});
});
}
 
void union_operator_tests() {
etest::test("union operator", [] {
dom::Element div{
.name{"div"},
.children{
dom::Element{"span", {}, {dom::Text{"oh no"}}},
dom::Element{"p", {}, {dom::Element{"span", {}, {dom::Element{"a"}}}}},
dom::Element{"span"},
},
};
 
auto const &div_first_span = std::get<dom::Element>(div.children[0]);
auto const &p = std::get<dom::Element>(div.children[1]);
auto const &p_span = std::get<dom::Element>(p.children[0]);
auto const &div_last_span = std::get<dom::Element>(div.children[2]);
 
auto nodes = nodes_by_xpath(div, "/div/p|//span");
expect_eq(nodes, std::vector{&p, &div_first_span, &p_span, &div_last_span});
});
}
 
} // namespace
 
int main() {
descendant_axis_tests();
union_operator_tests();
 
etest::test("to_string", [] {
auto document = dom::Document{.doctype{"html5"}};
document.html_node = dom::Element{.name{"span"}, .children{{dom::Text{"hello"}}}};
@@ -118,133 +21,5 @@ int main() {
expect_eq(to_string(document), expected);
});
 
etest::test("unsupported xpaths don't return anything", [] {
dom::Node dom = dom::Element{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
});
 
etest::test("no matches", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/a");
expect(nodes.empty());
});
 
etest::test("root match", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html");
require(nodes.size() == 1);
expect(nodes[0]->name == "html");
});
 
etest::test("path with one element node", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
require(nodes.size() == 1);
expect(nodes[0]->name == "p");
});
 
etest::test("path with multiple element nodes", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{
.name{"body"},
.children{
Element{.name{"p"}},
Element{.name{"p"}, .attributes{{"display", "none"}}},
},
},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
require(nodes.size() == 2);
 
auto const first = *nodes[0];
expect(first.name == "p");
expect(first.attributes.empty());
 
auto const second = *nodes[1];
expect(second.name == "p");
expect(second.attributes.size() == 1);
expect(second.attributes.at("display") == "none");
});
 
etest::test("matching nodes in different branches", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{
.name{"body"},
.children{
Element{
.name{"div"},
.children{Element{.name{"p"}, .attributes{{"display", "none"}}}},
},
Element{
.name{"span"},
.children{Element{.name{"p"}, .attributes{{"display", "inline"}}}},
},
Element{
.name{"div"},
.children{Element{.name{"p"}, .attributes{{"display", "block"}}}},
},
},
},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/div/p");
require(nodes.size() == 2);
 
auto const first = *nodes[0];
expect(first.name == "p");
expect(first.attributes.size() == 1);
expect(first.attributes.at("display") == "none");
 
auto const second = *nodes[1];
expect(second.name == "p");
expect(second.attributes.size() == 1);
expect(second.attributes.at("display") == "block");
});
 
etest::test("non-element node in search path", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Text{"I don't belong here. :("},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
expect(nodes.size() == 1);
});
 
return etest::run_all_tests();
}
 
filename was Deleted added: 371, removed: 331, total 40
@@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef DOM_XPATH_H_
#define DOM_XPATH_H_
 
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string_view>
#include <vector>
 
namespace dom {
 
// https://developer.mozilla.org/en-US/docs/Web/XPath
// https://en.wikipedia.org/wiki/XPath
template<typename T>
inline std::vector<T const *> nodes_by_xpath(T const &root, std::string_view xpath) {
std::vector<T const *> next_search{&root};
std::vector<T const *> searching{};
std::vector<T const *> goal_nodes{};
 
// We only support xpaths in the form /a/b/c right now.
if (!xpath.starts_with('/')) {
return {};
}
 
static constexpr std::string_view kSeparators{"|/"};
auto is_separator = [](char c) {
return kSeparators.contains(c);
};
 
auto remove_name_segment = [&] {
std::size_t separator_position{xpath.find_first_of(kSeparators)};
if (separator_position == std::string_view::npos) {
xpath = std::string_view{};
return;
}
xpath.remove_prefix(separator_position);
};
 
auto search_children = [&] {
xpath.remove_prefix(1);
for (auto const *node : searching) {
auto name = dom_name(*node);
if (xpath.substr(0, xpath.find_first_of('|')) == name) {
goal_nodes.push_back(node);
continue;
}
 
if (xpath.starts_with(name) && xpath.size() >= name.size() + 1 && is_separator(xpath[name.size()])) {
for (auto const *child : dom_children(*node)) {
next_search.push_back(child);
}
}
}
};
 
auto search_descendants = [&] {
xpath.remove_prefix(2);
for (std::size_t i = 0; i < searching.size(); ++i) {
auto const *node = searching[i];
 
auto name = dom_name(*node);
if (xpath.substr(0, xpath.find_first_of('|')) == name) {
// TODO(robinlinden): Less terrible way of deduplicating goal nodes.
if (std::ranges::find(goal_nodes, node) == end(goal_nodes)) {
goal_nodes.push_back(node);
}
} else if (xpath.starts_with(name) && xpath.size() >= name.size() + 1 && is_separator(xpath[name.size()])) {
std::ranges::move(dom_children(*node), std::back_inserter(next_search));
}
 
// Pretty gross, but we want to perform the search in tree order.
std::ranges::move(dom_children(*node), std::insert_iterator(searching, next(begin(searching), i + 1)));
}
};
 
while (!next_search.empty() && !xpath.empty()) {
searching.swap(next_search);
next_search.clear();
if (xpath.starts_with("//")) {
search_descendants();
} else if (xpath.starts_with('/')) {
search_children();
}
remove_name_segment();
 
if (xpath.starts_with('|')) {
next_search = {&root};
xpath.remove_prefix(1);
}
}
 
return goal_nodes;
}
 
} // namespace dom
 
#endif
 
filename was Deleted added: 371, removed: 331, total 40
@@ -0,0 +1,242 @@
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "dom/xpath.h"
 
#include "dom/dom.h"
 
#include "etest/etest.h"
 
#include <string_view>
#include <vector>
 
using dom::Element;
using dom::Text;
using etest::expect;
using etest::expect_eq;
using etest::require;
 
namespace {
// Test convenience: takes a dom::Node that is expected to hold an Element and
// runs the xpath query against that element. std::get fails if the node holds
// something else.
std::vector<dom::Element const *> nodes_by_xpath(dom::Node const &root, std::string_view xpath) {
    auto const &element = std::get<dom::Element>(root);
    return nodes_by_xpath(element, xpath);
}
 
void descendant_axis_tests() {
etest::test("descendant axis, root node match", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
 
nodes = nodes_by_xpath(dom, "//div");
expect_eq(*nodes.at(0), dom);
});
 
etest::test("descendant axis, nested matches", [] {
dom::Element const &first{"div", {}, {dom::Element{"div", {}, {dom::Element{"div"}}}}};
auto const &second = std::get<dom::Element>(first.children[0]);
auto const &third = std::get<dom::Element>(second.children[0]);
 
auto nodes = nodes_by_xpath(first, "//div");
expect_eq(nodes, std::vector{&first, &second, &third});
 
nodes = nodes_by_xpath(first, "//div/div");
expect_eq(nodes, std::vector{&second, &third});
 
nodes = nodes_by_xpath(first, "//div//div");
expect_eq(nodes, std::vector{&second, &third});
});
 
etest::test("descendant axis, no matches", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "//p");
expect(nodes.empty());
});
 
etest::test("descendant axis, mixed child and descendant axes", [] {
dom::Element div{
.name{"div"},
.children{
dom::Element{"span", {}, {dom::Text{"oh no"}}},
dom::Element{"p", {}, {dom::Element{"span", {}, {dom::Element{"a"}}}}},
dom::Element{"span"},
},
};
 
auto const &div_first_span = std::get<dom::Element>(div.children[0]);
auto const &p = std::get<dom::Element>(div.children[1]);
auto const &p_span = std::get<dom::Element>(p.children[0]);
auto const &p_span_a = std::get<dom::Element>(p_span.children[0]);
auto const &div_last_span = std::get<dom::Element>(div.children[2]);
 
auto nodes = nodes_by_xpath(div, "//p");
expect_eq(nodes, std::vector{&p});
 
nodes = nodes_by_xpath(div, "//p/span");
expect_eq(nodes, std::vector{&p_span});
 
nodes = nodes_by_xpath(div, "/div/p//a");
expect_eq(nodes, std::vector{&p_span_a});
 
nodes = nodes_by_xpath(div, "//span");
expect_eq(nodes, std::vector{&div_first_span, &p_span, &div_last_span});
});
}
 
void union_operator_tests() {
etest::test("union operator", [] {
dom::Element div{
.name{"div"},
.children{
dom::Element{"span", {}, {dom::Text{"oh no"}}},
dom::Element{"p", {}, {dom::Element{"span", {}, {dom::Element{"a"}}}}},
dom::Element{"span"},
},
};
 
auto const &div_first_span = std::get<dom::Element>(div.children[0]);
auto const &p = std::get<dom::Element>(div.children[1]);
auto const &p_span = std::get<dom::Element>(p.children[0]);
auto const &div_last_span = std::get<dom::Element>(div.children[2]);
 
auto nodes = nodes_by_xpath(div, "/div/p|//span");
expect_eq(nodes, std::vector{&p, &div_first_span, &p_span, &div_last_span});
});
}
 
} // namespace
 
int main() {
descendant_axis_tests();
union_operator_tests();
 
etest::test("unsupported xpaths don't return anything", [] {
dom::Node dom = dom::Element{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
});
 
etest::test("no matches", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/a");
expect(nodes.empty());
});
 
etest::test("root match", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html");
require(nodes.size() == 1);
expect(nodes[0]->name == "html");
});
 
etest::test("path with one element node", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
require(nodes.size() == 1);
expect(nodes[0]->name == "p");
});
 
etest::test("path with multiple element nodes", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{
.name{"body"},
.children{
Element{.name{"p"}},
Element{.name{"p"}, .attributes{{"display", "none"}}},
},
},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
require(nodes.size() == 2);
 
auto const first = *nodes[0];
expect(first.name == "p");
expect(first.attributes.empty());
 
auto const second = *nodes[1];
expect(second.name == "p");
expect(second.attributes.size() == 1);
expect(second.attributes.at("display") == "none");
});
 
etest::test("matching nodes in different branches", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Element{
.name{"body"},
.children{
Element{
.name{"div"},
.children{Element{.name{"p"}, .attributes{{"display", "none"}}}},
},
Element{
.name{"span"},
.children{Element{.name{"p"}, .attributes{{"display", "inline"}}}},
},
Element{
.name{"div"},
.children{Element{.name{"p"}, .attributes{{"display", "block"}}}},
},
},
},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/div/p");
require(nodes.size() == 2);
 
auto const first = *nodes[0];
expect(first.name == "p");
expect(first.attributes.size() == 1);
expect(first.attributes.at("display") == "none");
 
auto const second = *nodes[1];
expect(second.name == "p");
expect(second.attributes.size() == 1);
expect(second.attributes.at("display") == "block");
});
 
etest::test("non-element node in search path", [] {
auto const dom_root = dom::Element{
.name{"html"},
.children{
Element{.name{"head"}},
Text{"I don't belong here. :("},
Element{.name{"body"}, .children{Element{.name{"p"}}}},
},
};
 
auto const nodes = nodes_by_xpath(dom_root, "/html/body/p");
expect(nodes.size() == 1);
});
 
return etest::run_all_tests();
}
 
engine/engine.cpp added: 371, removed: 331, total 40
@@ -11,6 +11,7 @@
#include "css/parser.h"
#include "css/style_sheet.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "html/parser.h"
#include "layout/layout.h"
#include "protocol/response.h"
 
engine/engine_test.cpp added: 371, removed: 331, total 40
@@ -7,6 +7,7 @@
#include "css/property_id.h"
#include "css/rule.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "etest/etest.h"
#include "gfx/color.h"
#include "protocol/iprotocol_handler.h"
 
layout/layout_box_test.cpp added: 371, removed: 331, total 40
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2022 Mikael Larsson <c.mikael.larsson@gmail.com>
//
// SPDX-License-Identifier: BSD-2-Clause
@@ -9,6 +9,7 @@
 
#include "css/property_id.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "etest/etest.h"
#include "style/styled_node.h"
 
 
render/render.cpp added: 371, removed: 331, total 40
@@ -6,7 +6,7 @@
#include "render/render.h"
 
#include "css/property_id.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "geom/geom.h"
#include "gfx/color.h"
#include "gfx/font.h"
 
style/styled_node_test.cpp added: 371, removed: 331, total 40
@@ -6,6 +6,7 @@
 
#include "css/property_id.h"
#include "dom/dom.h"
#include "dom/xpath.h"
#include "etest/etest.h"
#include "gfx/color.h"