srctree

Robin Linden parent 7da6a085 d0e3e825
dom: Add support for the descendant xpath axis

inlinesplit
dom/dom.h added: 105, removed: 17, total 88
@@ -5,7 +5,9 @@
#ifndef DOM_DOM_H_
#define DOM_DOM_H_
 
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <map>
#include <string>
#include <string_view>
@@ -67,6 +69,15 @@ inline std::vector<T const *> nodes_by_xpath(T const &root, std::string_view xpa
return {};
}
 
// Strips the leading name segment off xpath so that it starts at the next
// '/' separator. If no separator is left, xpath is cleared entirely.
auto remove_name_segment = [&] {
    auto const slash = xpath.find('/');
    if (slash != std::string_view::npos) {
        xpath.remove_prefix(slash);
        return;
    }

    xpath = std::string_view{};
};
 
auto search_children = [&] {
xpath.remove_prefix(1);
for (auto node : searching) {
@@ -82,21 +93,37 @@ inline std::vector<T const *> nodes_by_xpath(T const &root, std::string_view xpa
}
}
}
};
 
// Remove name.
std::size_t separator_position{xpath.find_first_of("/")};
if (separator_position == xpath.npos) {
xpath = std::string_view{};
return;
// Handles one descendant-axis step. Drops the first two characters of xpath
// (the caller only invokes this when xpath starts with "//") and then walks
// `searching`, which grows during the walk as each node's children are
// spliced in right behind it.
auto search_descendants = [&] {
xpath.remove_prefix(2);
// Index-based loop: `searching` is inserted into inside the body, so its
// size changes while we iterate.
for (std::size_t i = 0; i < searching.size(); ++i) {
auto const *node = searching[i];

auto name = dom_name(*node);
if (xpath == name) {
// The remaining path is exactly this node's name, so the node is a result.
// TODO(robinlinden): Less terrible way of deduplicating goal nodes.
if (std::ranges::find(goal_nodes, node) == end(goal_nodes)) {
goal_nodes.push_back(node);
}
} else if (xpath.starts_with(name) && xpath.size() >= name.size() + 1 && xpath[name.size()] == '/') {
// The path continues past this node's name ("name/..."), so queue the
// node's children for the next round of searching.
std::ranges::move(dom_children(*node), std::back_inserter(next_search));
}

// Pretty gross, but we want to perform the search in tree order.
// The children are inserted directly after the current node (index i + 1),
// so they are the next entries this loop visits.
std::ranges::move(dom_children(*node), std::insert_iterator(searching, next(begin(searching), i + 1)));
}

// NOTE(review): separator_position is not declared anywhere inside this
// lambda in the visible text; this line looks like a leftover from the
// pre-change code in the diff view - confirm before relying on it.
xpath.remove_prefix(separator_position);
};
 
while (!next_search.empty() && !xpath.empty()) {
searching.swap(next_search);
next_search.clear();
search_children();
if (xpath.starts_with("//")) {
search_descendants();
} else if (xpath.starts_with('/')) {
search_children();
}
remove_name_segment();
}
 
return goal_nodes;
 
dom/dom_test.cpp added: 105, removed: 17, total 88
@@ -24,9 +24,73 @@ dom::Node create_element_node(std::string_view name, dom::AttrMap attrs, std::ve
// Test convenience overload: unwraps the dom::Element held by `root` and runs
// the xpath query against it. std::get throws if `root` holds something else.
std::vector<dom::Element const *> nodes_by_xpath(dom::Node const &root, std::string_view xpath) {
    auto const &element = std::get<dom::Element>(root);
    return nodes_by_xpath(element, xpath);
}
 
void descendant_axis_tests() {
etest::test("descendant axis, root node match", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
 
nodes = nodes_by_xpath(dom, "//div");
expect_eq(*nodes.at(0), dom);
});
 
etest::test("descendant axis, nested matches", [] {
dom::Element const &first{"div", {}, {dom::Element{"div", {}, {dom::Element{"div"}}}}};
dom::Element const &second = std::get<dom::Element>(first.children[0]);
dom::Element const &third = std::get<dom::Element>(second.children[0]);
 
auto nodes = nodes_by_xpath(first, "//div");
expect_eq(nodes, std::vector{&first, &second, &third});
 
nodes = nodes_by_xpath(first, "//div/div");
expect_eq(nodes, std::vector{&second, &third});
 
nodes = nodes_by_xpath(first, "//div//div");
expect_eq(nodes, std::vector{&second, &third});
});
 
etest::test("descendant axis, no matches", [] {
dom::Element dom{"div"};
auto nodes = nodes_by_xpath(dom, "//p");
expect(nodes.empty());
});
 
etest::test("descendant axis, mixed child and descendant axes", [] {
dom::Element div{
.name{"div"},
.children{
dom::Element{"span", {}, {dom::Text{"oh no"}}},
dom::Element{"p", {}, {dom::Element{"span", {}, {dom::Element{"a"}}}}},
dom::Element{"span"},
},
};
 
dom::Element const &div_first_span = std::get<dom::Element>(div.children[0]);
dom::Element const &p = std::get<dom::Element>(div.children[1]);
dom::Element const &p_span = std::get<dom::Element>(p.children[0]);
dom::Element const &p_span_a = std::get<dom::Element>(p_span.children[0]);
dom::Element const &div_last_span = std::get<dom::Element>(div.children[2]);
 
auto nodes = nodes_by_xpath(div, "//p");
expect_eq(nodes, std::vector{&p});
 
nodes = nodes_by_xpath(div, "//p/span");
expect_eq(nodes, std::vector{&p_span});
 
nodes = nodes_by_xpath(div, "/div/p//a");
expect_eq(nodes, std::vector{&p_span_a});
 
nodes = nodes_by_xpath(div, "//span");
expect_eq(nodes, std::vector{&div_first_span, &p_span, &div_last_span});
});
}
 
} // namespace
 
int main() {
descendant_axis_tests();
 
etest::test("to_string", [] {
auto document = dom::Document{.doctype{"html5"}};
document.html_node = dom::Element{.name{"span"}, .children{{dom::Text{"hello"}}}};
@@ -38,9 +102,6 @@ int main() {
dom::Node dom = dom::Element{"div"};
auto nodes = nodes_by_xpath(dom, "div");
expect(nodes.empty());
 
nodes = nodes_by_xpath(dom, "//div");
expect(nodes.empty());
});
 
// TODO(robinlinden): clang-format doesn't get along well with how I structured
 
layout/layout_test.cpp added: 105, removed: 17, total 88
@@ -1164,7 +1164,7 @@ int main() {
expect_eq(dom::nodes_by_xpath(layout, "/html/div/"), NodeVec{});
expect_eq(dom::nodes_by_xpath(layout, "/html/div/p"), NodeVec{&anon_block.children[1].children[0]});
expect_eq(dom::nodes_by_xpath(layout, "/htm/div"), NodeVec{});
expect_eq(dom::nodes_by_xpath(layout, "//div"), NodeVec{});
expect_eq(dom::nodes_by_xpath(layout, "//div"), NodeVec{&layout.children[0], &anon_block.children[1]});
});
 
return etest::run_all_tests();
 
style/styled_node_test.cpp added: 105, removed: 17, total 88
@@ -238,7 +238,7 @@ int main() {
expect_eq(dom::nodes_by_xpath(styled_node, "/html/div/"), NodeVec{});
expect_eq(dom::nodes_by_xpath(styled_node, "/html/div/p"), NodeVec{});
expect_eq(dom::nodes_by_xpath(styled_node, "/htm/div"), NodeVec{});
expect_eq(dom::nodes_by_xpath(styled_node, "//div"), NodeVec{});
expect_eq(dom::nodes_by_xpath(styled_node, "//div"), NodeVec{&styled_node.children[1]});
});
 
etest::test("get_property, last property gets priority", [] {