srctree

Robin Linden parent ca1c1895 b56b8fbf
Parse and display part of the HTML

Done using lots of hacks. pugixml isn't meant for parsing HTML, but I chose a library and stuck with it. I'll replace all of this anyway.

Added a .bazelrc to target C++20 as I wanted to use the if-init construction.

inlinesplit
filename was Deleted added: 70, removed: 7, total 63
@@ -0,0 +1 @@
build --cxxopt='-std=c++2a'
 
BUILD added: 70, removed: 7, total 63
@@ -7,5 +7,8 @@ cc_binary(
"@bazel_tools//platforms:windows": [],
"@bazel_tools//platforms:linux": ["-lpthread"],
}),
deps = ["@asio"],
deps = [
"@asio",
"@pugixml",
],
)
 
WORKSPACE added: 70, removed: 7, total 63
@@ -7,3 +7,11 @@ http_archive(
strip_prefix = "asio-1.18.1",
urls = ["https://downloads.sourceforge.net/project/asio/asio/1.18.1%20(Stable)/asio-1.18.1.tar.bz2"],
)
 
# External dependency: pugixml 1.11.4, fetched as a release tarball from GitHub.
# The archive is built using the BUILD file referenced by `build_file` below.
http_archive(
name = "pugixml",
build_file = "//bazel:pugixml.BUILD",
# Checksum of the downloaded archive; must be updated together with `urls`.
sha256 = "8ddf57b65fb860416979a3f0640c2ad45ddddbbafa82508ef0a0af3ce7061716",
# Drop the top-level directory of the tarball so paths start at the project root.
strip_prefix = "pugixml-1.11.4",
urls = ["https://github.com/zeux/pugixml/releases/download/v1.11.4/pugixml-1.11.4.tar.gz"],
)
 
filename was Deleted added: 70, removed: 7, total 63
@@ -0,0 +1,9 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
 
# Builds pugixml as a single library from every .cpp/.hpp file under src/.
cc_library(
name = "pugixml",
srcs = glob(["src/*.cpp"]),
hdrs = glob(["src/*.hpp"]),
# Strip "src" so dependents can include the headers as <pugixml.hpp>.
strip_include_prefix = "src",
visibility = ["//visibility:public"],
)
 
main.cpp added: 70, removed: 7, total 63
@@ -1,6 +1,32 @@
#include <asio.hpp>
#include <pugixml.hpp>
 
#include <cassert>
#include <iostream>
#include <sstream>
#include <string>
 
using namespace std::string_literals;
 
/// Strips the HTTP header section from a raw HTTP response.
///
/// The header section ends at the first blank line ("\r\n\r\n"); everything up
/// to and including that delimiter is removed.
///
/// @param html Raw response text (headers followed by body).
/// @return The body following the headers. If no header/body delimiter is
///         present the input is returned unchanged.
std::string drop_http_headers(std::string html) {
    const std::string delim = "\r\n\r\n";
    const auto pos = html.find(delim);
    // Without this guard, npos + delim.size() wraps around to a small number
    // and the first few characters of the input would be erased by accident.
    if (pos == std::string::npos) {
        return html;
    }
    html.erase(0, pos + delim.size());
    return html;
}
 
/// Removes the first `<head>...</head>` section from an HTML document.
///
/// Only the exact, attribute-less tags `<head>` and `</head>` are recognised.
/// The closing tag is searched for after the opening tag, so a stray
/// `</head>` earlier in the text is ignored.
///
/// @param html HTML text.
/// @return The text with the head section (tags included) removed. If there
///         is no opening tag, or no closing tag after it, the input is
///         returned unchanged.
std::string drop_head(std::string html) {
    const std::string tag_start = "<head>";
    const std::string tag_end = "</head>";

    const auto head = html.find(tag_start);
    if (head == std::string::npos) {
        // erase(npos, ...) would throw std::out_of_range.
        return html;
    }

    const auto head_end = html.find(tag_end, head + tag_start.size());
    if (head_end == std::string::npos) {
        // An unterminated head would make the length computation wrap around.
        return html;
    }

    html.erase(head, head_end - head + tag_end.size());
    return html;
}
 
/// Removes a leading `<!doctype html>` declaration from an HTML document.
///
/// The declaration is matched case-insensitively (ASCII only), so both
/// `<!doctype html>` and `<!DOCTYPE html>` are stripped. It must be the very
/// first thing in the text; leading whitespace is not skipped.
///
/// @param html HTML text.
/// @return The text without the leading doctype. Input that does not start
///         with the doctype is returned unchanged instead of being truncated.
std::string drop_doctype(std::string html) {
    const std::string doctype = "<!doctype html>";
    if (html.size() < doctype.size()) {
        return html;
    }

    // ASCII-only lowercase; enough for the fixed ASCII declaration compared here.
    const auto to_lower = [](char c) {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    };

    for (std::string::size_type i = 0; i < doctype.size(); ++i) {
        if (to_lower(html[i]) != doctype[i]) {
            return html;
        }
    }

    html.erase(0, doctype.size());
    return html;
}
 
int main(int argc, char **argv) {
asio::ip::tcp::iostream stream("www.example.com", "http");
@@ -9,5 +35,21 @@ int main(int argc, char **argv) {
stream << "Accept: text/html\r\n";
stream << "Connection: close\r\n\r\n";
stream.flush();
std::cout << stream.rdbuf();
 
std::stringstream ss;
ss << stream.rdbuf();
auto buffer = ss.str();
 
buffer = drop_http_headers(buffer);
buffer = drop_head(buffer);
buffer = drop_doctype(buffer);
 
pugi::xml_document doc;
if (auto res = doc.load_string(buffer.c_str()); !res) {
std::cerr << res.offset << ": " << res.description() << '\n';
std::cerr << buffer.c_str() + res.offset;
return 1;
}
 
doc.print(std::cout);
}