srctree

Mikael Larsson parent 126f7627 6954484e
css2: Add initial support for ident token

Support for escaped characters is still missing.

inlinesplit
css2/BUILD added: 121, removed: 12, total 109
@@ -8,7 +8,10 @@ cc_library(
),
hdrs = glob(["*.h"]),
visibility = ["//visibility:public"],
deps = ["//util:overloaded"],
deps = [
"//util:overloaded",
"//util:string",
],
)
 
[cc_test(
 
css2/tokenizer.cpp added: 121, removed: 12, total 109
@@ -5,10 +5,25 @@
 
#include "css2/tokenizer.h"
 
#include "util/string.h"
 
#include <exception>
 
namespace css2 {
 
namespace {
 
// Returns true when `c` may begin an identifier: an underscore or a character
// accepted by util::is_alpha.
constexpr bool is_ident_start_code_point(char c) {
    // TODO(mkiael): Handle non-ascii code point
    return c == '_' || util::is_alpha(c);
}
 
// Returns true when `c` may appear inside an identifier: a hyphen, a character
// accepted by util::is_digit, or anything that may begin an identifier.
constexpr bool is_ident_code_point(char c) {
    if (c == '-' || util::is_digit(c)) {
        return true;
    }

    return is_ident_start_code_point(c);
}
 
} // namespace
 
void Tokenizer::run() {
while (true) {
switch (state_) {
@@ -18,6 +33,12 @@ void Tokenizer::run() {
return;
}
 
if (is_ident_start_code_point(*c)) {
temporary_buffer_ = "";
reconsume_in(State::IdentLike);
continue;
}
 
switch (*c) {
case ' ':
case '\n':
@@ -33,6 +54,9 @@ void Tokenizer::run() {
case '/':
state_ = State::CommentStart;
continue;
case '-':
state_ = State::HyphenMinus;
continue;
default:
emit(DelimToken{*c});
continue;
@@ -86,6 +110,47 @@ void Tokenizer::run() {
}
}
 
case State::HyphenMinus: {
    // A '-' has already been consumed; the next character decides what it begins.
    auto c = consume_next_input_character();
    if (!c) {
        // Input ended right after the '-': it is a lone delimiter.
        emit(DelimToken{'-'});
        return;
    }

    if (is_ident_start_code_point(*c) || *c == '-') {
        // "-x" and "--" both begin an identifier; seed the buffer with the
        // two characters consumed so far and let IdentLike collect the rest.
        temporary_buffer_ = '-';
        temporary_buffer_ += *c;
        state_ = State::IdentLike;
        continue;
    }

    if (*c == '\\') {
        // TODO(mkiael): Handle escaped code point in ident sequence
        // Escapes are not implemented yet; abort like State::IdentLike does.
        std::terminate();
    }

    // TODO(mkiael): Handle numeric token
    // The '-' starts neither an identifier nor an escape, so it is a plain
    // delimiter. Previously this path aborted the whole program on ordinary
    // input such as "a - b". The character just read is handed back with
    // reconsume_in, the same way State::IdentLike finishes a token.
    emit(DelimToken{'-'});
    reconsume_in(State::Main);
    continue;
}
 
case State::IdentLike: {
// Appends identifier characters to temporary_buffer_ one at a time and emits
// the buffer as an IdentToken once the identifier ends.
auto c = consume_next_input_character();
if (!c) {
// End of input: whatever has been collected so far is the identifier.
emit(IdentToken{temporary_buffer_});
return;
}

if (is_ident_code_point(*c)) {
temporary_buffer_ += *c;
continue;
} else if (*c == '\\') {
// TODO(mkiael): Handle escaped code point
// Escapes are not implemented yet, so a backslash aborts the program.
std::terminate();
}

// TODO(mkiael): Handle url and function token

// *c is not part of the identifier: emit what was collected and continue in
// State::Main. NOTE(review): reconsume_in presumably makes Main see *c
// again -- confirm against its definition.
emit(IdentToken{temporary_buffer_});
reconsume_in(State::Main);
continue;
}
 
case State::String: {
auto c = consume_next_input_character();
if (!c) {
 
css2/tokenizer.h added: 121, removed: 12, total 109
@@ -21,6 +21,8 @@ enum class State {
CommentStart,
Comment,
CommentEnd,
HyphenMinus,
IdentLike,
String,
Whitespace,
};
@@ -46,6 +48,8 @@ private:
 
char string_ending_{};
 
std::string temporary_buffer_{};
 
std::function<void(Token &&)> on_emit_;
std::function<void(ParseError)> on_error_;
 
 
css2/tokenizer_test.cpp added: 121, removed: 12, total 109
@@ -59,9 +59,9 @@ void expect_error(
 
int main() {
etest::test("delimiter", [] {
auto output = run_tokenizer("a");
auto output = run_tokenizer("?");
 
expect_token(output, DelimToken{'a'});
expect_token(output, DelimToken{'?'});
});
 
etest::test("comment", [] {
@@ -80,7 +80,7 @@ int main() {
auto output = run_tokenizer("/a");
 
expect_token(output, DelimToken{'/'});
expect_token(output, DelimToken{'a'});
expect_token(output, IdentToken{"a"});
});
 
etest::test("delimiter after comment", [] {
@@ -111,28 +111,28 @@ int main() {
etest::test("end with one tab", [] {
auto output = run_tokenizer("a\t");
 
expect_token(output, DelimToken{'a'});
expect_token(output, IdentToken{"a"});
expect_token(output, WhitespaceToken{});
});
 
etest::test("end with two tabs", [] {
auto output = run_tokenizer("a\t\t");
 
expect_token(output, DelimToken{'a'});
expect_token(output, IdentToken{"a"});
expect_token(output, WhitespaceToken{});
});
 
etest::test("end with one line feed", [] {
auto output = run_tokenizer("a\n");
 
expect_token(output, DelimToken{'a'});
expect_token(output, IdentToken{"a"});
expect_token(output, WhitespaceToken{});
});
 
etest::test("end with two line feeds", [] {
auto output = run_tokenizer("a\n\n");
 
expect_token(output, DelimToken{'a'});
expect_token(output, IdentToken{"a"});
expect_token(output, WhitespaceToken{});
});
 
@@ -163,5 +163,42 @@ int main() {
expect_token(output, WhitespaceToken{});
});
 
etest::test("ident token", [] {
auto output = run_tokenizer("foo");
 
expect_token(output, IdentToken{"foo"});
});
 
etest::test("ident token with digit", [] {
auto output = run_tokenizer("f0o");
 
expect_token(output, IdentToken{"f0o"});
});
 
etest::test("ident token starting with one dash", [] {
auto output = run_tokenizer("-foo");
 
expect_token(output, IdentToken{"-foo"});
});
 
etest::test("ident token starting with two dashes", [] {
auto output = run_tokenizer("--foo");
 
expect_token(output, IdentToken{"--foo"});
});
 
etest::test("ident token starting with underscore", [] {
auto output = run_tokenizer("_foo-bar");
 
expect_token(output, IdentToken{"_foo-bar"});
});
 
etest::test("whitespace after ident", [] {
auto output = run_tokenizer("abc ");
 
expect_token(output, IdentToken{"abc"});
expect_token(output, WhitespaceToken{});
});
 
return etest::run_all_tests();
}