srctree

Mikael Larsson parent 126f7627 6954484e
css2: Add initial support for ident token

Support for escaped characters is still missing.

inline split

css2/BUILD added: 121, removed: 12, total 109

@@ -8,7 +8,10 @@ cc_library(

hdrs = glob(["*.h"]),

visibility = ["//visibility:public"],

deps = ["//util:overloaded"],

deps = [

"//util:overloaded",

"//util:string",

)

[cc_test(

css2/tokenizer.cpp added: 121, removed: 12, total 109

@@ -5,10 +5,25 @@

#include "css2/tokenizer.h"

#include "util/string.h"

#include <exception>

namespace css2 {

namespace {

// Reports whether `c` may begin an identifier: an underscore, or a letter as
// classified by util::is_alpha.
constexpr bool is_ident_start_code_point(char c) {
    // TODO(mkiael): Handle non-ascii code point
    if (c == '_') {
        return true;
    }
    return util::is_alpha(c);
}

// Reports whether `c` may appear inside an identifier: a hyphen, a digit as
// classified by util::is_digit, or anything accepted by
// is_ident_start_code_point.
constexpr bool is_ident_code_point(char c) {
    if (c == '-') {
        return true;
    }
    if (util::is_digit(c)) {
        return true;
    }
    return is_ident_start_code_point(c);
}

} // namespace

void Tokenizer::run() {

while (true) {

switch (state_) {

@@ -18,6 +33,12 @@ void Tokenizer::run() {

return;

}

if (is_ident_start_code_point(*c)) {

temporary_buffer_ = "";

reconsume_in(State::IdentLike);

continue;

}

switch (*c) {

case ' ':

case '\n':

@@ -33,6 +54,9 @@ void Tokenizer::run() {

case '/':

state_ = State::CommentStart;

continue;

case '-':

state_ = State::HyphenMinus;

continue;

default:

emit(DelimToken{*c});

continue;

@@ -86,6 +110,47 @@ void Tokenizer::run() {

}

case State::HyphenMinus: {

auto c = consume_next_input_character();

if (!c) {

emit(DelimToken{'-'});

return;

}

if (is_ident_start_code_point(*c) || *c == '-') {

temporary_buffer_ = '-';

temporary_buffer_ += *c;

state_ = State::IdentLike;

continue;

}

// TODO(mkiael): Handle numeric token

// TODO(mkiael): Handle escaped code point in ident sequence

std::terminate();

}

case State::IdentLike: {

auto c = consume_next_input_character();

if (!c) {

emit(IdentToken{temporary_buffer_});

return;

}

if (is_ident_code_point(*c)) {

temporary_buffer_ += *c;

continue;

} else if (*c == '\\') {

// TODO(mkiael): Handle escaped code point

std::terminate();

}

// TODO(mkiael): Handle url and function token

emit(IdentToken{temporary_buffer_});

reconsume_in(State::Main);

continue;

}

case State::String: {

auto c = consume_next_input_character();

if (!c) {

css2/tokenizer.h added: 121, removed: 12, total 109

@@ -21,6 +21,8 @@ enum class State {

CommentStart,

Comment,

CommentEnd,

HyphenMinus,

IdentLike,

String,

Whitespace,

};

@@ -46,6 +48,8 @@ private:

char string_ending_{};

std::string temporary_buffer_{};

std::function<void(Token &&)> on_emit_;

std::function<void(ParseError)> on_error_;

css2/tokenizer_test.cpp added: 121, removed: 12, total 109

@@ -59,9 +59,9 @@ void expect_error(

int main() {

etest::test("delimiter", [] {

auto output = run_tokenizer("a");

auto output = run_tokenizer("?");

expect_token(output, DelimToken{'a'});

expect_token(output, DelimToken{'?'});

});

etest::test("comment", [] {

@@ -80,7 +80,7 @@ int main() {

auto output = run_tokenizer("/a");

expect_token(output, DelimToken{'/'});

expect_token(output, DelimToken{'a'});

expect_token(output, IdentToken{"a"});

});

etest::test("delimiter after comment", [] {

@@ -111,28 +111,28 @@ int main() {

etest::test("end with one tab", [] {

auto output = run_tokenizer("a\t");

expect_token(output, DelimToken{'a'});

expect_token(output, IdentToken{"a"});

expect_token(output, WhitespaceToken{});

});

etest::test("end with two tabs", [] {

auto output = run_tokenizer("a\t\t");

expect_token(output, DelimToken{'a'});

expect_token(output, IdentToken{"a"});

expect_token(output, WhitespaceToken{});

});

etest::test("end with one line feed", [] {

auto output = run_tokenizer("a\n");

expect_token(output, DelimToken{'a'});

expect_token(output, IdentToken{"a"});

expect_token(output, WhitespaceToken{});

});

etest::test("end with two line feeds", [] {

auto output = run_tokenizer("a\n\n");

expect_token(output, DelimToken{'a'});

expect_token(output, IdentToken{"a"});

expect_token(output, WhitespaceToken{});

});

@@ -163,5 +163,42 @@ int main() {

expect_token(output, WhitespaceToken{});

});

etest::test("ident token", [] {

auto output = run_tokenizer("foo");

expect_token(output, IdentToken{"foo"});

});

etest::test("ident token with digit", [] {

auto output = run_tokenizer("f0o");

expect_token(output, IdentToken{"f0o"});

});

etest::test("ident token starting with one dash", [] {

auto output = run_tokenizer("-foo");

expect_token(output, IdentToken{"-foo"});

});

etest::test("ident token starting with two dashes", [] {

auto output = run_tokenizer("--foo");

expect_token(output, IdentToken{"--foo"});

});

etest::test("ident token starting with underscore", [] {

auto output = run_tokenizer("_foo-bar");

expect_token(output, IdentToken{"_foo-bar"});

});

etest::test("whitespace after ident", [] {

auto output = run_tokenizer("abc ");

expect_token(output, IdentToken{"abc"});

expect_token(output, WhitespaceToken{});

});

return etest::run_all_tests();

}