srctree

Robin Linden parent 3c57e4ba 7f18cc70
js: Implement a basic tokenizer

It currently barely handles anything, but it's a start.

inlinesplit
filename was Deleted added: 138, removed: 2, total 136
@@ -0,0 +1,109 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef JS_TOKENIZER_H_
#define JS_TOKENIZER_H_
 
#include <cassert>
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <variant>
#include <vector>
 
namespace js::parse {
 
// Token for a name, e.g. `hello` or `func`. The tokenizer currently builds
// these out of runs of ASCII letters.
struct Identifier {
// The identifier's text, copied character by character from the input.
std::string name;
bool operator==(Identifier const &) const = default;
};
 
// Token for a '(' character. Carries no data, so all instances compare equal.
struct LParen {
bool operator==(LParen const &) const = default;
};
 
// Token for a ')' character. Carries no data, so all instances compare equal.
struct RParen {
bool operator==(RParen const &) const = default;
};
 
// Token for a ';' character. Carries no data, so all instances compare equal.
struct Semicolon {
bool operator==(Semicolon const &) const = default;
};
 
// Token produced once the tokenizer has consumed all of its input.
struct Eof {
bool operator==(Eof const &) const = default;
};
 
// Any token the tokenizer can produce. Use std::holds_alternative / std::get /
// std::visit to find out which kind of token a given value holds.
using Token = std::variant< //
Identifier,
LParen,
RParen,
Semicolon,
Eof>;
 
class Tokenizer {
public:
explicit Tokenizer(std::string_view input) : input_{input} {}
 
Token tokenize() {
if (pos_ >= input_.size()) {
return Eof{};
}
 
char current = input_[pos_++];
switch (current) {
case '(':
return LParen{};
case ')':
return RParen{};
case ';':
return Semicolon{};
default:
break;
}
 
assert(is_alpha(current));
Identifier id{};
while (true) {
id.name += current;
auto next = peek();
if (!next || !is_alpha(*next)) {
break;
}
current = *next;
pos_ += 1;
}
 
return id;
}
 
private:
std::string_view input_;
std::size_t pos_{};
 
std::optional<char> peek() const {
if ((pos_) < input_.size()) {
return input_[pos_];
}
return std::nullopt;
}
 
static constexpr bool is_alpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
};
 
inline std::vector<Token> tokenize(std::string_view input) {
std::vector<Token> tokens;
auto t = Tokenizer{input};
 
do {
tokens.push_back(t.tokenize());
} while (!std::holds_alternative<Eof>(tokens.back()));
 
return tokens;
}
 
} // namespace js::parse
 
#endif
 
filename was Deleted added: 138, removed: 2, total 136
@@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "js/tokenizer.h"
 
#include "etest/etest.h"
 
#include <vector>
 
using namespace js::parse;
 
using etest::expect_eq;
 
using Tokens = std::vector<Token>;
 
int main() {
etest::test("identifier", [] {
expect_eq(tokenize("hello"), Tokens{Identifier{"hello"}, Eof{}}); //
});
 
etest::test("function call", [] {
expect_eq(tokenize("func();"), Tokens{Identifier{"func"}, LParen{}, RParen{}, Semicolon{}, Eof{}}); //
});
 
return etest::run_all_tests();
}