srctree

Robin Linden parent 9b10932a ac2f76d8
unicode: Split out pure unicode bits from //idna

These are generated from different specifications and are required in different places, even if idna depends on parts of unicode.
.clang-tidy added: 70, removed: 42, total 28
@@ -95,7 +95,7 @@ Checks: >
 
WarningsAsErrors: "*"
 
HeaderFilterRegex: "\\./(archive|azm|browser|css|css2|dom|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|uri|url|util|wasm)/"
HeaderFilterRegex: "\\./(archive|azm|browser|css|css2|dom|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|unicode|uri|url|util|wasm)/"
 
CheckOptions:
# performance-move-const-arg
 
.gitlint added: 70, removed: 42, total 28
@@ -4,4 +4,4 @@ ignore=body-is-missing
# TODO(robinlinden): Better way of documenting and setting this up.
# Each commit must start with the main area it affects.
[title-match-regex]
regex=^(archive|azm|browser|bzl|css|css2|dom|dom2|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|uri|url|util|wasm|all|build|ci|deps|doc|meta)(/.*|\+.*)?:
regex=^(archive|azm|browser|bzl|css|css2|dom|dom2|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|unicode|uri|url|util|wasm|all|build|ci|deps|doc|meta)(/.*|\+.*)?:
 
idna/BUILD added: 70, removed: 42, total 28
@@ -15,19 +15,6 @@ genrule(
tools = [":idna_data_processor"],
)
 
py_binary(
name = "unicode_data_processor",
srcs = ["unicode_data_processor.py"],
)
 
genrule(
name = "generate_unicode_data",
srcs = ["@ucd//:UnicodeData.txt"],
outs = ["unicode_data.h"],
cmd = "$(location :unicode_data_processor) $(location @ucd//:UnicodeData.txt) >$@",
tools = [":unicode_data_processor"],
)
 
cc_library(
name = "idna",
srcs = glob(
@@ -36,7 +23,6 @@ cc_library(
),
hdrs = glob(["*.h"]) + [
":generate_idna_data",
":generate_unicode_data",
],
copts = HASTUR_COPTS,
visibility = ["//visibility:public"],
 
unicode/BUILD added: 70, removed: 42, total 28
@@ -0,0 +1,42 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("@rules_python//python:defs.bzl", "py_binary")
load("//bzl:copts.bzl", "HASTUR_COPTS")
 
# Python script that is run by the `generate_unicode_data` genrule in this
# package (it is listed there under `tools`).
py_binary(
    name = "unicode_data_processor",
    srcs = [
        "unicode_data_processor.py",
    ],
)
 
# Produces unicode_data.h: the processor is invoked with the path to
# UnicodeData.txt as its only argument and its stdout is redirected into the
# single declared output.
genrule(
    name = "generate_unicode_data",
    srcs = ["@ucd//:UnicodeData.txt"],
    outs = ["unicode_data.h"],
    cmd = " ".join([
        "$(location :unicode_data_processor)",
        "$(location @ucd//:UnicodeData.txt)",
        ">$@",
    ]),
    tools = [":unicode_data_processor"],
)
 
# The library itself: every non-test .cpp in this package, every checked-in
# header, and the header emitted by `generate_unicode_data`.
cc_library(
    name = "unicode",
    srcs = glob(
        ["*.cpp"],
        exclude = ["*_test.cpp"],
    ),
    hdrs = glob(["*.h"]) + [":generate_unicode_data"],
    copts = HASTUR_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        "//util:unicode",
    ],
)
 
# One small cc_test per *_test.cpp file in this package. The target name is the
# file name with its last four characters (the ".cpp" suffix) dropped.
[
    cc_test(
        name = test_src[:-4],
        size = "small",
        srcs = [test_src],
        copts = HASTUR_COPTS,
        deps = [
            ":unicode",
            "//etest",
            "//util:unicode",
        ],
    )
    for test_src in glob(["*_test.cpp"])
]
 
idna/unicode.cpp added: 70, removed: 42, total 28
@@ -2,9 +2,9 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "idna/unicode.h"
#include "unicode/unicode.h"
 
#include "idna/unicode_data.h"
#include "unicode/unicode_data.h"
 
#include "util/unicode.h"
 
@@ -16,7 +16,7 @@
#include <string_view>
#include <utility>
 
namespace idna {
namespace unicode {
namespace {
 
void decompose_to(std::ostream &os, char32_t code_point) {
@@ -25,7 +25,7 @@ void decompose_to(std::ostream &os, char32_t code_point) {
// * clang-tidy says this is pointer-ish, but msvc disagrees.
// NOLINTNEXTLINE(misc-include-cleaner,readability-qualified-auto)
auto maybe_decomposition = std::ranges::lower_bound(
unicode::kDecompositions, code_point, {}, &decltype(unicode::kDecompositions)::value_type::code_point);
generated::kDecompositions, code_point, {}, &decltype(generated::kDecompositions)::value_type::code_point);
 
// This code point does not decompose.
if (maybe_decomposition->code_point != code_point) {
@@ -52,4 +52,4 @@ std::string Unicode::decompose(std::string_view input) {
return std::move(ss).str();
}
 
} // namespace idna
} // namespace unicode
 
idna/unicode.h added: 70, removed: 42, total 28
@@ -2,13 +2,13 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef IDNA_UNICODE_H_
#define IDNA_UNICODE_H_
#ifndef UNICODE_UNICODE_H_
#define UNICODE_UNICODE_H_
 
#include <string>
#include <string_view>
 
namespace idna {
namespace unicode {
 
class Unicode {
public:
@@ -16,6 +16,6 @@ public:
static std::string decompose(std::string_view);
};
 
} // namespace idna
} // namespace unicode
 
#endif
 
idna/unicode_data_processor.py added: 70, removed: 42, total 28
@@ -58,14 +58,14 @@ if __name__ == "__main__":
 
// This file is generated. Do not touch it.
 
#ifndef IDNA_UNICODE_DATA_H_
#define IDNA_UNICODE_DATA_H_
#ifndef UNICODE_UNICODE_DATA_H_
#define UNICODE_UNICODE_DATA_H_
// clang-format off
 
#include <array>
#include <string_view>
 
namespace idna::unicode {{
namespace unicode::generated {{
 
struct Decomposition {{
char32_t code_point{{}};
@@ -77,7 +77,7 @@ if __name__ == "__main__":
{",\n ".join(d.to_cxx_class() for d in decompositions)}
}}}};
 
}} // namespace idna::unicode
}} // namespace unicode::generated
 
// clang-format on
#endif
 
idna/unicode_test.cpp added: 70, removed: 42, total 28
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "idna/unicode.h"
#include "unicode/unicode.h"
 
#include "etest/etest2.h"
 
@@ -10,20 +10,20 @@ int main() {
etest::Suite s{};
 
s.add_test("not decomposed", [](etest::IActions &a) {
a.expect_eq(idna::Unicode::decompose("abc123xyz"), "abc123xyz"); //
a.expect_eq(unicode::Unicode::decompose("abc123xyz"), "abc123xyz"); //
});
 
s.add_test("decomposed", [](etest::IActions &a) {
// A + COMBINING RING ABOVE
a.expect_eq(idna::Unicode::decompose("Å"), "A\xcc\x8a");
a.expect_eq(unicode::Unicode::decompose("Å"), "A\xcc\x8a");
 
// s + COMBINING DOT BELOW + COMBINING DOT ABOVE
a.expect_eq(idna::Unicode::decompose("ṩ"), "s\xcc\xa3\xcc\x87");
a.expect_eq(unicode::Unicode::decompose("ṩ"), "s\xcc\xa3\xcc\x87");
});
 
s.add_test("mixed", [](etest::IActions &a) {
// s + COMBINING DOT BELOW + COMBINING DOT ABOVE
a.expect_eq(idna::Unicode::decompose("123ṩ567"),
a.expect_eq(unicode::Unicode::decompose("123ṩ567"),
"123"
"s\xcc\xa3\xcc\x87"
"567");