srctree

Robin Linden parent 9b10932a ac2f76d8
unicode: Split out pure unicode bits from //idna

These are generated from different specifications and are required in different places, even if idna depends on parts of unicode.

.clang-tidy added: 70, removed: 42, total 28

@@ -95,7 +95,7 @@ Checks: >

WarningsAsErrors: "*"

HeaderFilterRegex: "\\./(archive|azm|browser|css|css2|dom|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|uri|url|util|wasm)/"

HeaderFilterRegex: "\\./(archive|azm|browser|css|css2|dom|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|unicode|uri|url|util|wasm)/"

CheckOptions:

# performance-move-const-arg

.gitlint added: 70, removed: 42, total 28

@@ -4,4 +4,4 @@ ignore=body-is-missing

# TODO(robinlinden): Better way of documenting and setting this up.

# Each commit must start with the main area it affects.

[title-match-regex]

regex=^(archive|azm|browser|bzl|css|css2|dom|dom2|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|uri|url|util|wasm|all|build|ci|deps|doc|meta)(/.*|\+.*)?:

regex=^(archive|azm|browser|bzl|css|css2|dom|dom2|engine|etest|geom|gfx|html|html2|idna|img|js|layout|net|os|protocol|render|style|tui|type|unicode|uri|url|util|wasm|all|build|ci|deps|doc|meta)(/.*|\+.*)?:

idna/BUILD added: 70, removed: 42, total 28

@@ -15,19 +15,6 @@ genrule(

tools = [":idna_data_processor"],

)

py_binary(

name = "unicode_data_processor",

srcs = ["unicode_data_processor.py"],

)

genrule(

name = "generate_unicode_data",

srcs = ["@ucd//:UnicodeData.txt"],

outs = ["unicode_data.h"],

cmd = "$(location :unicode_data_processor) $(location @ucd//:UnicodeData.txt) >$@",

tools = [":unicode_data_processor"],

)

cc_library(

name = "idna",

srcs = glob(

@@ -36,7 +23,6 @@ cc_library(

),

hdrs = glob(["*.h"]) + [

":generate_idna_data",

":generate_unicode_data",

],

copts = HASTUR_COPTS,

visibility = ["//visibility:public"],

unicode/BUILD added: 70, removed: 42, total 28

@@ -0,0 +1,42 @@

load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")

load("@rules_python//python:defs.bzl", "py_binary")

load("//bzl:copts.bzl", "HASTUR_COPTS")

py_binary(

name = "unicode_data_processor",

srcs = ["unicode_data_processor.py"],

)

genrule(

name = "generate_unicode_data",

srcs = ["@ucd//:UnicodeData.txt"],

outs = ["unicode_data.h"],

cmd = "$(location :unicode_data_processor) $(location @ucd//:UnicodeData.txt) >$@",

tools = [":unicode_data_processor"],

)

cc_library(

name = "unicode",

srcs = glob(

include = ["*.cpp"],

exclude = ["*_test.cpp"],

),

hdrs = glob(["*.h"]) + [

":generate_unicode_data",

],

copts = HASTUR_COPTS,

visibility = ["//visibility:public"],

deps = ["//util:unicode"],

)

[cc_test(

name = src[:-4],

size = "small",

srcs = [src],

copts = HASTUR_COPTS,

deps = [

":unicode",

"//etest",

"//util:unicode",

],

) for src in glob(["*_test.cpp"])]

idna/unicode.cpp added: 70, removed: 42, total 28

@@ -2,9 +2,9 @@

//

// SPDX-License-Identifier: BSD-2-Clause

#include "idna/unicode.h"

#include "unicode/unicode.h"

#include "idna/unicode_data.h"

#include "unicode/unicode_data.h"

#include "util/unicode.h"

@@ -16,7 +16,7 @@

#include <string_view>

#include <utility>

namespace idna {

namespace unicode {

namespace {

void decompose_to(std::ostream &os, char32_t code_point) {

@@ -25,7 +25,7 @@ void decompose_to(std::ostream &os, char32_t code_point) {

// * clang-tidy says this is pointer-ish, but msvc disagrees.

// NOLINTNEXTLINE(misc-include-cleaner,readability-qualified-auto)

auto maybe_decomposition = std::ranges::lower_bound(

unicode::kDecompositions, code_point, {}, &decltype(unicode::kDecompositions)::value_type::code_point);

generated::kDecompositions, code_point, {}, &decltype(generated::kDecompositions)::value_type::code_point);

// This code point does not decompose.

if (maybe_decomposition->code_point != code_point) {

@@ -52,4 +52,4 @@ std::string Unicode::decompose(std::string_view input) {

return std::move(ss).str();

}

} // namespace idna

} // namespace unicode

idna/unicode.h added: 70, removed: 42, total 28

@@ -2,13 +2,13 @@

//

// SPDX-License-Identifier: BSD-2-Clause

#ifndef IDNA_UNICODE_H_

#define IDNA_UNICODE_H_

#ifndef UNICODE_UNICODE_H_

#define UNICODE_UNICODE_H_

#include <string>

#include <string_view>

namespace idna {

namespace unicode {

class Unicode {

public:

@@ -16,6 +16,6 @@ public:

static std::string decompose(std::string_view);

};

} // namespace idna

} // namespace unicode

#endif

idna/unicode_data_processor.py added: 70, removed: 42, total 28

@@ -58,14 +58,14 @@ if __name__ == "__main__":

// This file is generated. Do not touch it.

#ifndef IDNA_UNICODE_DATA_H_

#define IDNA_UNICODE_DATA_H_

#ifndef UNICODE_UNICODE_DATA_H_

#define UNICODE_UNICODE_DATA_H_

// clang-format off

#include <array>

#include <string_view>

namespace idna::unicode {{

namespace unicode::generated {{

struct Decomposition {{

char32_t code_point{{}};

@@ -77,7 +77,7 @@ if __name__ == "__main__":

{",\n ".join(d.to_cxx_class() for d in decompositions)}

}}}};

}} // namespace idna::unicode

}} // namespace unicode::generated

// clang-format on

#endif

idna/unicode_test.cpp added: 70, removed: 42, total 28

@@ -2,7 +2,7 @@

//

// SPDX-License-Identifier: BSD-2-Clause

#include "idna/unicode.h"

#include "unicode/unicode.h"

#include "etest/etest2.h"

@@ -10,20 +10,20 @@ int main() {

etest::Suite s{};

s.add_test("not decomposed", [](etest::IActions &a) {

a.expect_eq(idna::Unicode::decompose("abc123xyz"), "abc123xyz"); //

a.expect_eq(unicode::Unicode::decompose("abc123xyz"), "abc123xyz"); //

});

s.add_test("decomposed", [](etest::IActions &a) {

// A + COMBINING RING ABOVE

a.expect_eq(idna::Unicode::decompose("Å"), "A\xcc\x8a");

a.expect_eq(unicode::Unicode::decompose("Å"), "A\xcc\x8a");

// s + COMBINING DOT BELOW + COMBINING DOT ABOVE

a.expect_eq(idna::Unicode::decompose("ṩ"), "s\xcc\xa3\xcc\x87");

a.expect_eq(unicode::Unicode::decompose("ṩ"), "s\xcc\xa3\xcc\x87");

});

s.add_test("mixed", [](etest::IActions &a) {

// s + COMBINING DOT BELOW + COMBINING DOT ABOVE

a.expect_eq(idna::Unicode::decompose("123ṩ567"),

a.expect_eq(unicode::Unicode::decompose("123ṩ567"),

"123"

"s\xcc\xa3\xcc\x87"

"567");