srctree

Robin Linden parent 7c2ec192 07065d57
idna: Generate a uts46 table that gets along better w/ std::lower_bound

inlinesplit
idna/idna_data_processor.py added: 21, removed: 9, total 12
@@ -100,11 +100,16 @@ class IDNA:
if len(cols) <= 1:
continue
 
code_point = int(cols[0].split("..")[0].lstrip("0") or "0", 16)
if ".." in cols[0]:
code_point = int(cols[0].split("..")[1].lstrip("0"), 16)
else:
code_point = int(cols[0].lstrip("0"), 16)
 
status = cols[1]
if status == "disallowed":
assert len(cols) == 2
if len(mappings) > 0 and isinstance(mappings[-1][1], Disallowed):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, Disallowed()))
elif status == "disallowed_STD3_valid":
@@ -112,6 +117,7 @@ class IDNA:
if len(mappings) > 0 and isinstance(
mappings[-1][1], DisallowedStd3Valid
):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, DisallowedStd3Valid()))
elif status == "disallowed_STD3_mapped":
@@ -119,16 +125,19 @@ class IDNA:
if len(mappings) > 0 and mappings[-1][
1
] == DisallowedStd3Mapped.from_string(cols[2]):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, DisallowedStd3Mapped.from_string(cols[2])))
elif status == "ignored":
assert len(cols) == 2
if len(mappings) > 0 and isinstance(mappings[-1][1], Ignored):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, Ignored()))
elif status == "mapped":
assert len(cols) == 3
if len(mappings) > 0 and mappings[-1][1] == Mapped.from_string(cols[2]):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, Mapped.from_string(cols[2])))
elif status == "deviation":
@@ -136,18 +145,22 @@ class IDNA:
if len(mappings) > 0 and mappings[-1][1] == Deviation.from_string(
cols[2]
):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, Deviation.from_string(cols[2])))
elif status == "valid" and len(cols) == 2:
if len(mappings) > 0 and isinstance(mappings[-1][1], Valid):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, Valid()))
elif status == "valid" and len(cols) == 4 and cols[3] == "NV8":
if len(mappings) > 0 and isinstance(mappings[-1][1], ValidNv8):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, ValidNv8()))
elif status == "valid" and len(cols) == 4 and cols[3] == "XV8":
if len(mappings) > 0 and isinstance(mappings[-1][1], ValidXv8):
mappings[-1] = (code_point, mappings[-1][1])
continue
mappings.append((code_point, ValidXv8()))
else:
 
idna/uts46.cpp added: 21, removed: 9, total 12
@@ -31,11 +31,6 @@ std::optional<std::string> Uts46::map(std::string_view input) {
auto mapping = std::ranges::lower_bound(
uts46::kMappings, code_point, {}, &decltype(uts46::kMappings)::value_type::first);
 
// TODO(robinlinden): Generate better mapping table.
if (mapping->first != code_point) {
mapping -= 1;
}
 
auto const &entry = mapping->second;
if (std::holds_alternative<uts46::Ignored>(entry)) {
continue;
 
idna/uts46_test.cpp added: 21, removed: 9, total 12
@@ -16,7 +16,11 @@ int main() {
etest::Suite s{};
 
s.add_test("disallowed", [](etest::IActions &a) {
// The first Unicode code point (U+0000)
a.expect_eq(idna::Uts46::map("\0"sv), std::nullopt);
// and the last one, U+10FFFF, but in UTF-8.
a.expect_eq(idna::Uts46::map("\xf4\x8f\xbf\xbf"), std::nullopt);
 
a.expect_eq(idna::Uts46::map(","), std::nullopt);
a.expect_eq(idna::Uts46::map("\xc2\xa0"), std::nullopt);
a.expect_eq(idna::Uts46::map("a⒈com"), std::nullopt);