srctree

Robin Linden parent b46cd107 150729c7
idna: Fix uts46 mappings to code points >0xFFFF being broken

The \u1234 escapes only work with exactly 4 hex digits, so code points above 0xFFFF must be written with the 8-digit \U escape instead.

inlinesplit
idna/idna_data_processor.py added: 12, removed: 5, total 7
@@ -171,21 +171,23 @@ class IDNA:
return idna
 
 
# The version of MSVC we target doesn't yet support \u{12abc}, so we always
# write the full 8-character escapes.
def to_cxx_variant(a: Mapping) -> str:
if isinstance(a, Disallowed):
return "Disallowed{}"
elif isinstance(a, DisallowedStd3Valid):
return "DisallowedStd3Valid{}"
elif isinstance(a, DisallowedStd3Mapped):
mapping = "".join(f"\\u{c:04X}" for c in a.maps_to)
mapping = "".join(f"\\U{c:08X}" for c in a.maps_to)
return f'DisallowedStd3Mapped{{"{mapping}"}}'
elif isinstance(a, Ignored):
return "Ignored{}"
elif isinstance(a, Mapped):
mapping = "".join(f"\\u{c:04X}" for c in a.maps_to)
mapping = "".join(f"\\U{c:08X}" for c in a.maps_to)
return f'Mapped{{"{mapping}"}}'
elif isinstance(a, Deviation):
mapping = "".join(f"\\u{c:04X}" for c in a.maps_to)
mapping = "".join(f"\\U{c:08X}" for c in a.maps_to)
return f'Deviation{{"{mapping}"}}'
elif isinstance(a, Valid):
return "Valid{}"
 
idna/uts46_test.cpp added: 12, removed: 5, total 7
@@ -30,6 +30,11 @@ int main() {
a.expect_eq(idna::Uts46::map("ABCXYZ"), "abcxyz");
a.expect_eq(idna::Uts46::map("日本語。JP"), "日本語.jp");
a.expect_eq(idna::Uts46::map("☕.us"), "☕.us");
 
// Code point that maps to a character requiring 5 characters to
// represent, \u{20A2C}.
// https://www.compart.com/en/unicode/U+2F834
a.expect_eq(idna::Uts46::map("\xf0\xaf\xa0\xb4").value(), "\xf0\xa0\xa8\xac");
});
 
s.add_test("deviation", [](etest::IActions &a) {