srctree

Robin Linden parent 36c25d38 6c686ccd
html2: Add support for tokenizing HTML entities

Only entities resulting in ASCII characters are supported right now.
html/parser_test.cpp added: 2522, removed: 16, total 2506
@@ -141,5 +141,21 @@ int main() {
expect(text.text == "fantastic, the future is now"s);
});
 
etest::test("character reference in attribute", [] {
    // A semicolon-terminated named reference inside an attribute value is
    // expected to come out of the parser as the character it names.
    auto const root = html::parse("<html test='&lt;3'></html>"sv).html();
    auto const &attrs = root.attributes;

    expect(root.children.size() == 0);
    expect(root.name == "html"s);
    expect(attrs.size() == 1);
    expect(attrs.at("test") == "<3");
});
 
etest::test("character reference in attribute, no semicolon", [] {
    // Here the reference is missing its terminating semicolon; the test
    // expects the attribute value to be kept verbatim as "&lt3".
    auto const root = html::parse("<html test='&lt3'></html>"sv).html();
    auto const &attrs = root.attributes;

    expect(root.children.size() == 0);
    expect(root.name == "html"s);
    expect(attrs.size() == 1);
    expect(attrs.at("test") == "&lt3");
});
 
return etest::run_all_tests();
}
 
html2/BUILD added: 2522, removed: 16, total 2506
@@ -15,13 +15,20 @@ cc_library(
],
)
 
cc_test(
name = "tokenizer_test",
data_files = {
"tokenizer": ["test/simple_page.html"],
}
 
[cc_test(
name = src[:-4],
size = "small",
srcs = ["tokenizer_test.cpp"],
data = ["test/simple_page.html"],
srcs = [src],
data = data_files.get(
src[:-9],
[],
),
deps = [
":html2",
"//etest",
],
)
) for src in glob(["*_test.cpp"])]
 
filename was Deleted added: 2522, removed: 16, total 2506
@@ -0,0 +1,2279 @@
// SPDX-FileCopyrightText: 2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "html2/character_reference.h"
 
#include <array>
#include <string_view>
 
using namespace std::literals;
 
namespace html2 {
namespace {
 
// https://html.spec.whatwg.org/multipage/named-characters.html
//
// Generated from https://html.spec.whatwg.org/entities.json using
// ```python
// #!/usr/bin/env python3
//
// import json
//
// with open('entities.json') as f:
//     entities = json.loads(f.read())
//
// for key in entities.keys():
//     codepoints = entities[key]['codepoints']
//     print('{', end='')
//     print(f'"{key}"sv', *codepoints, sep=', ', end='')
//     print('},')
// ```
static constexpr std::array references = std::to_array<CharacterReference>({{"&AElig"sv, 198},
{"&AElig;"sv, 198},
{"&AMP"sv, 38},
{"&AMP;"sv, 38},
{"&Aacute"sv, 193},
{"&Aacute;"sv, 193},
{"&Abreve;"sv, 258},
{"&Acirc"sv, 194},
{"&Acirc;"sv, 194},
{"&Acy;"sv, 1040},
{"&Afr;"sv, 120068},
{"&Agrave"sv, 192},
{"&Agrave;"sv, 192},
{"&Alpha;"sv, 913},
{"&Amacr;"sv, 256},
{"&And;"sv, 10835},
{"&Aogon;"sv, 260},
{"&Aopf;"sv, 120120},
{"&ApplyFunction;"sv, 8289},
{"&Aring"sv, 197},
{"&Aring;"sv, 197},
{"&Ascr;"sv, 119964},
{"&Assign;"sv, 8788},
{"&Atilde"sv, 195},
{"&Atilde;"sv, 195},
{"&Auml"sv, 196},
{"&Auml;"sv, 196},
{"&Backslash;"sv, 8726},
{"&Barv;"sv, 10983},
{"&Barwed;"sv, 8966},
{"&Bcy;"sv, 1041},
{"&Because;"sv, 8757},
{"&Bernoullis;"sv, 8492},
{"&Beta;"sv, 914},
{"&Bfr;"sv, 120069},
{"&Bopf;"sv, 120121},
{"&Breve;"sv, 728},
{"&Bscr;"sv, 8492},
{"&Bumpeq;"sv, 8782},
{"&CHcy;"sv, 1063},
{"&COPY"sv, 169},
{"&COPY;"sv, 169},
{"&Cacute;"sv, 262},
{"&Cap;"sv, 8914},
{"&CapitalDifferentialD;"sv, 8517},
{"&Cayleys;"sv, 8493},
{"&Ccaron;"sv, 268},
{"&Ccedil"sv, 199},
{"&Ccedil;"sv, 199},
{"&Ccirc;"sv, 264},
{"&Cconint;"sv, 8752},
{"&Cdot;"sv, 266},
{"&Cedilla;"sv, 184},
{"&CenterDot;"sv, 183},
{"&Cfr;"sv, 8493},
{"&Chi;"sv, 935},
{"&CircleDot;"sv, 8857},
{"&CircleMinus;"sv, 8854},
{"&CirclePlus;"sv, 8853},
{"&CircleTimes;"sv, 8855},
{"&ClockwiseContourIntegral;"sv, 8754},
{"&CloseCurlyDoubleQuote;"sv, 8221},
{"&CloseCurlyQuote;"sv, 8217},
{"&Colon;"sv, 8759},
{"&Colone;"sv, 10868},
{"&Congruent;"sv, 8801},
{"&Conint;"sv, 8751},
{"&ContourIntegral;"sv, 8750},
{"&Copf;"sv, 8450},
{"&Coproduct;"sv, 8720},
{"&CounterClockwiseContourIntegral;"sv, 8755},
{"&Cross;"sv, 10799},
{"&Cscr;"sv, 119966},
{"&Cup;"sv, 8915},
{"&CupCap;"sv, 8781},
{"&DD;"sv, 8517},
{"&DDotrahd;"sv, 10513},
{"&DJcy;"sv, 1026},
{"&DScy;"sv, 1029},
{"&DZcy;"sv, 1039},
{"&Dagger;"sv, 8225},
{"&Darr;"sv, 8609},
{"&Dashv;"sv, 10980},
{"&Dcaron;"sv, 270},
{"&Dcy;"sv, 1044},
{"&Del;"sv, 8711},
{"&Delta;"sv, 916},
{"&Dfr;"sv, 120071},
{"&DiacriticalAcute;"sv, 180},
{"&DiacriticalDot;"sv, 729},
{"&DiacriticalDoubleAcute;"sv, 733},
{"&DiacriticalGrave;"sv, 96},
{"&DiacriticalTilde;"sv, 732},
{"&Diamond;"sv, 8900},
{"&DifferentialD;"sv, 8518},
{"&Dopf;"sv, 120123},
{"&Dot;"sv, 168},
{"&DotDot;"sv, 8412},
{"&DotEqual;"sv, 8784},
{"&DoubleContourIntegral;"sv, 8751},
{"&DoubleDot;"sv, 168},
{"&DoubleDownArrow;"sv, 8659},
{"&DoubleLeftArrow;"sv, 8656},
{"&DoubleLeftRightArrow;"sv, 8660},
{"&DoubleLeftTee;"sv, 10980},
{"&DoubleLongLeftArrow;"sv, 10232},
{"&DoubleLongLeftRightArrow;"sv, 10234},
{"&DoubleLongRightArrow;"sv, 10233},
{"&DoubleRightArrow;"sv, 8658},
{"&DoubleRightTee;"sv, 8872},
{"&DoubleUpArrow;"sv, 8657},
{"&DoubleUpDownArrow;"sv, 8661},
{"&DoubleVerticalBar;"sv, 8741},
{"&DownArrow;"sv, 8595},
{"&DownArrowBar;"sv, 10515},
{"&DownArrowUpArrow;"sv, 8693},
{"&DownBreve;"sv, 785},
{"&DownLeftRightVector;"sv, 10576},
{"&DownLeftTeeVector;"sv, 10590},
{"&DownLeftVector;"sv, 8637},
{"&DownLeftVectorBar;"sv, 10582},
{"&DownRightTeeVector;"sv, 10591},
{"&DownRightVector;"sv, 8641},
{"&DownRightVectorBar;"sv, 10583},
{"&DownTee;"sv, 8868},
{"&DownTeeArrow;"sv, 8615},
{"&Downarrow;"sv, 8659},
{"&Dscr;"sv, 119967},
{"&Dstrok;"sv, 272},
{"&ENG;"sv, 330},
{"&ETH"sv, 208},
{"&ETH;"sv, 208},
{"&Eacute"sv, 201},
{"&Eacute;"sv, 201},
{"&Ecaron;"sv, 282},
{"&Ecirc"sv, 202},
{"&Ecirc;"sv, 202},
{"&Ecy;"sv, 1069},
{"&Edot;"sv, 278},
{"&Efr;"sv, 120072},
{"&Egrave"sv, 200},
{"&Egrave;"sv, 200},
{"&Element;"sv, 8712},
{"&Emacr;"sv, 274},
{"&EmptySmallSquare;"sv, 9723},
{"&EmptyVerySmallSquare;"sv, 9643},
{"&Eogon;"sv, 280},
{"&Eopf;"sv, 120124},
{"&Epsilon;"sv, 917},
{"&Equal;"sv, 10869},
{"&EqualTilde;"sv, 8770},
{"&Equilibrium;"sv, 8652},
{"&Escr;"sv, 8496},
{"&Esim;"sv, 10867},
{"&Eta;"sv, 919},
{"&Euml"sv, 203},
{"&Euml;"sv, 203},
{"&Exists;"sv, 8707},
{"&ExponentialE;"sv, 8519},
{"&Fcy;"sv, 1060},
{"&Ffr;"sv, 120073},
{"&FilledSmallSquare;"sv, 9724},
{"&FilledVerySmallSquare;"sv, 9642},
{"&Fopf;"sv, 120125},
{"&ForAll;"sv, 8704},
{"&Fouriertrf;"sv, 8497},
{"&Fscr;"sv, 8497},
{"&GJcy;"sv, 1027},
{"&GT"sv, 62},
{"&GT;"sv, 62},
{"&Gamma;"sv, 915},
{"&Gammad;"sv, 988},
{"&Gbreve;"sv, 286},
{"&Gcedil;"sv, 290},
{"&Gcirc;"sv, 284},
{"&Gcy;"sv, 1043},
{"&Gdot;"sv, 288},
{"&Gfr;"sv, 120074},
{"&Gg;"sv, 8921},
{"&Gopf;"sv, 120126},
{"&GreaterEqual;"sv, 8805},
{"&GreaterEqualLess;"sv, 8923},
{"&GreaterFullEqual;"sv, 8807},
{"&GreaterGreater;"sv, 10914},
{"&GreaterLess;"sv, 8823},
{"&GreaterSlantEqual;"sv, 10878},
{"&GreaterTilde;"sv, 8819},
{"&Gscr;"sv, 119970},
{"&Gt;"sv, 8811},
{"&HARDcy;"sv, 1066},
{"&Hacek;"sv, 711},
{"&Hat;"sv, 94},
{"&Hcirc;"sv, 292},
{"&Hfr;"sv, 8460},
{"&HilbertSpace;"sv, 8459},
{"&Hopf;"sv, 8461},
{"&HorizontalLine;"sv, 9472},
{"&Hscr;"sv, 8459},
{"&Hstrok;"sv, 294},
{"&HumpDownHump;"sv, 8782},
{"&HumpEqual;"sv, 8783},
{"&IEcy;"sv, 1045},
{"&IJlig;"sv, 306},
{"&IOcy;"sv, 1025},
{"&Iacute"sv, 205},
{"&Iacute;"sv, 205},
{"&Icirc"sv, 206},
{"&Icirc;"sv, 206},
{"&Icy;"sv, 1048},
{"&Idot;"sv, 304},
{"&Ifr;"sv, 8465},
{"&Igrave"sv, 204},
{"&Igrave;"sv, 204},
{"&Im;"sv, 8465},
{"&Imacr;"sv, 298},
{"&ImaginaryI;"sv, 8520},
{"&Implies;"sv, 8658},
{"&Int;"sv, 8748},
{"&Integral;"sv, 8747},
{"&Intersection;"sv, 8898},
{"&InvisibleComma;"sv, 8291},
{"&InvisibleTimes;"sv, 8290},
{"&Iogon;"sv, 302},
{"&Iopf;"sv, 120128},
{"&Iota;"sv, 921},
{"&Iscr;"sv, 8464},
{"&Itilde;"sv, 296},
{"&Iukcy;"sv, 1030},
{"&Iuml"sv, 207},
{"&Iuml;"sv, 207},
{"&Jcirc;"sv, 308},
{"&Jcy;"sv, 1049},
{"&Jfr;"sv, 120077},
{"&Jopf;"sv, 120129},
{"&Jscr;"sv, 119973},
{"&Jsercy;"sv, 1032},
{"&Jukcy;"sv, 1028},
{"&KHcy;"sv, 1061},
{"&KJcy;"sv, 1036},
{"&Kappa;"sv, 922},
{"&Kcedil;"sv, 310},
{"&Kcy;"sv, 1050},
{"&Kfr;"sv, 120078},
{"&Kopf;"sv, 120130},
{"&Kscr;"sv, 119974},
{"&LJcy;"sv, 1033},
{"&LT"sv, 60},
{"&LT;"sv, 60},
{"&Lacute;"sv, 313},
{"&Lambda;"sv, 923},
{"&Lang;"sv, 10218},
{"&Laplacetrf;"sv, 8466},
{"&Larr;"sv, 8606},
{"&Lcaron;"sv, 317},
{"&Lcedil;"sv, 315},
{"&Lcy;"sv, 1051},
{"&LeftAngleBracket;"sv, 10216},
{"&LeftArrow;"sv, 8592},
{"&LeftArrowBar;"sv, 8676},
{"&LeftArrowRightArrow;"sv, 8646},
{"&LeftCeiling;"sv, 8968},
{"&LeftDoubleBracket;"sv, 10214},
{"&LeftDownTeeVector;"sv, 10593},
{"&LeftDownVector;"sv, 8643},
{"&LeftDownVectorBar;"sv, 10585},
{"&LeftFloor;"sv, 8970},
{"&LeftRightArrow;"sv, 8596},
{"&LeftRightVector;"sv, 10574},
{"&LeftTee;"sv, 8867},
{"&LeftTeeArrow;"sv, 8612},
{"&LeftTeeVector;"sv, 10586},
{"&LeftTriangle;"sv, 8882},
{"&LeftTriangleBar;"sv, 10703},
{"&LeftTriangleEqual;"sv, 8884},
{"&LeftUpDownVector;"sv, 10577},
{"&LeftUpTeeVector;"sv, 10592},
{"&LeftUpVector;"sv, 8639},
{"&LeftUpVectorBar;"sv, 10584},
{"&LeftVector;"sv, 8636},
{"&LeftVectorBar;"sv, 10578},
{"&Leftarrow;"sv, 8656},
{"&Leftrightarrow;"sv, 8660},
{"&LessEqualGreater;"sv, 8922},
{"&LessFullEqual;"sv, 8806},
{"&LessGreater;"sv, 8822},
{"&LessLess;"sv, 10913},
{"&LessSlantEqual;"sv, 10877},
{"&LessTilde;"sv, 8818},
{"&Lfr;"sv, 120079},
{"&Ll;"sv, 8920},
{"&Lleftarrow;"sv, 8666},
{"&Lmidot;"sv, 319},
{"&LongLeftArrow;"sv, 10229},
{"&LongLeftRightArrow;"sv, 10231},
{"&LongRightArrow;"sv, 10230},
{"&Longleftarrow;"sv, 10232},
{"&Longleftrightarrow;"sv, 10234},
{"&Longrightarrow;"sv, 10233},
{"&Lopf;"sv, 120131},
{"&LowerLeftArrow;"sv, 8601},
{"&LowerRightArrow;"sv, 8600},
{"&Lscr;"sv, 8466},
{"&Lsh;"sv, 8624},
{"&Lstrok;"sv, 321},
{"&Lt;"sv, 8810},
{"&Map;"sv, 10501},
{"&Mcy;"sv, 1052},
{"&MediumSpace;"sv, 8287},
{"&Mellintrf;"sv, 8499},
{"&Mfr;"sv, 120080},
{"&MinusPlus;"sv, 8723},
{"&Mopf;"sv, 120132},
{"&Mscr;"sv, 8499},
{"&Mu;"sv, 924},
{"&NJcy;"sv, 1034},
{"&Nacute;"sv, 323},
{"&Ncaron;"sv, 327},
{"&Ncedil;"sv, 325},
{"&Ncy;"sv, 1053},
{"&NegativeMediumSpace;"sv, 8203},
{"&NegativeThickSpace;"sv, 8203},
{"&NegativeThinSpace;"sv, 8203},
{"&NegativeVeryThinSpace;"sv, 8203},
{"&NestedGreaterGreater;"sv, 8811},
{"&NestedLessLess;"sv, 8810},
{"&NewLine;"sv, 10},
{"&Nfr;"sv, 120081},
{"&NoBreak;"sv, 8288},
{"&NonBreakingSpace;"sv, 160},
{"&Nopf;"sv, 8469},
{"&Not;"sv, 10988},
{"&NotCongruent;"sv, 8802},
{"&NotCupCap;"sv, 8813},
{"&NotDoubleVerticalBar;"sv, 8742},
{"&NotElement;"sv, 8713},
{"&NotEqual;"sv, 8800},
{"&NotEqualTilde;"sv, 8770, 824},
{"&NotExists;"sv, 8708},
{"&NotGreater;"sv, 8815},
{"&NotGreaterEqual;"sv, 8817},
{"&NotGreaterFullEqual;"sv, 8807, 824},
{"&NotGreaterGreater;"sv, 8811, 824},
{"&NotGreaterLess;"sv, 8825},
{"&NotGreaterSlantEqual;"sv, 10878, 824},
{"&NotGreaterTilde;"sv, 8821},
{"&NotHumpDownHump;"sv, 8782, 824},
{"&NotHumpEqual;"sv, 8783, 824},
{"&NotLeftTriangle;"sv, 8938},
{"&NotLeftTriangleBar;"sv, 10703, 824},
{"&NotLeftTriangleEqual;"sv, 8940},
{"&NotLess;"sv, 8814},
{"&NotLessEqual;"sv, 8816},
{"&NotLessGreater;"sv, 8824},
{"&NotLessLess;"sv, 8810, 824},
{"&NotLessSlantEqual;"sv, 10877, 824},
{"&NotLessTilde;"sv, 8820},
{"&NotNestedGreaterGreater;"sv, 10914, 824},
{"&NotNestedLessLess;"sv, 10913, 824},
{"&NotPrecedes;"sv, 8832},
{"&NotPrecedesEqual;"sv, 10927, 824},
{"&NotPrecedesSlantEqual;"sv, 8928},
{"&NotReverseElement;"sv, 8716},
{"&NotRightTriangle;"sv, 8939},
{"&NotRightTriangleBar;"sv, 10704, 824},
{"&NotRightTriangleEqual;"sv, 8941},
{"&NotSquareSubset;"sv, 8847, 824},
{"&NotSquareSubsetEqual;"sv, 8930},
{"&NotSquareSuperset;"sv, 8848, 824},
{"&NotSquareSupersetEqual;"sv, 8931},
{"&NotSubset;"sv, 8834, 8402},
{"&NotSubsetEqual;"sv, 8840},
{"&NotSucceeds;"sv, 8833},
{"&NotSucceedsEqual;"sv, 10928, 824},
{"&NotSucceedsSlantEqual;"sv, 8929},
{"&NotSucceedsTilde;"sv, 8831, 824},
{"&NotSuperset;"sv, 8835, 8402},
{"&NotSupersetEqual;"sv, 8841},
{"&NotTilde;"sv, 8769},
{"&NotTildeEqual;"sv, 8772},
{"&NotTildeFullEqual;"sv, 8775},
{"&NotTildeTilde;"sv, 8777},
{"&NotVerticalBar;"sv, 8740},
{"&Nscr;"sv, 119977},
{"&Ntilde"sv, 209},
{"&Ntilde;"sv, 209},
{"&Nu;"sv, 925},
{"&OElig;"sv, 338},
{"&Oacute"sv, 211},
{"&Oacute;"sv, 211},
{"&Ocirc"sv, 212},
{"&Ocirc;"sv, 212},
{"&Ocy;"sv, 1054},
{"&Odblac;"sv, 336},
{"&Ofr;"sv, 120082},
{"&Ograve"sv, 210},
{"&Ograve;"sv, 210},
{"&Omacr;"sv, 332},
{"&Omega;"sv, 937},
{"&Omicron;"sv, 927},
{"&Oopf;"sv, 120134},
{"&OpenCurlyDoubleQuote;"sv, 8220},
{"&OpenCurlyQuote;"sv, 8216},
{"&Or;"sv, 10836},
{"&Oscr;"sv, 119978},
{"&Oslash"sv, 216},
{"&Oslash;"sv, 216},
{"&Otilde"sv, 213},
{"&Otilde;"sv, 213},
{"&Otimes;"sv, 10807},
{"&Ouml"sv, 214},
{"&Ouml;"sv, 214},
{"&OverBar;"sv, 8254},
{"&OverBrace;"sv, 9182},
{"&OverBracket;"sv, 9140},
{"&OverParenthesis;"sv, 9180},
{"&PartialD;"sv, 8706},
{"&Pcy;"sv, 1055},
{"&Pfr;"sv, 120083},
{"&Phi;"sv, 934},
{"&Pi;"sv, 928},
{"&PlusMinus;"sv, 177},
{"&Poincareplane;"sv, 8460},
{"&Popf;"sv, 8473},
{"&Pr;"sv, 10939},
{"&Precedes;"sv, 8826},
{"&PrecedesEqual;"sv, 10927},
{"&PrecedesSlantEqual;"sv, 8828},
{"&PrecedesTilde;"sv, 8830},
{"&Prime;"sv, 8243},
{"&Product;"sv, 8719},
{"&Proportion;"sv, 8759},
{"&Proportional;"sv, 8733},
{"&Pscr;"sv, 119979},
{"&Psi;"sv, 936},
{"&QUOT"sv, 34},
{"&QUOT;"sv, 34},
{"&Qfr;"sv, 120084},
{"&Qopf;"sv, 8474},
{"&Qscr;"sv, 119980},
{"&RBarr;"sv, 10512},
{"&REG"sv, 174},
{"&REG;"sv, 174},
{"&Racute;"sv, 340},
{"&Rang;"sv, 10219},
{"&Rarr;"sv, 8608},
{"&Rarrtl;"sv, 10518},
{"&Rcaron;"sv, 344},
{"&Rcedil;"sv, 342},
{"&Rcy;"sv, 1056},
{"&Re;"sv, 8476},
{"&ReverseElement;"sv, 8715},
{"&ReverseEquilibrium;"sv, 8651},
{"&ReverseUpEquilibrium;"sv, 10607},
{"&Rfr;"sv, 8476},
{"&Rho;"sv, 929},
{"&RightAngleBracket;"sv, 10217},
{"&RightArrow;"sv, 8594},
{"&RightArrowBar;"sv, 8677},
{"&RightArrowLeftArrow;"sv, 8644},
{"&RightCeiling;"sv, 8969},
{"&RightDoubleBracket;"sv, 10215},
{"&RightDownTeeVector;"sv, 10589},
{"&RightDownVector;"sv, 8642},
{"&RightDownVectorBar;"sv, 10581},
{"&RightFloor;"sv, 8971},
{"&RightTee;"sv, 8866},
{"&RightTeeArrow;"sv, 8614},
{"&RightTeeVector;"sv, 10587},
{"&RightTriangle;"sv, 8883},
{"&RightTriangleBar;"sv, 10704},
{"&RightTriangleEqual;"sv, 8885},
{"&RightUpDownVector;"sv, 10575},
{"&RightUpTeeVector;"sv, 10588},
{"&RightUpVector;"sv, 8638},
{"&RightUpVectorBar;"sv, 10580},
{"&RightVector;"sv, 8640},
{"&RightVectorBar;"sv, 10579},
{"&Rightarrow;"sv, 8658},
{"&Ropf;"sv, 8477},
{"&RoundImplies;"sv, 10608},
{"&Rrightarrow;"sv, 8667},
{"&Rscr;"sv, 8475},
{"&Rsh;"sv, 8625},
{"&RuleDelayed;"sv, 10740},
{"&SHCHcy;"sv, 1065},
{"&SHcy;"sv, 1064},
{"&SOFTcy;"sv, 1068},
{"&Sacute;"sv, 346},
{"&Sc;"sv, 10940},
{"&Scaron;"sv, 352},
{"&Scedil;"sv, 350},
{"&Scirc;"sv, 348},
{"&Scy;"sv, 1057},
{"&Sfr;"sv, 120086},
{"&ShortDownArrow;"sv, 8595},
{"&ShortLeftArrow;"sv, 8592},
{"&ShortRightArrow;"sv, 8594},
{"&ShortUpArrow;"sv, 8593},
{"&Sigma;"sv, 931},
{"&SmallCircle;"sv, 8728},
{"&Sopf;"sv, 120138},
{"&Sqrt;"sv, 8730},
{"&Square;"sv, 9633},
{"&SquareIntersection;"sv, 8851},
{"&SquareSubset;"sv, 8847},
{"&SquareSubsetEqual;"sv, 8849},
{"&SquareSuperset;"sv, 8848},
{"&SquareSupersetEqual;"sv, 8850},
{"&SquareUnion;"sv, 8852},
{"&Sscr;"sv, 119982},
{"&Star;"sv, 8902},
{"&Sub;"sv, 8912},
{"&Subset;"sv, 8912},
{"&SubsetEqual;"sv, 8838},
{"&Succeeds;"sv, 8827},
{"&SucceedsEqual;"sv, 10928},
{"&SucceedsSlantEqual;"sv, 8829},
{"&SucceedsTilde;"sv, 8831},
{"&SuchThat;"sv, 8715},
{"&Sum;"sv, 8721},
{"&Sup;"sv, 8913},
{"&Superset;"sv, 8835},
{"&SupersetEqual;"sv, 8839},
{"&Supset;"sv, 8913},
{"&THORN"sv, 222},
{"&THORN;"sv, 222},
{"&TRADE;"sv, 8482},
{"&TSHcy;"sv, 1035},
{"&TScy;"sv, 1062},
{"&Tab;"sv, 9},
{"&Tau;"sv, 932},
{"&Tcaron;"sv, 356},
{"&Tcedil;"sv, 354},
{"&Tcy;"sv, 1058},
{"&Tfr;"sv, 120087},
{"&Therefore;"sv, 8756},
{"&Theta;"sv, 920},
{"&ThickSpace;"sv, 8287, 8202},
{"&ThinSpace;"sv, 8201},
{"&Tilde;"sv, 8764},
{"&TildeEqual;"sv, 8771},
{"&TildeFullEqual;"sv, 8773},
{"&TildeTilde;"sv, 8776},
{"&Topf;"sv, 120139},
{"&TripleDot;"sv, 8411},
{"&Tscr;"sv, 119983},
{"&Tstrok;"sv, 358},
{"&Uacute"sv, 218},
{"&Uacute;"sv, 218},
{"&Uarr;"sv, 8607},
{"&Uarrocir;"sv, 10569},
{"&Ubrcy;"sv, 1038},
{"&Ubreve;"sv, 364},
{"&Ucirc"sv, 219},
{"&Ucirc;"sv, 219},
{"&Ucy;"sv, 1059},
{"&Udblac;"sv, 368},
{"&Ufr;"sv, 120088},
{"&Ugrave"sv, 217},
{"&Ugrave;"sv, 217},
{"&Umacr;"sv, 362},
{"&UnderBar;"sv, 95},
{"&UnderBrace;"sv, 9183},
{"&UnderBracket;"sv, 9141},
{"&UnderParenthesis;"sv, 9181},
{"&Union;"sv, 8899},
{"&UnionPlus;"sv, 8846},
{"&Uogon;"sv, 370},
{"&Uopf;"sv, 120140},
{"&UpArrow;"sv, 8593},
{"&UpArrowBar;"sv, 10514},
{"&UpArrowDownArrow;"sv, 8645},
{"&UpDownArrow;"sv, 8597},
{"&UpEquilibrium;"sv, 10606},
{"&UpTee;"sv, 8869},
{"&UpTeeArrow;"sv, 8613},
{"&Uparrow;"sv, 8657},
{"&Updownarrow;"sv, 8661},
{"&UpperLeftArrow;"sv, 8598},
{"&UpperRightArrow;"sv, 8599},
{"&Upsi;"sv, 978},
{"&Upsilon;"sv, 933},
{"&Uring;"sv, 366},
{"&Uscr;"sv, 119984},
{"&Utilde;"sv, 360},
{"&Uuml"sv, 220},
{"&Uuml;"sv, 220},
{"&VDash;"sv, 8875},
{"&Vbar;"sv, 10987},
{"&Vcy;"sv, 1042},
{"&Vdash;"sv, 8873},
{"&Vdashl;"sv, 10982},
{"&Vee;"sv, 8897},
{"&Verbar;"sv, 8214},
{"&Vert;"sv, 8214},
{"&VerticalBar;"sv, 8739},
{"&VerticalLine;"sv, 124},
{"&VerticalSeparator;"sv, 10072},
{"&VerticalTilde;"sv, 8768},
{"&VeryThinSpace;"sv, 8202},
{"&Vfr;"sv, 120089},
{"&Vopf;"sv, 120141},
{"&Vscr;"sv, 119985},
{"&Vvdash;"sv, 8874},
{"&Wcirc;"sv, 372},
{"&Wedge;"sv, 8896},
{"&Wfr;"sv, 120090},
{"&Wopf;"sv, 120142},
{"&Wscr;"sv, 119986},
{"&Xfr;"sv, 120091},
{"&Xi;"sv, 926},
{"&Xopf;"sv, 120143},
{"&Xscr;"sv, 119987},
{"&YAcy;"sv, 1071},
{"&YIcy;"sv, 1031},
{"&YUcy;"sv, 1070},
{"&Yacute"sv, 221},
{"&Yacute;"sv, 221},
{"&Ycirc;"sv, 374},
{"&Ycy;"sv, 1067},
{"&Yfr;"sv, 120092},
{"&Yopf;"sv, 120144},
{"&Yscr;"sv, 119988},
{"&Yuml;"sv, 376},
{"&ZHcy;"sv, 1046},
{"&Zacute;"sv, 377},
{"&Zcaron;"sv, 381},
{"&Zcy;"sv, 1047},
{"&Zdot;"sv, 379},
{"&ZeroWidthSpace;"sv, 8203},
{"&Zeta;"sv, 918},
{"&Zfr;"sv, 8488},
{"&Zopf;"sv, 8484},
{"&Zscr;"sv, 119989},
{"&aacute"sv, 225},
{"&aacute;"sv, 225},
{"&abreve;"sv, 259},
{"&ac;"sv, 8766},
{"&acE;"sv, 8766, 819},
{"&acd;"sv, 8767},
{"&acirc"sv, 226},
{"&acirc;"sv, 226},
{"&acute"sv, 180},
{"&acute;"sv, 180},
{"&acy;"sv, 1072},
{"&aelig"sv, 230},
{"&aelig;"sv, 230},
{"&af;"sv, 8289},
{"&afr;"sv, 120094},
{"&agrave"sv, 224},
{"&agrave;"sv, 224},
{"&alefsym;"sv, 8501},
{"&aleph;"sv, 8501},
{"&alpha;"sv, 945},
{"&amacr;"sv, 257},
{"&amalg;"sv, 10815},
{"&amp"sv, 38},
{"&amp;"sv, 38},
{"&and;"sv, 8743},
{"&andand;"sv, 10837},
{"&andd;"sv, 10844},
{"&andslope;"sv, 10840},
{"&andv;"sv, 10842},
{"&ang;"sv, 8736},
{"&ange;"sv, 10660},
{"&angle;"sv, 8736},
{"&angmsd;"sv, 8737},
{"&angmsdaa;"sv, 10664},
{"&angmsdab;"sv, 10665},
{"&angmsdac;"sv, 10666},
{"&angmsdad;"sv, 10667},
{"&angmsdae;"sv, 10668},
{"&angmsdaf;"sv, 10669},
{"&angmsdag;"sv, 10670},
{"&angmsdah;"sv, 10671},
{"&angrt;"sv, 8735},
{"&angrtvb;"sv, 8894},
{"&angrtvbd;"sv, 10653},
{"&angsph;"sv, 8738},
{"&angst;"sv, 197},
{"&angzarr;"sv, 9084},
{"&aogon;"sv, 261},
{"&aopf;"sv, 120146},
{"&ap;"sv, 8776},
{"&apE;"sv, 10864},
{"&apacir;"sv, 10863},
{"&ape;"sv, 8778},
{"&apid;"sv, 8779},
{"&apos;"sv, 39},
{"&approx;"sv, 8776},
{"&approxeq;"sv, 8778},
{"&aring"sv, 229},
{"&aring;"sv, 229},
{"&ascr;"sv, 119990},
{"&ast;"sv, 42},
{"&asymp;"sv, 8776},
{"&asympeq;"sv, 8781},
{"&atilde"sv, 227},
{"&atilde;"sv, 227},
{"&auml"sv, 228},
{"&auml;"sv, 228},
{"&awconint;"sv, 8755},
{"&awint;"sv, 10769},
{"&bNot;"sv, 10989},
{"&backcong;"sv, 8780},
{"&backepsilon;"sv, 1014},
{"&backprime;"sv, 8245},
{"&backsim;"sv, 8765},
{"&backsimeq;"sv, 8909},
{"&barvee;"sv, 8893},
{"&barwed;"sv, 8965},
{"&barwedge;"sv, 8965},
{"&bbrk;"sv, 9141},
{"&bbrktbrk;"sv, 9142},
{"&bcong;"sv, 8780},
{"&bcy;"sv, 1073},
{"&bdquo;"sv, 8222},
{"&becaus;"sv, 8757},
{"&because;"sv, 8757},
{"&bemptyv;"sv, 10672},
{"&bepsi;"sv, 1014},
{"&bernou;"sv, 8492},
{"&beta;"sv, 946},
{"&beth;"sv, 8502},
{"&between;"sv, 8812},
{"&bfr;"sv, 120095},
{"&bigcap;"sv, 8898},
{"&bigcirc;"sv, 9711},
{"&bigcup;"sv, 8899},
{"&bigodot;"sv, 10752},
{"&bigoplus;"sv, 10753},
{"&bigotimes;"sv, 10754},
{"&bigsqcup;"sv, 10758},
{"&bigstar;"sv, 9733},
{"&bigtriangledown;"sv, 9661},
{"&bigtriangleup;"sv, 9651},
{"&biguplus;"sv, 10756},
{"&bigvee;"sv, 8897},
{"&bigwedge;"sv, 8896},
{"&bkarow;"sv, 10509},
{"&blacklozenge;"sv, 10731},
{"&blacksquare;"sv, 9642},
{"&blacktriangle;"sv, 9652},
{"&blacktriangledown;"sv, 9662},
{"&blacktriangleleft;"sv, 9666},
{"&blacktriangleright;"sv, 9656},
{"&blank;"sv, 9251},
{"&blk12;"sv, 9618},
{"&blk14;"sv, 9617},
{"&blk34;"sv, 9619},
{"&block;"sv, 9608},
{"&bne;"sv, 61, 8421},
{"&bnequiv;"sv, 8801, 8421},
{"&bnot;"sv, 8976},
{"&bopf;"sv, 120147},
{"&bot;"sv, 8869},
{"&bottom;"sv, 8869},
{"&bowtie;"sv, 8904},
{"&boxDL;"sv, 9559},
{"&boxDR;"sv, 9556},
{"&boxDl;"sv, 9558},
{"&boxDr;"sv, 9555},
{"&boxH;"sv, 9552},
{"&boxHD;"sv, 9574},
{"&boxHU;"sv, 9577},
{"&boxHd;"sv, 9572},
{"&boxHu;"sv, 9575},
{"&boxUL;"sv, 9565},
{"&boxUR;"sv, 9562},
{"&boxUl;"sv, 9564},
{"&boxUr;"sv, 9561},
{"&boxV;"sv, 9553},
{"&boxVH;"sv, 9580},
{"&boxVL;"sv, 9571},
{"&boxVR;"sv, 9568},
{"&boxVh;"sv, 9579},
{"&boxVl;"sv, 9570},
{"&boxVr;"sv, 9567},
{"&boxbox;"sv, 10697},
{"&boxdL;"sv, 9557},
{"&boxdR;"sv, 9554},
{"&boxdl;"sv, 9488},
{"&boxdr;"sv, 9484},
{"&boxh;"sv, 9472},
{"&boxhD;"sv, 9573},
{"&boxhU;"sv, 9576},
{"&boxhd;"sv, 9516},
{"&boxhu;"sv, 9524},
{"&boxminus;"sv, 8863},
{"&boxplus;"sv, 8862},
{"&boxtimes;"sv, 8864},
{"&boxuL;"sv, 9563},
{"&boxuR;"sv, 9560},
{"&boxul;"sv, 9496},
{"&boxur;"sv, 9492},
{"&boxv;"sv, 9474},
{"&boxvH;"sv, 9578},
{"&boxvL;"sv, 9569},
{"&boxvR;"sv, 9566},
{"&boxvh;"sv, 9532},
{"&boxvl;"sv, 9508},
{"&boxvr;"sv, 9500},
{"&bprime;"sv, 8245},
{"&breve;"sv, 728},
{"&brvbar"sv, 166},
{"&brvbar;"sv, 166},
{"&bscr;"sv, 119991},
{"&bsemi;"sv, 8271},
{"&bsim;"sv, 8765},
{"&bsime;"sv, 8909},
{"&bsol;"sv, 92},
{"&bsolb;"sv, 10693},
{"&bsolhsub;"sv, 10184},
{"&bull;"sv, 8226},
{"&bullet;"sv, 8226},
{"&bump;"sv, 8782},
{"&bumpE;"sv, 10926},
{"&bumpe;"sv, 8783},
{"&bumpeq;"sv, 8783},
{"&cacute;"sv, 263},
{"&cap;"sv, 8745},
{"&capand;"sv, 10820},
{"&capbrcup;"sv, 10825},
{"&capcap;"sv, 10827},
{"&capcup;"sv, 10823},
{"&capdot;"sv, 10816},
{"&caps;"sv, 8745, 65024},
{"&caret;"sv, 8257},
{"&caron;"sv, 711},
{"&ccaps;"sv, 10829},
{"&ccaron;"sv, 269},
{"&ccedil"sv, 231},
{"&ccedil;"sv, 231},
{"&ccirc;"sv, 265},
{"&ccups;"sv, 10828},
{"&ccupssm;"sv, 10832},
{"&cdot;"sv, 267},
{"&cedil"sv, 184},
{"&cedil;"sv, 184},
{"&cemptyv;"sv, 10674},
{"&cent"sv, 162},
{"&cent;"sv, 162},
{"&centerdot;"sv, 183},
{"&cfr;"sv, 120096},
{"&chcy;"sv, 1095},
{"&check;"sv, 10003},
{"&checkmark;"sv, 10003},
{"&chi;"sv, 967},
{"&cir;"sv, 9675},
{"&cirE;"sv, 10691},
{"&circ;"sv, 710},
{"&circeq;"sv, 8791},
{"&circlearrowleft;"sv, 8634},
{"&circlearrowright;"sv, 8635},
{"&circledR;"sv, 174},
{"&circledS;"sv, 9416},
{"&circledast;"sv, 8859},
{"&circledcirc;"sv, 8858},
{"&circleddash;"sv, 8861},
{"&cire;"sv, 8791},
{"&cirfnint;"sv, 10768},
{"&cirmid;"sv, 10991},
{"&cirscir;"sv, 10690},
{"&clubs;"sv, 9827},
{"&clubsuit;"sv, 9827},
{"&colon;"sv, 58},
{"&colone;"sv, 8788},
{"&coloneq;"sv, 8788},
{"&comma;"sv, 44},
{"&commat;"sv, 64},
{"&comp;"sv, 8705},
{"&compfn;"sv, 8728},
{"&complement;"sv, 8705},
{"&complexes;"sv, 8450},
{"&cong;"sv, 8773},
{"&congdot;"sv, 10861},
{"&conint;"sv, 8750},
{"&copf;"sv, 120148},
{"&coprod;"sv, 8720},
{"&copy"sv, 169},
{"&copy;"sv, 169},
{"&copysr;"sv, 8471},
{"&crarr;"sv, 8629},
{"&cross;"sv, 10007},
{"&cscr;"sv, 119992},
{"&csub;"sv, 10959},
{"&csube;"sv, 10961},
{"&csup;"sv, 10960},
{"&csupe;"sv, 10962},
{"&ctdot;"sv, 8943},
{"&cudarrl;"sv, 10552},
{"&cudarrr;"sv, 10549},
{"&cuepr;"sv, 8926},
{"&cuesc;"sv, 8927},
{"&cularr;"sv, 8630},
{"&cularrp;"sv, 10557},
{"&cup;"sv, 8746},
{"&cupbrcap;"sv, 10824},
{"&cupcap;"sv, 10822},
{"&cupcup;"sv, 10826},
{"&cupdot;"sv, 8845},
{"&cupor;"sv, 10821},
{"&cups;"sv, 8746, 65024},
{"&curarr;"sv, 8631},
{"&curarrm;"sv, 10556},
{"&curlyeqprec;"sv, 8926},
{"&curlyeqsucc;"sv, 8927},
{"&curlyvee;"sv, 8910},
{"&curlywedge;"sv, 8911},
{"&curren"sv, 164},
{"&curren;"sv, 164},
{"&curvearrowleft;"sv, 8630},
{"&curvearrowright;"sv, 8631},
{"&cuvee;"sv, 8910},
{"&cuwed;"sv, 8911},
{"&cwconint;"sv, 8754},
{"&cwint;"sv, 8753},
{"&cylcty;"sv, 9005},
{"&dArr;"sv, 8659},
{"&dHar;"sv, 10597},
{"&dagger;"sv, 8224},
{"&daleth;"sv, 8504},
{"&darr;"sv, 8595},
{"&dash;"sv, 8208},
{"&dashv;"sv, 8867},
{"&dbkarow;"sv, 10511},
{"&dblac;"sv, 733},
{"&dcaron;"sv, 271},
{"&dcy;"sv, 1076},
{"&dd;"sv, 8518},
{"&ddagger;"sv, 8225},
{"&ddarr;"sv, 8650},
{"&ddotseq;"sv, 10871},
{"&deg"sv, 176},
{"&deg;"sv, 176},
{"&delta;"sv, 948},
{"&demptyv;"sv, 10673},
{"&dfisht;"sv, 10623},
{"&dfr;"sv, 120097},
{"&dharl;"sv, 8643},
{"&dharr;"sv, 8642},
{"&diam;"sv, 8900},
{"&diamond;"sv, 8900},
{"&diamondsuit;"sv, 9830},
{"&diams;"sv, 9830},
{"&die;"sv, 168},
{"&digamma;"sv, 989},
{"&disin;"sv, 8946},
{"&div;"sv, 247},
{"&divide"sv, 247},
{"&divide;"sv, 247},
{"&divideontimes;"sv, 8903},
{"&divonx;"sv, 8903},
{"&djcy;"sv, 1106},
{"&dlcorn;"sv, 8990},
{"&dlcrop;"sv, 8973},
{"&dollar;"sv, 36},
{"&dopf;"sv, 120149},
{"&dot;"sv, 729},
{"&doteq;"sv, 8784},
{"&doteqdot;"sv, 8785},
{"&dotminus;"sv, 8760},
{"&dotplus;"sv, 8724},
{"&dotsquare;"sv, 8865},
{"&doublebarwedge;"sv, 8966},
{"&downarrow;"sv, 8595},
{"&downdownarrows;"sv, 8650},
{"&downharpoonleft;"sv, 8643},
{"&downharpoonright;"sv, 8642},
{"&drbkarow;"sv, 10512},
{"&drcorn;"sv, 8991},
{"&drcrop;"sv, 8972},
{"&dscr;"sv, 119993},
{"&dscy;"sv, 1109},
{"&dsol;"sv, 10742},
{"&dstrok;"sv, 273},
{"&dtdot;"sv, 8945},
{"&dtri;"sv, 9663},
{"&dtrif;"sv, 9662},
{"&duarr;"sv, 8693},
{"&duhar;"sv, 10607},
{"&dwangle;"sv, 10662},
{"&dzcy;"sv, 1119},
{"&dzigrarr;"sv, 10239},
{"&eDDot;"sv, 10871},
{"&eDot;"sv, 8785},
{"&eacute"sv, 233},
{"&eacute;"sv, 233},
{"&easter;"sv, 10862},
{"&ecaron;"sv, 283},
{"&ecir;"sv, 8790},
{"&ecirc"sv, 234},
{"&ecirc;"sv, 234},
{"&ecolon;"sv, 8789},
{"&ecy;"sv, 1101},
{"&edot;"sv, 279},
{"&ee;"sv, 8519},
{"&efDot;"sv, 8786},
{"&efr;"sv, 120098},
{"&eg;"sv, 10906},
{"&egrave"sv, 232},
{"&egrave;"sv, 232},
{"&egs;"sv, 10902},
{"&egsdot;"sv, 10904},
{"&el;"sv, 10905},
{"&elinters;"sv, 9191},
{"&ell;"sv, 8467},
{"&els;"sv, 10901},
{"&elsdot;"sv, 10903},
{"&emacr;"sv, 275},
{"&empty;"sv, 8709},
{"&emptyset;"sv, 8709},
{"&emptyv;"sv, 8709},
{"&emsp13;"sv, 8196},
{"&emsp14;"sv, 8197},
{"&emsp;"sv, 8195},
{"&eng;"sv, 331},
{"&ensp;"sv, 8194},
{"&eogon;"sv, 281},
{"&eopf;"sv, 120150},
{"&epar;"sv, 8917},
{"&eparsl;"sv, 10723},
{"&eplus;"sv, 10865},
{"&epsi;"sv, 949},
{"&epsilon;"sv, 949},
{"&epsiv;"sv, 1013},
{"&eqcirc;"sv, 8790},
{"&eqcolon;"sv, 8789},
{"&eqsim;"sv, 8770},
{"&eqslantgtr;"sv, 10902},
{"&eqslantless;"sv, 10901},
{"&equals;"sv, 61},
{"&equest;"sv, 8799},
{"&equiv;"sv, 8801},
{"&equivDD;"sv, 10872},
{"&eqvparsl;"sv, 10725},
{"&erDot;"sv, 8787},
{"&erarr;"sv, 10609},
{"&escr;"sv, 8495},
{"&esdot;"sv, 8784},
{"&esim;"sv, 8770},
{"&eta;"sv, 951},
{"&eth"sv, 240},
{"&eth;"sv, 240},
{"&euml"sv, 235},
{"&euml;"sv, 235},
{"&euro;"sv, 8364},
{"&excl;"sv, 33},
{"&exist;"sv, 8707},
{"&expectation;"sv, 8496},
{"&exponentiale;"sv, 8519},
{"&fallingdotseq;"sv, 8786},
{"&fcy;"sv, 1092},
{"&female;"sv, 9792},
{"&ffilig;"sv, 64259},
{"&fflig;"sv, 64256},
{"&ffllig;"sv, 64260},
{"&ffr;"sv, 120099},
{"&filig;"sv, 64257},
{"&fjlig;"sv, 102, 106},
{"&flat;"sv, 9837},
{"&fllig;"sv, 64258},
{"&fltns;"sv, 9649},
{"&fnof;"sv, 402},
{"&fopf;"sv, 120151},
{"&forall;"sv, 8704},
{"&fork;"sv, 8916},
{"&forkv;"sv, 10969},
{"&fpartint;"sv, 10765},
{"&frac12"sv, 189},
{"&frac12;"sv, 189},
{"&frac13;"sv, 8531},
{"&frac14"sv, 188},
{"&frac14;"sv, 188},
{"&frac15;"sv, 8533},
{"&frac16;"sv, 8537},
{"&frac18;"sv, 8539},
{"&frac23;"sv, 8532},
{"&frac25;"sv, 8534},
{"&frac34"sv, 190},
{"&frac34;"sv, 190},
{"&frac35;"sv, 8535},
{"&frac38;"sv, 8540},
{"&frac45;"sv, 8536},
{"&frac56;"sv, 8538},
{"&frac58;"sv, 8541},
{"&frac78;"sv, 8542},
{"&frasl;"sv, 8260},
{"&frown;"sv, 8994},
{"&fscr;"sv, 119995},
{"&gE;"sv, 8807},
{"&gEl;"sv, 10892},
{"&gacute;"sv, 501},
{"&gamma;"sv, 947},
{"&gammad;"sv, 989},
{"&gap;"sv, 10886},
{"&gbreve;"sv, 287},
{"&gcirc;"sv, 285},
{"&gcy;"sv, 1075},
{"&gdot;"sv, 289},
{"&ge;"sv, 8805},
{"&gel;"sv, 8923},
{"&geq;"sv, 8805},
{"&geqq;"sv, 8807},
{"&geqslant;"sv, 10878},
{"&ges;"sv, 10878},
{"&gescc;"sv, 10921},
{"&gesdot;"sv, 10880},
{"&gesdoto;"sv, 10882},
{"&gesdotol;"sv, 10884},
{"&gesl;"sv, 8923, 65024},
{"&gesles;"sv, 10900},
{"&gfr;"sv, 120100},
{"&gg;"sv, 8811},
{"&ggg;"sv, 8921},
{"&gimel;"sv, 8503},
{"&gjcy;"sv, 1107},
{"&gl;"sv, 8823},
{"&glE;"sv, 10898},
{"&gla;"sv, 10917},
{"&glj;"sv, 10916},
{"&gnE;"sv, 8809},
{"&gnap;"sv, 10890},
{"&gnapprox;"sv, 10890},
{"&gne;"sv, 10888},
{"&gneq;"sv, 10888},
{"&gneqq;"sv, 8809},
{"&gnsim;"sv, 8935},
{"&gopf;"sv, 120152},
{"&grave;"sv, 96},
{"&gscr;"sv, 8458},
{"&gsim;"sv, 8819},
{"&gsime;"sv, 10894},
{"&gsiml;"sv, 10896},
{"&gt"sv, 62},
{"&gt;"sv, 62},
{"&gtcc;"sv, 10919},
{"&gtcir;"sv, 10874},
{"&gtdot;"sv, 8919},
{"&gtlPar;"sv, 10645},
{"&gtquest;"sv, 10876},
{"&gtrapprox;"sv, 10886},
{"&gtrarr;"sv, 10616},
{"&gtrdot;"sv, 8919},
{"&gtreqless;"sv, 8923},
{"&gtreqqless;"sv, 10892},
{"&gtrless;"sv, 8823},
{"&gtrsim;"sv, 8819},
{"&gvertneqq;"sv, 8809, 65024},
{"&gvnE;"sv, 8809, 65024},
{"&hArr;"sv, 8660},
{"&hairsp;"sv, 8202},
{"&half;"sv, 189},
{"&hamilt;"sv, 8459},
{"&hardcy;"sv, 1098},
{"&harr;"sv, 8596},
{"&harrcir;"sv, 10568},
{"&harrw;"sv, 8621},
{"&hbar;"sv, 8463},
{"&hcirc;"sv, 293},
{"&hearts;"sv, 9829},
{"&heartsuit;"sv, 9829},
{"&hellip;"sv, 8230},
{"&hercon;"sv, 8889},
{"&hfr;"sv, 120101},
{"&hksearow;"sv, 10533},
{"&hkswarow;"sv, 10534},
{"&hoarr;"sv, 8703},
{"&homtht;"sv, 8763},
{"&hookleftarrow;"sv, 8617},
{"&hookrightarrow;"sv, 8618},
{"&hopf;"sv, 120153},
{"&horbar;"sv, 8213},
{"&hscr;"sv, 119997},
{"&hslash;"sv, 8463},
{"&hstrok;"sv, 295},
{"&hybull;"sv, 8259},
{"&hyphen;"sv, 8208},
{"&iacute"sv, 237},
{"&iacute;"sv, 237},
{"&ic;"sv, 8291},
{"&icirc"sv, 238},
{"&icirc;"sv, 238},
{"&icy;"sv, 1080},
{"&iecy;"sv, 1077},
{"&iexcl"sv, 161},
{"&iexcl;"sv, 161},
{"&iff;"sv, 8660},
{"&ifr;"sv, 120102},
{"&igrave"sv, 236},
{"&igrave;"sv, 236},
{"&ii;"sv, 8520},
{"&iiiint;"sv, 10764},
{"&iiint;"sv, 8749},
{"&iinfin;"sv, 10716},
{"&iiota;"sv, 8489},
{"&ijlig;"sv, 307},
{"&imacr;"sv, 299},
{"&image;"sv, 8465},
{"&imagline;"sv, 8464},
{"&imagpart;"sv, 8465},
{"&imath;"sv, 305},
{"&imof;"sv, 8887},
{"&imped;"sv, 437},
{"&in;"sv, 8712},
{"&incare;"sv, 8453},
{"&infin;"sv, 8734},
{"&infintie;"sv, 10717},
{"&inodot;"sv, 305},
{"&int;"sv, 8747},
{"&intcal;"sv, 8890},
{"&integers;"sv, 8484},
{"&intercal;"sv, 8890},
{"&intlarhk;"sv, 10775},
{"&intprod;"sv, 10812},
{"&iocy;"sv, 1105},
{"&iogon;"sv, 303},
{"&iopf;"sv, 120154},
{"&iota;"sv, 953},
{"&iprod;"sv, 10812},
{"&iquest"sv, 191},
{"&iquest;"sv, 191},
{"&iscr;"sv, 119998},
{"&isin;"sv, 8712},
{"&isinE;"sv, 8953},
{"&isindot;"sv, 8949},
{"&isins;"sv, 8948},
{"&isinsv;"sv, 8947},
{"&isinv;"sv, 8712},
{"&it;"sv, 8290},
{"&itilde;"sv, 297},
{"&iukcy;"sv, 1110},
{"&iuml"sv, 239},
{"&iuml;"sv, 239},
{"&jcirc;"sv, 309},
{"&jcy;"sv, 1081},
{"&jfr;"sv, 120103},
{"&jmath;"sv, 567},
{"&jopf;"sv, 120155},
{"&jscr;"sv, 119999},
{"&jsercy;"sv, 1112},
{"&jukcy;"sv, 1108},
{"&kappa;"sv, 954},
{"&kappav;"sv, 1008},
{"&kcedil;"sv, 311},
{"&kcy;"sv, 1082},
{"&kfr;"sv, 120104},
{"&kgreen;"sv, 312},
{"&khcy;"sv, 1093},
{"&kjcy;"sv, 1116},
{"&kopf;"sv, 120156},
{"&kscr;"sv, 120000},
{"&lAarr;"sv, 8666},
{"&lArr;"sv, 8656},
{"&lAtail;"sv, 10523},
{"&lBarr;"sv, 10510},
{"&lE;"sv, 8806},
{"&lEg;"sv, 10891},
{"&lHar;"sv, 10594},
{"&lacute;"sv, 314},
{"&laemptyv;"sv, 10676},
{"&lagran;"sv, 8466},
{"&lambda;"sv, 955},
{"&lang;"sv, 10216},
{"&langd;"sv, 10641},
{"&langle;"sv, 10216},
{"&lap;"sv, 10885},
{"&laquo"sv, 171},
{"&laquo;"sv, 171},
{"&larr;"sv, 8592},
{"&larrb;"sv, 8676},
{"&larrbfs;"sv, 10527},
{"&larrfs;"sv, 10525},
{"&larrhk;"sv, 8617},
{"&larrlp;"sv, 8619},
{"&larrpl;"sv, 10553},
{"&larrsim;"sv, 10611},
{"&larrtl;"sv, 8610},
{"&lat;"sv, 10923},
{"&latail;"sv, 10521},
{"&late;"sv, 10925},
{"&lates;"sv, 10925, 65024},
{"&lbarr;"sv, 10508},
{"&lbbrk;"sv, 10098},
{"&lbrace;"sv, 123},
{"&lbrack;"sv, 91},
{"&lbrke;"sv, 10635},
{"&lbrksld;"sv, 10639},
{"&lbrkslu;"sv, 10637},
{"&lcaron;"sv, 318},
{"&lcedil;"sv, 316},
{"&lceil;"sv, 8968},
{"&lcub;"sv, 123},
{"&lcy;"sv, 1083},
{"&ldca;"sv, 10550},
{"&ldquo;"sv, 8220},
{"&ldquor;"sv, 8222},
{"&ldrdhar;"sv, 10599},
{"&ldrushar;"sv, 10571},
{"&ldsh;"sv, 8626},
{"&le;"sv, 8804},
{"&leftarrow;"sv, 8592},
{"&leftarrowtail;"sv, 8610},
{"&leftharpoondown;"sv, 8637},
{"&leftharpoonup;"sv, 8636},
{"&leftleftarrows;"sv, 8647},
{"&leftrightarrow;"sv, 8596},
{"&leftrightarrows;"sv, 8646},
{"&leftrightharpoons;"sv, 8651},
{"&leftrightsquigarrow;"sv, 8621},
{"&leftthreetimes;"sv, 8907},
{"&leg;"sv, 8922},
{"&leq;"sv, 8804},
{"&leqq;"sv, 8806},
{"&leqslant;"sv, 10877},
{"&les;"sv, 10877},
{"&lescc;"sv, 10920},
{"&lesdot;"sv, 10879},
{"&lesdoto;"sv, 10881},
{"&lesdotor;"sv, 10883},
{"&lesg;"sv, 8922, 65024},
{"&lesges;"sv, 10899},
{"&lessapprox;"sv, 10885},
{"&lessdot;"sv, 8918},
{"&lesseqgtr;"sv, 8922},
{"&lesseqqgtr;"sv, 10891},
{"&lessgtr;"sv, 8822},
{"&lesssim;"sv, 8818},
{"&lfisht;"sv, 10620},
{"&lfloor;"sv, 8970},
{"&lfr;"sv, 120105},
{"&lg;"sv, 8822},
{"&lgE;"sv, 10897},
{"&lhard;"sv, 8637},
{"&lharu;"sv, 8636},
{"&lharul;"sv, 10602},
{"&lhblk;"sv, 9604},
{"&ljcy;"sv, 1113},
{"&ll;"sv, 8810},
{"&llarr;"sv, 8647},
{"&llcorner;"sv, 8990},
{"&llhard;"sv, 10603},
{"&lltri;"sv, 9722},
{"&lmidot;"sv, 320},
{"&lmoust;"sv, 9136},
{"&lmoustache;"sv, 9136},
{"&lnE;"sv, 8808},
{"&lnap;"sv, 10889},
{"&lnapprox;"sv, 10889},
{"&lne;"sv, 10887},
{"&lneq;"sv, 10887},
{"&lneqq;"sv, 8808},
{"&lnsim;"sv, 8934},
{"&loang;"sv, 10220},
{"&loarr;"sv, 8701},
{"&lobrk;"sv, 10214},
{"&longleftarrow;"sv, 10229},
{"&longleftrightarrow;"sv, 10231},
{"&longmapsto;"sv, 10236},
{"&longrightarrow;"sv, 10230},
{"&looparrowleft;"sv, 8619},
{"&looparrowright;"sv, 8620},
{"&lopar;"sv, 10629},
{"&lopf;"sv, 120157},
{"&loplus;"sv, 10797},
{"&lotimes;"sv, 10804},
{"&lowast;"sv, 8727},
{"&lowbar;"sv, 95},
{"&loz;"sv, 9674},
{"&lozenge;"sv, 9674},
{"&lozf;"sv, 10731},
{"&lpar;"sv, 40},
{"&lparlt;"sv, 10643},
{"&lrarr;"sv, 8646},
{"&lrcorner;"sv, 8991},
{"&lrhar;"sv, 8651},
{"&lrhard;"sv, 10605},
{"&lrm;"sv, 8206},
{"&lrtri;"sv, 8895},
{"&lsaquo;"sv, 8249},
{"&lscr;"sv, 120001},
{"&lsh;"sv, 8624},
{"&lsim;"sv, 8818},
{"&lsime;"sv, 10893},
{"&lsimg;"sv, 10895},
{"&lsqb;"sv, 91},
{"&lsquo;"sv, 8216},
{"&lsquor;"sv, 8218},
{"&lstrok;"sv, 322},
{"&lt"sv, 60},
{"&lt;"sv, 60},
{"&ltcc;"sv, 10918},
{"&ltcir;"sv, 10873},
{"&ltdot;"sv, 8918},
{"&lthree;"sv, 8907},
{"&ltimes;"sv, 8905},
{"&ltlarr;"sv, 10614},
{"&ltquest;"sv, 10875},
{"&ltrPar;"sv, 10646},
{"&ltri;"sv, 9667},
{"&ltrie;"sv, 8884},
{"&ltrif;"sv, 9666},
{"&lurdshar;"sv, 10570},
{"&luruhar;"sv, 10598},
{"&lvertneqq;"sv, 8808, 65024},
{"&lvnE;"sv, 8808, 65024},
{"&mDDot;"sv, 8762},
{"&macr"sv, 175},
{"&macr;"sv, 175},
{"&male;"sv, 9794},
{"&malt;"sv, 10016},
{"&maltese;"sv, 10016},
{"&map;"sv, 8614},
{"&mapsto;"sv, 8614},
{"&mapstodown;"sv, 8615},
{"&mapstoleft;"sv, 8612},
{"&mapstoup;"sv, 8613},
{"&marker;"sv, 9646},
{"&mcomma;"sv, 10793},
{"&mcy;"sv, 1084},
{"&mdash;"sv, 8212},
{"&measuredangle;"sv, 8737},
{"&mfr;"sv, 120106},
{"&mho;"sv, 8487},
{"&micro"sv, 181},
{"&micro;"sv, 181},
{"&mid;"sv, 8739},
{"&midast;"sv, 42},
{"&midcir;"sv, 10992},
{"&middot"sv, 183},
{"&middot;"sv, 183},
{"&minus;"sv, 8722},
{"&minusb;"sv, 8863},
{"&minusd;"sv, 8760},
{"&minusdu;"sv, 10794},
{"&mlcp;"sv, 10971},
{"&mldr;"sv, 8230},
{"&mnplus;"sv, 8723},
{"&models;"sv, 8871},
{"&mopf;"sv, 120158},
{"&mp;"sv, 8723},
{"&mscr;"sv, 120002},
{"&mstpos;"sv, 8766},
{"&mu;"sv, 956},
{"&multimap;"sv, 8888},
{"&mumap;"sv, 8888},
{"&nGg;"sv, 8921, 824},
{"&nGt;"sv, 8811, 8402},
{"&nGtv;"sv, 8811, 824},
{"&nLeftarrow;"sv, 8653},
{"&nLeftrightarrow;"sv, 8654},
{"&nLl;"sv, 8920, 824},
{"&nLt;"sv, 8810, 8402},
{"&nLtv;"sv, 8810, 824},
{"&nRightarrow;"sv, 8655},
{"&nVDash;"sv, 8879},
{"&nVdash;"sv, 8878},
{"&nabla;"sv, 8711},
{"&nacute;"sv, 324},
{"&nang;"sv, 8736, 8402},
{"&nap;"sv, 8777},
{"&napE;"sv, 10864, 824},
{"&napid;"sv, 8779, 824},
{"&napos;"sv, 329},
{"&napprox;"sv, 8777},
{"&natur;"sv, 9838},
{"&natural;"sv, 9838},
{"&naturals;"sv, 8469},
{"&nbsp"sv, 160},
{"&nbsp;"sv, 160},
{"&nbump;"sv, 8782, 824},
{"&nbumpe;"sv, 8783, 824},
{"&ncap;"sv, 10819},
{"&ncaron;"sv, 328},
{"&ncedil;"sv, 326},
{"&ncong;"sv, 8775},
{"&ncongdot;"sv, 10861, 824},
{"&ncup;"sv, 10818},
{"&ncy;"sv, 1085},
{"&ndash;"sv, 8211},
{"&ne;"sv, 8800},
{"&neArr;"sv, 8663},
{"&nearhk;"sv, 10532},
{"&nearr;"sv, 8599},
{"&nearrow;"sv, 8599},
{"&nedot;"sv, 8784, 824},
{"&nequiv;"sv, 8802},
{"&nesear;"sv, 10536},
{"&nesim;"sv, 8770, 824},
{"&nexist;"sv, 8708},
{"&nexists;"sv, 8708},
{"&nfr;"sv, 120107},
{"&ngE;"sv, 8807, 824},
{"&nge;"sv, 8817},
{"&ngeq;"sv, 8817},
{"&ngeqq;"sv, 8807, 824},
{"&ngeqslant;"sv, 10878, 824},
{"&nges;"sv, 10878, 824},
{"&ngsim;"sv, 8821},
{"&ngt;"sv, 8815},
{"&ngtr;"sv, 8815},
{"&nhArr;"sv, 8654},
{"&nharr;"sv, 8622},
{"&nhpar;"sv, 10994},
{"&ni;"sv, 8715},
{"&nis;"sv, 8956},
{"&nisd;"sv, 8954},
{"&niv;"sv, 8715},
{"&njcy;"sv, 1114},
{"&nlArr;"sv, 8653},
{"&nlE;"sv, 8806, 824},
{"&nlarr;"sv, 8602},
{"&nldr;"sv, 8229},
{"&nle;"sv, 8816},
{"&nleftarrow;"sv, 8602},
{"&nleftrightarrow;"sv, 8622},
{"&nleq;"sv, 8816},
{"&nleqq;"sv, 8806, 824},
{"&nleqslant;"sv, 10877, 824},
{"&nles;"sv, 10877, 824},
{"&nless;"sv, 8814},
{"&nlsim;"sv, 8820},
{"&nlt;"sv, 8814},
{"&nltri;"sv, 8938},
{"&nltrie;"sv, 8940},
{"&nmid;"sv, 8740},
{"&nopf;"sv, 120159},
{"&not"sv, 172},
{"&not;"sv, 172},
{"&notin;"sv, 8713},
{"&notinE;"sv, 8953, 824},
{"&notindot;"sv, 8949, 824},
{"&notinva;"sv, 8713},
{"&notinvb;"sv, 8951},
{"&notinvc;"sv, 8950},
{"&notni;"sv, 8716},
{"&notniva;"sv, 8716},
{"&notnivb;"sv, 8958},
{"&notnivc;"sv, 8957},
{"&npar;"sv, 8742},
{"&nparallel;"sv, 8742},
{"&nparsl;"sv, 11005, 8421},
{"&npart;"sv, 8706, 824},
{"&npolint;"sv, 10772},
{"&npr;"sv, 8832},
{"&nprcue;"sv, 8928},
{"&npre;"sv, 10927, 824},
{"&nprec;"sv, 8832},
{"&npreceq;"sv, 10927, 824},
{"&nrArr;"sv, 8655},
{"&nrarr;"sv, 8603},
{"&nrarrc;"sv, 10547, 824},
{"&nrarrw;"sv, 8605, 824},
{"&nrightarrow;"sv, 8603},
{"&nrtri;"sv, 8939},
{"&nrtrie;"sv, 8941},
{"&nsc;"sv, 8833},
{"&nsccue;"sv, 8929},
{"&nsce;"sv, 10928, 824},
{"&nscr;"sv, 120003},
{"&nshortmid;"sv, 8740},
{"&nshortparallel;"sv, 8742},
{"&nsim;"sv, 8769},
{"&nsime;"sv, 8772},
{"&nsimeq;"sv, 8772},
{"&nsmid;"sv, 8740},
{"&nspar;"sv, 8742},
{"&nsqsube;"sv, 8930},
{"&nsqsupe;"sv, 8931},
{"&nsub;"sv, 8836},
{"&nsubE;"sv, 10949, 824},
{"&nsube;"sv, 8840},
{"&nsubset;"sv, 8834, 8402},
{"&nsubseteq;"sv, 8840},
{"&nsubseteqq;"sv, 10949, 824},
{"&nsucc;"sv, 8833},
{"&nsucceq;"sv, 10928, 824},
{"&nsup;"sv, 8837},
{"&nsupE;"sv, 10950, 824},
{"&nsupe;"sv, 8841},
{"&nsupset;"sv, 8835, 8402},
{"&nsupseteq;"sv, 8841},
{"&nsupseteqq;"sv, 10950, 824},
{"&ntgl;"sv, 8825},
{"&ntilde"sv, 241},
{"&ntilde;"sv, 241},
{"&ntlg;"sv, 8824},
{"&ntriangleleft;"sv, 8938},
{"&ntrianglelefteq;"sv, 8940},
{"&ntriangleright;"sv, 8939},
{"&ntrianglerighteq;"sv, 8941},
{"&nu;"sv, 957},
{"&num;"sv, 35},
{"&numero;"sv, 8470},
{"&numsp;"sv, 8199},
{"&nvDash;"sv, 8877},
{"&nvHarr;"sv, 10500},
{"&nvap;"sv, 8781, 8402},
{"&nvdash;"sv, 8876},
{"&nvge;"sv, 8805, 8402},
{"&nvgt;"sv, 62, 8402},
{"&nvinfin;"sv, 10718},
{"&nvlArr;"sv, 10498},
{"&nvle;"sv, 8804, 8402},
{"&nvlt;"sv, 60, 8402},
{"&nvltrie;"sv, 8884, 8402},
{"&nvrArr;"sv, 10499},
{"&nvrtrie;"sv, 8885, 8402},
{"&nvsim;"sv, 8764, 8402},
{"&nwArr;"sv, 8662},
{"&nwarhk;"sv, 10531},
{"&nwarr;"sv, 8598},
{"&nwarrow;"sv, 8598},
{"&nwnear;"sv, 10535},
{"&oS;"sv, 9416},
{"&oacute"sv, 243},
{"&oacute;"sv, 243},
{"&oast;"sv, 8859},
{"&ocir;"sv, 8858},
{"&ocirc"sv, 244},
{"&ocirc;"sv, 244},
{"&ocy;"sv, 1086},
{"&odash;"sv, 8861},
{"&odblac;"sv, 337},
{"&odiv;"sv, 10808},
{"&odot;"sv, 8857},
{"&odsold;"sv, 10684},
{"&oelig;"sv, 339},
{"&ofcir;"sv, 10687},
{"&ofr;"sv, 120108},
{"&ogon;"sv, 731},
{"&ograve"sv, 242},
{"&ograve;"sv, 242},
{"&ogt;"sv, 10689},
{"&ohbar;"sv, 10677},
{"&ohm;"sv, 937},
{"&oint;"sv, 8750},
{"&olarr;"sv, 8634},
{"&olcir;"sv, 10686},
{"&olcross;"sv, 10683},
{"&oline;"sv, 8254},
{"&olt;"sv, 10688},
{"&omacr;"sv, 333},
{"&omega;"sv, 969},
{"&omicron;"sv, 959},
{"&omid;"sv, 10678},
{"&ominus;"sv, 8854},
{"&oopf;"sv, 120160},
{"&opar;"sv, 10679},
{"&operp;"sv, 10681},
{"&oplus;"sv, 8853},
{"&or;"sv, 8744},
{"&orarr;"sv, 8635},
{"&ord;"sv, 10845},
{"&order;"sv, 8500},
{"&orderof;"sv, 8500},
{"&ordf"sv, 170},
{"&ordf;"sv, 170},
{"&ordm"sv, 186},
{"&ordm;"sv, 186},
{"&origof;"sv, 8886},
{"&oror;"sv, 10838},
{"&orslope;"sv, 10839},
{"&orv;"sv, 10843},
{"&oscr;"sv, 8500},
{"&oslash"sv, 248},
{"&oslash;"sv, 248},
{"&osol;"sv, 8856},
{"&otilde"sv, 245},
{"&otilde;"sv, 245},
{"&otimes;"sv, 8855},
{"&otimesas;"sv, 10806},
{"&ouml"sv, 246},
{"&ouml;"sv, 246},
{"&ovbar;"sv, 9021},
{"&par;"sv, 8741},
{"&para"sv, 182},
{"&para;"sv, 182},
{"&parallel;"sv, 8741},
{"&parsim;"sv, 10995},
{"&parsl;"sv, 11005},
{"&part;"sv, 8706},
{"&pcy;"sv, 1087},
{"&percnt;"sv, 37},
{"&period;"sv, 46},
{"&permil;"sv, 8240},
{"&perp;"sv, 8869},
{"&pertenk;"sv, 8241},
{"&pfr;"sv, 120109},
{"&phi;"sv, 966},
{"&phiv;"sv, 981},
{"&phmmat;"sv, 8499},
{"&phone;"sv, 9742},
{"&pi;"sv, 960},
{"&pitchfork;"sv, 8916},
{"&piv;"sv, 982},
{"&planck;"sv, 8463},
{"&planckh;"sv, 8462},
{"&plankv;"sv, 8463},
{"&plus;"sv, 43},
{"&plusacir;"sv, 10787},
{"&plusb;"sv, 8862},
{"&pluscir;"sv, 10786},
{"&plusdo;"sv, 8724},
{"&plusdu;"sv, 10789},
{"&pluse;"sv, 10866},
{"&plusmn"sv, 177},
{"&plusmn;"sv, 177},
{"&plussim;"sv, 10790},
{"&plustwo;"sv, 10791},
{"&pm;"sv, 177},
{"&pointint;"sv, 10773},
{"&popf;"sv, 120161},
{"&pound"sv, 163},
{"&pound;"sv, 163},
{"&pr;"sv, 8826},
{"&prE;"sv, 10931},
{"&prap;"sv, 10935},
{"&prcue;"sv, 8828},
{"&pre;"sv, 10927},
{"&prec;"sv, 8826},
{"&precapprox;"sv, 10935},
{"&preccurlyeq;"sv, 8828},
{"&preceq;"sv, 10927},
{"&precnapprox;"sv, 10937},
{"&precneqq;"sv, 10933},
{"&precnsim;"sv, 8936},
{"&precsim;"sv, 8830},
{"&prime;"sv, 8242},
{"&primes;"sv, 8473},
{"&prnE;"sv, 10933},
{"&prnap;"sv, 10937},
{"&prnsim;"sv, 8936},
{"&prod;"sv, 8719},
{"&profalar;"sv, 9006},
{"&profline;"sv, 8978},
{"&profsurf;"sv, 8979},
{"&prop;"sv, 8733},
{"&propto;"sv, 8733},
{"&prsim;"sv, 8830},
{"&prurel;"sv, 8880},
{"&pscr;"sv, 120005},
{"&psi;"sv, 968},
{"&puncsp;"sv, 8200},
{"&qfr;"sv, 120110},
{"&qint;"sv, 10764},
{"&qopf;"sv, 120162},
{"&qprime;"sv, 8279},
{"&qscr;"sv, 120006},
{"&quaternions;"sv, 8461},
{"&quatint;"sv, 10774},
{"&quest;"sv, 63},
{"&questeq;"sv, 8799},
{"&quot"sv, 34},
{"&quot;"sv, 34},
{"&rAarr;"sv, 8667},
{"&rArr;"sv, 8658},
{"&rAtail;"sv, 10524},
{"&rBarr;"sv, 10511},
{"&rHar;"sv, 10596},
{"&race;"sv, 8765, 817},
{"&racute;"sv, 341},
{"&radic;"sv, 8730},
{"&raemptyv;"sv, 10675},
{"&rang;"sv, 10217},
{"&rangd;"sv, 10642},
{"&range;"sv, 10661},
{"&rangle;"sv, 10217},
{"&raquo"sv, 187},
{"&raquo;"sv, 187},
{"&rarr;"sv, 8594},
{"&rarrap;"sv, 10613},
{"&rarrb;"sv, 8677},
{"&rarrbfs;"sv, 10528},
{"&rarrc;"sv, 10547},
{"&rarrfs;"sv, 10526},
{"&rarrhk;"sv, 8618},
{"&rarrlp;"sv, 8620},
{"&rarrpl;"sv, 10565},
{"&rarrsim;"sv, 10612},
{"&rarrtl;"sv, 8611},
{"&rarrw;"sv, 8605},
{"&ratail;"sv, 10522},
{"&ratio;"sv, 8758},
{"&rationals;"sv, 8474},
{"&rbarr;"sv, 10509},
{"&rbbrk;"sv, 10099},
{"&rbrace;"sv, 125},
{"&rbrack;"sv, 93},
{"&rbrke;"sv, 10636},
{"&rbrksld;"sv, 10638},
{"&rbrkslu;"sv, 10640},
{"&rcaron;"sv, 345},
{"&rcedil;"sv, 343},
{"&rceil;"sv, 8969},
{"&rcub;"sv, 125},
{"&rcy;"sv, 1088},
{"&rdca;"sv, 10551},
{"&rdldhar;"sv, 10601},
{"&rdquo;"sv, 8221},
{"&rdquor;"sv, 8221},
{"&rdsh;"sv, 8627},
{"&real;"sv, 8476},
{"&realine;"sv, 8475},
{"&realpart;"sv, 8476},
{"&reals;"sv, 8477},
{"&rect;"sv, 9645},
{"&reg"sv, 174},
{"&reg;"sv, 174},
{"&rfisht;"sv, 10621},
{"&rfloor;"sv, 8971},
{"&rfr;"sv, 120111},
{"&rhard;"sv, 8641},
{"&rharu;"sv, 8640},
{"&rharul;"sv, 10604},
{"&rho;"sv, 961},
{"&rhov;"sv, 1009},
{"&rightarrow;"sv, 8594},
{"&rightarrowtail;"sv, 8611},
{"&rightharpoondown;"sv, 8641},
{"&rightharpoonup;"sv, 8640},
{"&rightleftarrows;"sv, 8644},
{"&rightleftharpoons;"sv, 8652},
{"&rightrightarrows;"sv, 8649},
{"&rightsquigarrow;"sv, 8605},
{"&rightthreetimes;"sv, 8908},
{"&ring;"sv, 730},
{"&risingdotseq;"sv, 8787},
{"&rlarr;"sv, 8644},
{"&rlhar;"sv, 8652},
{"&rlm;"sv, 8207},
{"&rmoust;"sv, 9137},
{"&rmoustache;"sv, 9137},
{"&rnmid;"sv, 10990},
{"&roang;"sv, 10221},
{"&roarr;"sv, 8702},
{"&robrk;"sv, 10215},
{"&ropar;"sv, 10630},
{"&ropf;"sv, 120163},
{"&roplus;"sv, 10798},
{"&rotimes;"sv, 10805},
{"&rpar;"sv, 41},
{"&rpargt;"sv, 10644},
{"&rppolint;"sv, 10770},
{"&rrarr;"sv, 8649},
{"&rsaquo;"sv, 8250},
{"&rscr;"sv, 120007},
{"&rsh;"sv, 8625},
{"&rsqb;"sv, 93},
{"&rsquo;"sv, 8217},
{"&rsquor;"sv, 8217},
{"&rthree;"sv, 8908},
{"&rtimes;"sv, 8906},
{"&rtri;"sv, 9657},
{"&rtrie;"sv, 8885},
{"&rtrif;"sv, 9656},
{"&rtriltri;"sv, 10702},
{"&ruluhar;"sv, 10600},
{"&rx;"sv, 8478},
{"&sacute;"sv, 347},
{"&sbquo;"sv, 8218},
{"&sc;"sv, 8827},
{"&scE;"sv, 10932},
{"&scap;"sv, 10936},
{"&scaron;"sv, 353},
{"&sccue;"sv, 8829},
{"&sce;"sv, 10928},
{"&scedil;"sv, 351},
{"&scirc;"sv, 349},
{"&scnE;"sv, 10934},
{"&scnap;"sv, 10938},
{"&scnsim;"sv, 8937},
{"&scpolint;"sv, 10771},
{"&scsim;"sv, 8831},
{"&scy;"sv, 1089},
{"&sdot;"sv, 8901},
{"&sdotb;"sv, 8865},
{"&sdote;"sv, 10854},
{"&seArr;"sv, 8664},
{"&searhk;"sv, 10533},
{"&searr;"sv, 8600},
{"&searrow;"sv, 8600},
{"&sect"sv, 167},
{"&sect;"sv, 167},
{"&semi;"sv, 59},
{"&seswar;"sv, 10537},
{"&setminus;"sv, 8726},
{"&setmn;"sv, 8726},
{"&sext;"sv, 10038},
{"&sfr;"sv, 120112},
{"&sfrown;"sv, 8994},
{"&sharp;"sv, 9839},
{"&shchcy;"sv, 1097},
{"&shcy;"sv, 1096},
{"&shortmid;"sv, 8739},
{"&shortparallel;"sv, 8741},
{"&shy"sv, 173},
{"&shy;"sv, 173},
{"&sigma;"sv, 963},
{"&sigmaf;"sv, 962},
{"&sigmav;"sv, 962},
{"&sim;"sv, 8764},
{"&simdot;"sv, 10858},
{"&sime;"sv, 8771},
{"&simeq;"sv, 8771},
{"&simg;"sv, 10910},
{"&simgE;"sv, 10912},
{"&siml;"sv, 10909},
{"&simlE;"sv, 10911},
{"&simne;"sv, 8774},
{"&simplus;"sv, 10788},
{"&simrarr;"sv, 10610},
{"&slarr;"sv, 8592},
{"&smallsetminus;"sv, 8726},
{"&smashp;"sv, 10803},
{"&smeparsl;"sv, 10724},
{"&smid;"sv, 8739},
{"&smile;"sv, 8995},
{"&smt;"sv, 10922},
{"&smte;"sv, 10924},
{"&smtes;"sv, 10924, 65024},
{"&softcy;"sv, 1100},
{"&sol;"sv, 47},
{"&solb;"sv, 10692},
{"&solbar;"sv, 9023},
{"&sopf;"sv, 120164},
{"&spades;"sv, 9824},
{"&spadesuit;"sv, 9824},
{"&spar;"sv, 8741},
{"&sqcap;"sv, 8851},
{"&sqcaps;"sv, 8851, 65024},
{"&sqcup;"sv, 8852},
{"&sqcups;"sv, 8852, 65024},
{"&sqsub;"sv, 8847},
{"&sqsube;"sv, 8849},
{"&sqsubset;"sv, 8847},
{"&sqsubseteq;"sv, 8849},
{"&sqsup;"sv, 8848},
{"&sqsupe;"sv, 8850},
{"&sqsupset;"sv, 8848},
{"&sqsupseteq;"sv, 8850},
{"&squ;"sv, 9633},
{"&square;"sv, 9633},
{"&squarf;"sv, 9642},
{"&squf;"sv, 9642},
{"&srarr;"sv, 8594},
{"&sscr;"sv, 120008},
{"&ssetmn;"sv, 8726},
{"&ssmile;"sv, 8995},
{"&sstarf;"sv, 8902},
{"&star;"sv, 9734},
{"&starf;"sv, 9733},
{"&straightepsilon;"sv, 1013},
{"&straightphi;"sv, 981},
{"&strns;"sv, 175},
{"&sub;"sv, 8834},
{"&subE;"sv, 10949},
{"&subdot;"sv, 10941},
{"&sube;"sv, 8838},
{"&subedot;"sv, 10947},
{"&submult;"sv, 10945},
{"&subnE;"sv, 10955},
{"&subne;"sv, 8842},
{"&subplus;"sv, 10943},
{"&subrarr;"sv, 10617},
{"&subset;"sv, 8834},
{"&subseteq;"sv, 8838},
{"&subseteqq;"sv, 10949},
{"&subsetneq;"sv, 8842},
{"&subsetneqq;"sv, 10955},
{"&subsim;"sv, 10951},
{"&subsub;"sv, 10965},
{"&subsup;"sv, 10963},
{"&succ;"sv, 8827},
{"&succapprox;"sv, 10936},
{"&succcurlyeq;"sv, 8829},
{"&succeq;"sv, 10928},
{"&succnapprox;"sv, 10938},
{"&succneqq;"sv, 10934},
{"&succnsim;"sv, 8937},
{"&succsim;"sv, 8831},
{"&sum;"sv, 8721},
{"&sung;"sv, 9834},
{"&sup1"sv, 185},
{"&sup1;"sv, 185},
{"&sup2"sv, 178},
{"&sup2;"sv, 178},
{"&sup3"sv, 179},
{"&sup3;"sv, 179},
{"&sup;"sv, 8835},
{"&supE;"sv, 10950},
{"&supdot;"sv, 10942},
{"&supdsub;"sv, 10968},
{"&supe;"sv, 8839},
{"&supedot;"sv, 10948},
{"&suphsol;"sv, 10185},
{"&suphsub;"sv, 10967},
{"&suplarr;"sv, 10619},
{"&supmult;"sv, 10946},
{"&supnE;"sv, 10956},
{"&supne;"sv, 8843},
{"&supplus;"sv, 10944},
{"&supset;"sv, 8835},
{"&supseteq;"sv, 8839},
{"&supseteqq;"sv, 10950},
{"&supsetneq;"sv, 8843},
{"&supsetneqq;"sv, 10956},
{"&supsim;"sv, 10952},
{"&supsub;"sv, 10964},
{"&supsup;"sv, 10966},
{"&swArr;"sv, 8665},
{"&swarhk;"sv, 10534},
{"&swarr;"sv, 8601},
{"&swarrow;"sv, 8601},
{"&swnwar;"sv, 10538},
{"&szlig"sv, 223},
{"&szlig;"sv, 223},
{"&target;"sv, 8982},
{"&tau;"sv, 964},
{"&tbrk;"sv, 9140},
{"&tcaron;"sv, 357},
{"&tcedil;"sv, 355},
{"&tcy;"sv, 1090},
{"&tdot;"sv, 8411},
{"&telrec;"sv, 8981},
{"&tfr;"sv, 120113},
{"&there4;"sv, 8756},
{"&therefore;"sv, 8756},
{"&theta;"sv, 952},
{"&thetasym;"sv, 977},
{"&thetav;"sv, 977},
{"&thickapprox;"sv, 8776},
{"&thicksim;"sv, 8764},
{"&thinsp;"sv, 8201},
{"&thkap;"sv, 8776},
{"&thksim;"sv, 8764},
{"&thorn"sv, 254},
{"&thorn;"sv, 254},
{"&tilde;"sv, 732},
{"&times"sv, 215},
{"&times;"sv, 215},
{"&timesb;"sv, 8864},
{"&timesbar;"sv, 10801},
{"&timesd;"sv, 10800},
{"&tint;"sv, 8749},
{"&toea;"sv, 10536},
{"&top;"sv, 8868},
{"&topbot;"sv, 9014},
{"&topcir;"sv, 10993},
{"&topf;"sv, 120165},
{"&topfork;"sv, 10970},
{"&tosa;"sv, 10537},
{"&tprime;"sv, 8244},
{"&trade;"sv, 8482},
{"&triangle;"sv, 9653},
{"&triangledown;"sv, 9663},
{"&triangleleft;"sv, 9667},
{"&trianglelefteq;"sv, 8884},
{"&triangleq;"sv, 8796},
{"&triangleright;"sv, 9657},
{"&trianglerighteq;"sv, 8885},
{"&tridot;"sv, 9708},
{"&trie;"sv, 8796},
{"&triminus;"sv, 10810},
{"&triplus;"sv, 10809},
{"&trisb;"sv, 10701},
{"&tritime;"sv, 10811},
{"&trpezium;"sv, 9186},
{"&tscr;"sv, 120009},
{"&tscy;"sv, 1094},
{"&tshcy;"sv, 1115},
{"&tstrok;"sv, 359},
{"&twixt;"sv, 8812},
{"&twoheadleftarrow;"sv, 8606},
{"&twoheadrightarrow;"sv, 8608},
{"&uArr;"sv, 8657},
{"&uHar;"sv, 10595},
{"&uacute"sv, 250},
{"&uacute;"sv, 250},
{"&uarr;"sv, 8593},
{"&ubrcy;"sv, 1118},
{"&ubreve;"sv, 365},
{"&ucirc"sv, 251},
{"&ucirc;"sv, 251},
{"&ucy;"sv, 1091},
{"&udarr;"sv, 8645},
{"&udblac;"sv, 369},
{"&udhar;"sv, 10606},
{"&ufisht;"sv, 10622},
{"&ufr;"sv, 120114},
{"&ugrave"sv, 249},
{"&ugrave;"sv, 249},
{"&uharl;"sv, 8639},
{"&uharr;"sv, 8638},
{"&uhblk;"sv, 9600},
{"&ulcorn;"sv, 8988},
{"&ulcorner;"sv, 8988},
{"&ulcrop;"sv, 8975},
{"&ultri;"sv, 9720},
{"&umacr;"sv, 363},
{"&uml"sv, 168},
{"&uml;"sv, 168},
{"&uogon;"sv, 371},
{"&uopf;"sv, 120166},
{"&uparrow;"sv, 8593},
{"&updownarrow;"sv, 8597},
{"&upharpoonleft;"sv, 8639},
{"&upharpoonright;"sv, 8638},
{"&uplus;"sv, 8846},
{"&upsi;"sv, 965},
{"&upsih;"sv, 978},
{"&upsilon;"sv, 965},
{"&upuparrows;"sv, 8648},
{"&urcorn;"sv, 8989},
{"&urcorner;"sv, 8989},
{"&urcrop;"sv, 8974},
{"&uring;"sv, 367},
{"&urtri;"sv, 9721},
{"&uscr;"sv, 120010},
{"&utdot;"sv, 8944},
{"&utilde;"sv, 361},
{"&utri;"sv, 9653},
{"&utrif;"sv, 9652},
{"&uuarr;"sv, 8648},
{"&uuml"sv, 252},
{"&uuml;"sv, 252},
{"&uwangle;"sv, 10663},
{"&vArr;"sv, 8661},
{"&vBar;"sv, 10984},
{"&vBarv;"sv, 10985},
{"&vDash;"sv, 8872},
{"&vangrt;"sv, 10652},
{"&varepsilon;"sv, 1013},
{"&varkappa;"sv, 1008},
{"&varnothing;"sv, 8709},
{"&varphi;"sv, 981},
{"&varpi;"sv, 982},
{"&varpropto;"sv, 8733},
{"&varr;"sv, 8597},
{"&varrho;"sv, 1009},
{"&varsigma;"sv, 962},
{"&varsubsetneq;"sv, 8842, 65024},
{"&varsubsetneqq;"sv, 10955, 65024},
{"&varsupsetneq;"sv, 8843, 65024},
{"&varsupsetneqq;"sv, 10956, 65024},
{"&vartheta;"sv, 977},
{"&vartriangleleft;"sv, 8882},
{"&vartriangleright;"sv, 8883},
{"&vcy;"sv, 1074},
{"&vdash;"sv, 8866},
{"&vee;"sv, 8744},
{"&veebar;"sv, 8891},
{"&veeeq;"sv, 8794},
{"&vellip;"sv, 8942},
{"&verbar;"sv, 124},
{"&vert;"sv, 124},
{"&vfr;"sv, 120115},
{"&vltri;"sv, 8882},
{"&vnsub;"sv, 8834, 8402},
{"&vnsup;"sv, 8835, 8402},
{"&vopf;"sv, 120167},
{"&vprop;"sv, 8733},
{"&vrtri;"sv, 8883},
{"&vscr;"sv, 120011},
{"&vsubnE;"sv, 10955, 65024},
{"&vsubne;"sv, 8842, 65024},
{"&vsupnE;"sv, 10956, 65024},
{"&vsupne;"sv, 8843, 65024},
{"&vzigzag;"sv, 10650},
{"&wcirc;"sv, 373},
{"&wedbar;"sv, 10847},
{"&wedge;"sv, 8743},
{"&wedgeq;"sv, 8793},
{"&weierp;"sv, 8472},
{"&wfr;"sv, 120116},
{"&wopf;"sv, 120168},
{"&wp;"sv, 8472},
{"&wr;"sv, 8768},
{"&wreath;"sv, 8768},
{"&wscr;"sv, 120012},
{"&xcap;"sv, 8898},
{"&xcirc;"sv, 9711},
{"&xcup;"sv, 8899},
{"&xdtri;"sv, 9661},
{"&xfr;"sv, 120117},
{"&xhArr;"sv, 10234},
{"&xharr;"sv, 10231},
{"&xi;"sv, 958},
{"&xlArr;"sv, 10232},
{"&xlarr;"sv, 10229},
{"&xmap;"sv, 10236},
{"&xnis;"sv, 8955},
{"&xodot;"sv, 10752},
{"&xopf;"sv, 120169},
{"&xoplus;"sv, 10753},
{"&xotime;"sv, 10754},
{"&xrArr;"sv, 10233},
{"&xrarr;"sv, 10230},
{"&xscr;"sv, 120013},
{"&xsqcup;"sv, 10758},
{"&xuplus;"sv, 10756},
{"&xutri;"sv, 9651},
{"&xvee;"sv, 8897},
{"&xwedge;"sv, 8896},
{"&yacute"sv, 253},
{"&yacute;"sv, 253},
{"&yacy;"sv, 1103},
{"&ycirc;"sv, 375},
{"&ycy;"sv, 1099},
{"&yen"sv, 165},
{"&yen;"sv, 165},
{"&yfr;"sv, 120118},
{"&yicy;"sv, 1111},
{"&yopf;"sv, 120170},
{"&yscr;"sv, 120014},
{"&yucy;"sv, 1102},
{"&yuml"sv, 255},
{"&yuml;"sv, 255},
{"&zacute;"sv, 378},
{"&zcaron;"sv, 382},
{"&zcy;"sv, 1079},
{"&zdot;"sv, 380},
{"&zeetrf;"sv, 8488},
{"&zeta;"sv, 950},
{"&zfr;"sv, 120119},
{"&zhcy;"sv, 1078},
{"&zigrarr;"sv, 8669},
{"&zopf;"sv, 120171},
{"&zscr;"sv, 120015},
{"&zwj;"sv, 8205},
{"&zwnj;"sv, 8204}});
 
} // namespace
 
std::optional<CharacterReference> find_named_character_reference_for(std::string_view buffer) {
std::optional<CharacterReference> maybe_reference{std::nullopt};
 
for (auto const &reference : references) {
if (buffer.starts_with(reference.name)
&& (!maybe_reference || reference.name.size() > maybe_reference->name.size())) {
maybe_reference = reference;
}
}
 
return maybe_reference;
}
 
} // namespace html2
 
filename was Deleted added: 2522, removed: 16, total 2506
@@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: 2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef HTML2_CHARACTER_REFERENCE_H_
#define HTML2_CHARACTER_REFERENCE_H_
 
#include <cstdint>
#include <optional>
#include <string_view>
 
namespace html2 {
 
// One named character reference: the name to match in the input and the one
// or two code points it stands for.
struct CharacterReference {
    // Name as stored in the lookup table, including the leading '&' and, for
    // most entries, the trailing ';' (e.g. "&lt;" and "&lt" are separate entries).
    std::string_view name = {};
    // The code point every reference maps to.
    std::uint32_t first_codepoint = 0;
    // Only set for references that map to two code points.
    std::optional<std::uint32_t> second_codepoint = std::nullopt;
};
 
// Returns the named character reference with the longest name that the given
// buffer starts with (e.g. "&lt;" rather than "&lt" for "&lt;abc"), or
// std::nullopt if the buffer doesn't start with any known name. Characters
// after the matched name are ignored.
std::optional<CharacterReference> find_named_character_reference_for(std::string_view);
 
} // namespace html2
 
#endif
 
filename was Deleted added: 2522, removed: 16, total 2506
@@ -0,0 +1,51 @@
// SPDX-FileCopyrightText: 2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "html2/character_reference.h"
 
#include "etest/etest.h"
 
using namespace std::literals;
 
using etest::expect;
using etest::require;
 
using namespace html2;
 
int main() {
etest::test("no entity found", [] {
auto ref = find_named_character_reference_for("A"sv);
expect(!ref.has_value());
});
 
etest::test("single-codepoint entity", [] {
auto ref = find_named_character_reference_for("&lt"sv);
require(ref.has_value());
expect(ref->name == "&lt"sv);
expect(ref->first_codepoint == '<');
expect(!ref->second_codepoint.has_value());
});
 
etest::test("double-codepoint entity", [] {
auto ref = find_named_character_reference_for("&NotSucceedsEqual;"sv);
require(ref.has_value());
expect(ref->name == "&NotSucceedsEqual;"sv);
expect(ref->first_codepoint == 0x02AB0u);
expect(ref->second_codepoint == 0x00338u);
});
 
etest::test("longest prefix is chosen", [] {
auto ref = find_named_character_reference_for("&lt;"sv);
require(ref.has_value());
expect(ref->name == "&lt;"sv); // And not &lt which also matches.
});
 
etest::test("extra characters are ignored", [] {
auto ref = find_named_character_reference_for("&lt;&lt;&abc;123"sv);
require(ref.has_value());
expect(ref->name == "&lt;"sv);
});
 
return etest::run_all_tests();
}
 
html2/tokenizer.cpp added: 2522, removed: 16, total 2506
@@ -1,15 +1,18 @@
// SPDX-FileCopyrightText: 2021 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "html2/tokenizer.h"
 
#include "html2/character_reference.h"
#include "util/string.h"
 
#include <spdlog/spdlog.h>
 
#include <cstdint>
#include <cstring>
#include <exception>
#include <limits>
#include <sstream>
 
using namespace std::literals;
@@ -29,6 +32,14 @@ constexpr bool is_ascii_alpha(char c) {
return is_ascii_upper_alpha(c) || is_ascii_lower_alpha(c);
}
 
// True when c is one of the ASCII digits '0' through '9'.
constexpr bool is_numeric(char c) {
    return '0' <= c && c <= '9';
}
 
// True when c is an ASCII letter or an ASCII digit.
constexpr bool is_ascii_alphanumeric(char c) {
    return is_ascii_alpha(c) || is_numeric(c);
}
 
// Adds 0x20 to c, which turns an ASCII uppercase letter into its lowercase
// counterpart. No check is made that c actually is an uppercase letter.
constexpr char to_lower(char c) {
    constexpr int ascii_case_offset = 0x20;
    return static_cast<char>(c + ascii_case_offset);
}
@@ -760,6 +771,74 @@ void Tokenizer::run() {
}
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
//
// Decides what kind of character reference (if any) follows. The temporary
// buffer is reset to just "&" so that it can be flushed verbatim if this
// turns out not to be a reference.
case State::CharacterReference: {
temporary_buffer_ = "&"s;
 
auto c = consume_next_input_character();
// Out of input: output the buffered "&" and hand control back to the state
// we came from.
if (!c) {
flush_code_points_consumed_as_a_character_reference();
reconsume_in(return_state_);
continue;
}
 
// A letter or digit may start a named reference; let that state look at
// the same character again.
if (is_ascii_alphanumeric(*c)) {
reconsume_in(State::NamedCharacterReference);
continue;
}
 
switch (*c) {
// "&#" introduces a numeric reference; the '#' is kept in the buffer.
case '#':
temporary_buffer_.append(1, *c);
state_ = State::NumericCharacterReference;
continue;
// Anything else: the "&" was just text. Flush it and reprocess the
// current character in the return state.
default:
flush_code_points_consumed_as_a_character_reference();
reconsume_in(return_state_);
continue;
}
}
 
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
//
// Matches the longest known named reference at the current position and
// replaces it with the character it stands for. Only references that map to a
// single code point representable in a char are handled; anything else
// terminates the program.
case State::NamedCharacterReference: {
// TODO(robinlinden): -1 here isn't great, but it works right now.
// NOTE(review): presumably the -1 backs the view up to the '&', since every
// name in the lookup table starts with '&' -- confirm against how pos_ is
// advanced before this state is entered.
auto maybe_reference = find_named_character_reference_for(input_.substr(pos_ - 1));
// No known name here: flush what has been buffered so far and let the
// ambiguous-ampersand state deal with the rest.
if (!maybe_reference) {
flush_code_points_consumed_as_a_character_reference();
state_ = State::AmbiguousAmpersand;
continue;
}
 
// -1 because of the TODO above.
pos_ += maybe_reference->name.size() - 1;
// Should be appending, but again, the TODO.
temporary_buffer_ = maybe_reference->name;
 
// Inside an attribute value, a match without a trailing ';' that is
// followed by '=' or an alphanumeric is not treated as a reference: the
// matched text is flushed unchanged (e.g. "&lt3" stays "&lt3").
auto c = peek_next_input_character();
if (c.has_value() && consumed_as_part_of_an_attribute() && temporary_buffer_.back() != ';'
&& (c == '=' || is_ascii_alphanumeric(*c))) {
flush_code_points_consumed_as_a_character_reference();
state_ = return_state_;
continue;
}
 
if (temporary_buffer_.back() != ';') {
// This is a missing-semicolon-after-character-reference parse error.
}
 
// Replace the buffered name with the referenced character. The buffer
// holds chars, so code points that don't fit in a char can't be stored.
// NOTE(review): numeric_limits<char>::max() depends on whether char is
// signed on the target, so the accepted range is platform-dependent.
temporary_buffer_.clear();
if (maybe_reference->first_codepoint > static_cast<std::uint32_t>(std::numeric_limits<char>::max())) {
std::terminate();
}
 
// References that map to two code points aren't handled either.
if (maybe_reference->second_codepoint) {
std::terminate();
}
 
temporary_buffer_.append(1, static_cast<char>(maybe_reference->first_codepoint));
flush_code_points_consumed_as_a_character_reference();
state_ = return_state_;
continue;
}
 
default:
std::terminate();
}
@@ -779,6 +858,14 @@ std::optional<char> Tokenizer::consume_next_input_character() {
return input_[pos_++];
}
 
std::optional<char> Tokenizer::peek_next_input_character() const {
if (is_eof()) {
return std::nullopt;
}
 
return input_[pos_];
}
 
// True once the current position is at or past the end of the input.
bool Tokenizer::is_eof() const {
    return input_.size() <= pos_;
}
@@ -804,4 +891,24 @@ void Tokenizer::reconsume_in(State state) {
state_ = state;
}
 
// True when the return state is one of the three attribute-value states,
// i.e. the character reference being processed belongs to an attribute value.
bool Tokenizer::consumed_as_part_of_an_attribute() const {
    if (return_state_ == State::AttributeValueDoubleQuoted) {
        return true;
    }

    if (return_state_ == State::AttributeValueSingleQuoted) {
        return true;
    }

    return return_state_ == State::AttributeValueUnquoted;
}
 
void Tokenizer::flush_code_points_consumed_as_a_character_reference() {
if (consumed_as_part_of_an_attribute()) {
current_attribute().value += temporary_buffer_;
return;
}
 
emit_temporary_buffer_as_character_tokens();
}
 
// Emits one CharacterToken per char in the temporary buffer, in order.
void Tokenizer::emit_temporary_buffer_as_character_tokens() {
    auto const last = temporary_buffer_.end();
    for (auto it = temporary_buffer_.begin(); it != last; ++it) {
        char ch = *it;
        emit(CharacterToken{ch});
    }
}
 
} // namespace html2
 
html2/tokenizer.h added: 2522, removed: 16, total 2506
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -158,15 +158,22 @@ private:
State return_state_{};
Token current_token_{};
 
std::string temporary_buffer_{};
 
std::function<void(Token &&)> on_emit_{};
 
void emit(Token &&) const;
std::optional<char> consume_next_input_character();
std::optional<char> peek_next_input_character() const;
bool is_eof() const;
 
void start_attribute_in_current_tag_token(Attribute);
Attribute &current_attribute();
void reconsume_in(State);
 
bool consumed_as_part_of_an_attribute() const;
void flush_code_points_consumed_as_a_character_reference();
void emit_temporary_buffer_as_character_tokens();
};
 
} // namespace html2
 
html2/tokenizer_test.cpp added: 2522, removed: 16, total 2506
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2021-2022 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -139,5 +139,20 @@ int main() {
expect_eq(tokens, std::vector<Token>{CommentToken{.data = "abc"}, EndOfFileToken{}});
});
 
etest::test("character entity reference, simple", [] {
auto tokens = run_tokenizer("&lt;");
expect_eq(tokens, std::vector<Token>{CharacterToken{'<'}, EndOfFileToken{}});
});
 
etest::test("character entity reference, only &", [] {
auto tokens = run_tokenizer("&");
expect_eq(tokens, std::vector<Token>{CharacterToken{'&'}, EndOfFileToken{}});
});
 
etest::test("character entity reference, not ascii alphanumeric", [] {
auto tokens = run_tokenizer("&@");
expect_eq(tokens, std::vector<Token>{CharacterToken{'&'}, CharacterToken{'@'}, EndOfFileToken{}});
});
 
return etest::run_all_tests();
}