diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/lux/data/text/unicode.lux | 454 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/unicode/segment.lux | 204 |
2 files changed, 347 insertions, 311 deletions
diff --git a/stdlib/source/lux/data/text/unicode.lux b/stdlib/source/lux/data/text/unicode.lux
index 00c67f2c1..2aad089b9 100644
--- a/stdlib/source/lux/data/text/unicode.lux
+++ b/stdlib/source/lux/data/text/unicode.lux
@@ -1,196 +1,21 @@
 (.module:
   [lux #*
    [abstract
-    [monoid (#+ Monoid)]
-    ["." interval (#+ Interval)]]
+    [equivalence (#+ Equivalence)]]
    [data
-    [number (#+ hex)
-     ["n" nat ("#\." interval)]]
     [collection
      ["." list ("#\." fold functor)]
+     ["." set ("#\." equivalence)]
      ["." tree #_
       ["#" finger (#+ Tree)]]]]
    [type (#+ :by-example)
     abstract]]
-  [// (#+ Char)])
-
-(abstract: #export Segment
-  (Interval Char)
-
-  (structure: monoid
-    (Monoid Segment)
-
-    (def: identity
-      (:abstraction (interval.between n.enum n\top n\bottom)))
-    (def: (compose left right)
-      (let [left (:representation left)
-            right (:representation right)]
-        (:abstraction
-         (interval.between n.enum
-                           (n.min (\ left bottom)
-                                  (\ right bottom))
-                           (n.max (\ left top)
-                                  (\ right top)))))))
-
-  (def: #export (segment start end)
-    (-> Char Char Segment)
-    (:abstraction (interval.between n.enum (n.min start end) (n.max start end))))
-
-  (template [<name> <slot>]
-    [(def: #export <name>
-       (-> Segment Char)
-       (|>> :representation (get@ <slot>)))]
-
-    [start #interval.bottom]
-    [end #interval.top]
-    )
-
-  (def: #export (size segment)
-    (-> Segment Nat)
-    (let [start (get@ #interval.bottom (:representation segment))
-          end (get@ #interval.top (:representation segment))]
-      (|> end (n.- start) inc)))
-
-  (def: #export (within? segment char)
-    (All [a] (-> Segment Char Bit))
-    (interval.within? (:representation segment) char))
-  )
-
-(template [<name> <start> <end>]
-  [(def: #export <name> Segment (..segment (hex <start>) (hex <end>)))]
-
-  ## Normal segments
-  [basic-latin "0000" "007F"]
-  [latin-1-supplement "00A0" "00FF"]
-  [latin-extended-a "0100" "017F"]
-  [latin-extended-b "0180" "024F"]
-  [ipa-extensions "0250" "02AF"]
-  [spacing-modifier-letters "02B0" "02FF"]
-  [combining-diacritical-marks "0300" "036F"]
-  [greek-and-coptic "0370" "03FF"]
-  [cyrillic "0400" "04FF"]
-  [cyrillic-supplementary "0500" "052F"]
-  [armenian "0530" "058F"]
-  [hebrew "0590" "05FF"]
-  [arabic "0600" "06FF"]
-  [syriac "0700" "074F"]
-  [thaana "0780" "07BF"]
-  [devanagari "0900" "097F"]
-  [bengali "0980" "09FF"]
-  [gurmukhi "0A00" "0A7F"]
-  [gujarati "0A80" "0AFF"]
-  [oriya "0B00" "0B7F"]
-  [tamil "0B80" "0BFF"]
-  [telugu "0C00" "0C7F"]
-  [kannada "0C80" "0CFF"]
-  [malayalam "0D00" "0D7F"]
-  [sinhala "0D80" "0DFF"]
-  [thai "0E00" "0E7F"]
-  [lao "0E80" "0EFF"]
-  [tibetan "0F00" "0FFF"]
-  [myanmar "1000" "109F"]
-  [georgian "10A0" "10FF"]
-  [hangul-jamo "1100" "11FF"]
-  [ethiopic "1200" "137F"]
-  [cherokee "13A0" "13FF"]
-  [unified-canadian-aboriginal-syllabics "1400" "167F"]
-  [ogham "1680" "169F"]
-  [runic "16A0" "16FF"]
-  [tagalog "1700" "171F"]
-  [hanunoo "1720" "173F"]
-  [buhid "1740" "175F"]
-  [tagbanwa "1760" "177F"]
-  [khmer "1780" "17FF"]
-  [mongolian "1800" "18AF"]
-  [limbu "1900" "194F"]
-  [tai-le "1950" "197F"]
-  [khmer-symbols "19E0" "19FF"]
-  [phonetic-extensions "1D00" "1D7F"]
-  [latin-extended-additional "1E00" "1EFF"]
-  [greek-extended "1F00" "1FFF"]
-  [general-punctuation "2000" "206F"]
-  [superscripts-and-subscripts "2070" "209F"]
-  [currency-symbols "20A0" "20CF"]
-  [combining-diacritical-marks-for-symbols "20D0" "20FF"]
-  [letterlike-symbols "2100" "214F"]
-  [number-forms "2150" "218F"]
-  [arrows "2190" "21FF"]
-  [mathematical-operators "2200" "22FF"]
-  [miscellaneous-technical "2300" "23FF"]
-  [control-pictures "2400" "243F"]
-  [optical-character-recognition "2440" "245F"]
-  [enclosed-alphanumerics "2460" "24FF"]
-  [box-drawing "2500" "257F"]
-  [block-elements "2580" "259F"]
-  [geometric-shapes "25A0" "25FF"]
-  [miscellaneous-symbols "2600" "26FF"]
-  [dingbats "2700" "27BF"]
-  [miscellaneous-mathematical-symbols-a "27C0" "27EF"]
-  [supplemental-arrows-a "27F0" "27FF"]
-  [braille-patterns "2800" "28FF"]
-  [supplemental-arrows-b "2900" "297F"]
-  [miscellaneous-mathematical-symbols-b "2980" "29FF"]
-  [supplemental-mathematical-operators "2A00" "2AFF"]
-  [miscellaneous-symbols-and-arrows "2B00" "2BFF"]
-  [cjk-radicals-supplement "2E80" "2EFF"]
-  [kangxi-radicals "2F00" "2FDF"]
-  [ideographic-description-characters "2FF0" "2FFF"]
-  [cjk-symbols-and-punctuation "3000" "303F"]
-  [hiragana "3040" "309F"]
-  [katakana "30A0" "30FF"]
-  [bopomofo "3100" "312F"]
-  [hangul-compatibility-jamo "3130" "318F"]
-  [kanbun "3190" "319F"]
-  [bopomofo-extended "31A0" "31BF"]
-  [katakana-phonetic-extensions "31F0" "31FF"]
-  [enclosed-cjk-letters-and-months "3200" "32FF"]
-  [cjk-compatibility "3300" "33FF"]
-  [cjk-unified-ideographs-extension-a "3400" "4DBF"]
-  [yijing-hexagram-symbols "4DC0" "4DFF"]
-  [cjk-unified-ideographs "4E00" "9FFF"]
-  [yi-syllables "A000" "A48F"]
-  [yi-radicals "A490" "A4CF"]
-  [hangul-syllables "AC00" "D7AF"]
-  [high-surrogates "D800" "DB7F"]
-  [high-private-use-surrogates "DB80" "DBFF"]
-  [low-surrogates "DC00" "DFFF"]
-  [private-use-area "E000" "F8FF"]
-  [cjk-compatibility-ideographs "F900" "FAFF"]
-  [alphabetic-presentation-forms "FB00" "FB4F"]
-  [arabic-presentation-forms-a "FB50" "FDFF"]
-  [variation-selectors "FE00" "FE0F"]
-  [combining-half-marks "FE20" "FE2F"]
-  [cjk-compatibility-forms "FE30" "FE4F"]
-  [small-form-variants "FE50" "FE6F"]
-  [arabic-presentation-forms-b "FE70" "FEFF"]
-  [halfwidth-and-fullwidth-forms "FF00" "FFEF"]
-  [specials "FFF0" "FFFF"]
-  ## [linear-b-syllabary "10000" "1007F"]
-  ## [linear-b-ideograms "10080" "100FF"]
-  ## [aegean-numbers "10100" "1013F"]
-  ## [old-italic "10300" "1032F"]
-  ## [gothic "10330" "1034F"]
-  ## [ugaritic "10380" "1039F"]
-  ## [deseret "10400" "1044F"]
-  ## [shavian "10450" "1047F"]
-  ## [osmanya "10480" "104AF"]
-  ## [cypriot-syllabary "10800" "1083F"]
-  ## [byzantine-musical-symbols "1D000" "1D0FF"]
-  ## [musical-symbols "1D100" "1D1FF"]
-  ## [tai-xuan-jing-symbols "1D300" "1D35F"]
-  ## [mathematical-alphanumeric-symbols "1D400" "1D7FF"]
-  ## [cjk-unified-ideographs-extension-b "20000" "2A6DF"]
-  ## [cjk-compatibility-ideographs-supplement "2F800" "2FA1F"]
-  ## [tags "E0000" "E007F"]
-
-  ## Specialized segments
-  [basic-latin/decimal "0030" "0039"]
-  [basic-latin/upper-alpha "0041" "005A"]
-  [basic-latin/lower-alpha "0061" "007A"]
-  )
+  ["." / #_
+   ["#." segment (#+ Segment)]
+   [// (#+ Char)]])
 
 (def: builder
-  (tree.builder ..monoid))
+  (tree.builder /segment.monoid))
 
 (def: :@:
   (:by-example [@]
@@ -218,131 +43,131 @@
     (list\fold ..compose (..singleton head) (list\map ..singleton tail)))
 
   (def: half/0
-    (..set [basic-latin
-            (list latin-1-supplement
-                  latin-extended-a
-                  latin-extended-b
-                  ipa-extensions
-                  spacing-modifier-letters
-                  combining-diacritical-marks
-                  greek-and-coptic
-                  cyrillic
-                  cyrillic-supplementary
-                  armenian
-                  hebrew
-                  arabic
-                  syriac
-                  thaana
-                  devanagari
-                  bengali
-                  gurmukhi
-                  gujarati
-                  oriya
-                  tamil
-                  telugu
-                  kannada
-                  malayalam
-                  sinhala
-                  thai
-                  lao
-                  tibetan
-                  myanmar
-                  georgian
-                  hangul-jamo
-                  ethiopic
-                  cherokee
-                  unified-canadian-aboriginal-syllabics
-                  ogham
-                  runic
-                  tagalog
-                  hanunoo
-                  buhid
-                  tagbanwa
-                  khmer
-                  mongolian
-                  limbu
-                  tai-le
-                  khmer-symbols
-                  phonetic-extensions
-                  latin-extended-additional
-                  greek-extended
-                  general-punctuation
-                  superscripts-and-subscripts
-                  currency-symbols
-                  combining-diacritical-marks-for-symbols
-                  letterlike-symbols
-                  number-forms
-                  arrows
-                  mathematical-operators
-                  miscellaneous-technical
-                  control-pictures
-                  optical-character-recognition
-                  enclosed-alphanumerics
-                  box-drawing
+    (..set [/segment.basic-latin
+            (list /segment.latin-1-supplement
+                  /segment.latin-extended-a
+                  /segment.latin-extended-b
+                  /segment.ipa-extensions
+                  /segment.spacing-modifier-letters
+                  /segment.combining-diacritical-marks
+                  /segment.greek-and-coptic
+                  /segment.cyrillic
+                  /segment.cyrillic-supplementary
+                  /segment.armenian
+                  /segment.hebrew
+                  /segment.arabic
+                  /segment.syriac
+                  /segment.thaana
+                  /segment.devanagari
+                  /segment.bengali
+                  /segment.gurmukhi
+                  /segment.gujarati
+                  /segment.oriya
+                  /segment.tamil
+                  /segment.telugu
+                  /segment.kannada
+                  /segment.malayalam
+                  /segment.sinhala
+                  /segment.thai
+                  /segment.lao
+                  /segment.tibetan
+                  /segment.myanmar
+                  /segment.georgian
+                  /segment.hangul-jamo
+                  /segment.ethiopic
+                  /segment.cherokee
+                  /segment.unified-canadian-aboriginal-syllabics
+                  /segment.ogham
+                  /segment.runic
+                  /segment.tagalog
+                  /segment.hanunoo
+                  /segment.buhid
+                  /segment.tagbanwa
+                  /segment.khmer
+                  /segment.mongolian
+                  /segment.limbu
+                  /segment.tai-le
+                  /segment.khmer-symbols
+                  /segment.phonetic-extensions
+                  /segment.latin-extended-additional
+                  /segment.greek-extended
+                  /segment.general-punctuation
+                  /segment.superscripts-and-subscripts
+                  /segment.currency-symbols
+                  /segment.combining-diacritical-marks-for-symbols
+                  /segment.letterlike-symbols
+                  /segment.number-forms
+                  /segment.arrows
+                  /segment.mathematical-operators
+                  /segment.miscellaneous-technical
+                  /segment.control-pictures
+                  /segment.optical-character-recognition
+                  /segment.enclosed-alphanumerics
+                  /segment.box-drawing
                   )]))
 
   (def: half/1
-    (..set [block-elements
-            (list geometric-shapes
-                  miscellaneous-symbols
-                  dingbats
-                  miscellaneous-mathematical-symbols-a
-                  supplemental-arrows-a
-                  braille-patterns
-                  supplemental-arrows-b
-                  miscellaneous-mathematical-symbols-b
-                  supplemental-mathematical-operators
-                  miscellaneous-symbols-and-arrows
-                  cjk-radicals-supplement
-                  kangxi-radicals
-                  ideographic-description-characters
-                  cjk-symbols-and-punctuation
-                  hiragana
-                  katakana
-                  bopomofo
-                  hangul-compatibility-jamo
-                  kanbun
-                  bopomofo-extended
-                  katakana-phonetic-extensions
-                  enclosed-cjk-letters-and-months
-                  cjk-compatibility
-                  cjk-unified-ideographs-extension-a
-                  yijing-hexagram-symbols
-                  cjk-unified-ideographs
-                  yi-syllables
-                  yi-radicals
-                  hangul-syllables
-                  ## high-surrogates
-                  ## high-private-use-surrogates
-                  ## low-surrogates
-                  ## private-use-area
-                  cjk-compatibility-ideographs
-                  alphabetic-presentation-forms
-                  arabic-presentation-forms-a
-                  variation-selectors
-                  combining-half-marks
-                  cjk-compatibility-forms
-                  small-form-variants
-                  arabic-presentation-forms-b
-                  halfwidth-and-fullwidth-forms
-                  specials
-                  ## linear-b-syllabary
-                  ## linear-b-ideograms
-                  ## aegean-numbers
-                  ## old-italic
-                  ## gothic
-                  ## ugaritic
-                  ## deseret
-                  ## shavian
-                  ## osmanya
-                  ## cypriot-syllabary
-                  ## byzantine-musical-symbols
-                  ## musical-symbols
-                  ## tai-xuan-jing-symbols
-                  ## mathematical-alphanumeric-symbols
-                  ## cjk-unified-ideographs-extension-b
-                  ## cjk-compatibility-ideographs-supplement
-                  ## tags
+    (..set [/segment.block-elements
+            (list /segment.geometric-shapes
+                  /segment.miscellaneous-symbols
+                  /segment.dingbats
+                  /segment.miscellaneous-mathematical-symbols-a
+                  /segment.supplemental-arrows-a
+                  /segment.braille-patterns
+                  /segment.supplemental-arrows-b
+                  /segment.miscellaneous-mathematical-symbols-b
+                  /segment.supplemental-mathematical-operators
+                  /segment.miscellaneous-symbols-and-arrows
+                  /segment.cjk-radicals-supplement
+                  /segment.kangxi-radicals
+                  /segment.ideographic-description-characters
+                  /segment.cjk-symbols-and-punctuation
+                  /segment.hiragana
+                  /segment.katakana
+                  /segment.bopomofo
+                  /segment.hangul-compatibility-jamo
+                  /segment.kanbun
+                  /segment.bopomofo-extended
+                  /segment.katakana-phonetic-extensions
+                  /segment.enclosed-cjk-letters-and-months
+                  /segment.cjk-compatibility
+                  /segment.cjk-unified-ideographs-extension-a
+                  /segment.yijing-hexagram-symbols
+                  /segment.cjk-unified-ideographs
+                  /segment.yi-syllables
+                  /segment.yi-radicals
+                  /segment.hangul-syllables
+                  ## /segment.high-surrogates
+                  ## /segment.high-private-use-surrogates
+                  ## /segment.low-surrogates
+                  ## /segment.private-use-area
+                  /segment.cjk-compatibility-ideographs
+                  /segment.alphabetic-presentation-forms
+                  /segment.arabic-presentation-forms-a
+                  /segment.variation-selectors
+                  /segment.combining-half-marks
+                  /segment.cjk-compatibility-forms
+                  /segment.small-form-variants
+                  /segment.arabic-presentation-forms-b
+                  /segment.halfwidth-and-fullwidth-forms
+                  /segment.specials
+                  ## /segment.linear-b-syllabary
+                  ## /segment.linear-b-ideograms
+                  ## /segment.aegean-numbers
+                  ## /segment.old-italic
+                  ## /segment.gothic
+                  ## /segment.ugaritic
+                  ## /segment.deseret
+                  ## /segment.shavian
+                  ## /segment.osmanya
+                  ## /segment.cypriot-syllabary
+                  ## /segment.byzantine-musical-symbols
+                  ## /segment.musical-symbols
+                  ## /segment.tai-xuan-jing-symbols
+                  ## /segment.mathematical-alphanumeric-symbols
+                  ## /segment.cjk-unified-ideographs-extension-b
+                  ## /segment.cjk-compatibility-ideographs-supplement
+                  ## /segment.tags
                   )]))
 
   (def: #export full
@@ -351,13 +176,13 @@
   (def: #export (range set)
     (-> Set [Char Char])
     (let [tag (tree.tag (:representation set))]
-      [(..start tag)
-       (..end tag)]))
+      [(/segment.start tag)
+       (/segment.end tag)]))
 
   (def: #export (member? set character)
     (-> Set Char Bit)
     (loop [tree (:representation set)]
-      (if (..within? (tree.tag tree) character)
+      (if (/segment.within? (tree.tag tree) character)
         (case (tree.root tree)
           (0 #0 _)
           true
@@ -366,15 +191,22 @@
           (or (recur left)
               (recur right)))
         false)))
+
+  (structure: #export equivalence
+    (Equivalence Set)
+
+    (def: (= reference subject)
+      (set\= (set.from-list /segment.hash (tree.tags (:representation reference)))
+             (set.from-list /segment.hash (tree.tags (:representation subject))))))
   )
 
 (template [<name> <segments>]
   [(def: #export <name>
      (..set <segments>))]
 
-  [ascii [basic-latin (list)]]
-  [ascii/alpha [basic-latin/upper-alpha (list basic-latin/lower-alpha)]]
-  [ascii/alpha-num [basic-latin/upper-alpha (list basic-latin/lower-alpha basic-latin/decimal)]]
-  [ascii/upper-alpha [basic-latin/upper-alpha (list)]]
-  [ascii/lower-alpha [basic-latin/lower-alpha (list)]]
+  [ascii [/segment.basic-latin (list)]]
+  [ascii/alpha [/segment.basic-latin/upper-alpha (list /segment.basic-latin/lower-alpha)]]
+  [ascii/alpha-num [/segment.basic-latin/upper-alpha (list /segment.basic-latin/lower-alpha /segment.basic-latin/decimal)]]
+  [ascii/upper-alpha [/segment.basic-latin/upper-alpha (list)]]
+  [ascii/lower-alpha [/segment.basic-latin/lower-alpha (list)]]
   )
diff --git a/stdlib/source/lux/data/text/unicode/segment.lux b/stdlib/source/lux/data/text/unicode/segment.lux
new file mode 100644
index 000000000..a2507cc1e
--- /dev/null
+++ b/stdlib/source/lux/data/text/unicode/segment.lux
@@ -0,0 +1,204 @@
+(.module:
+  [lux #*
+   [abstract
+    [equivalence (#+ Equivalence)]
+    [hash (#+ Hash)]
+    [monoid (#+ Monoid)]
+    ["." interval (#+ Interval)]]
+   [data
+    [number (#+ hex)
+     ["n" nat ("#\." interval)]
+     ["." i64]]]
+   [type
+    abstract]]
+  [/// (#+ Char)])
+
+(abstract: #export Segment
+  (Interval Char)
+
+  (structure: #export monoid
+    (Monoid Segment)
+
+    (def: identity
+      (:abstraction (interval.between n.enum n\top n\bottom)))
+    (def: (compose left right)
+      (let [left (:representation left)
+            right (:representation right)]
+        (:abstraction
+         (interval.between n.enum
+                           (n.min (\ left bottom)
+                                  (\ right bottom))
+                           (n.max (\ left top)
+                                  (\ right top)))))))
+
+  (def: #export (segment start end)
+    (-> Char Char Segment)
+    (:abstraction (interval.between n.enum (n.min start end) (n.max start end))))
+
+  (template [<name> <slot>]
+    [(def: #export <name>
+       (-> Segment Char)
+       (|>> :representation (get@ <slot>)))]
+
+    [start #interval.bottom]
+    [end #interval.top]
+    )
+
+  (def: #export (size segment)
+    (-> Segment Nat)
+    (let [start (get@ #interval.bottom (:representation segment))
+          end (get@ #interval.top (:representation segment))]
+      (|> end (n.- start) inc)))
+
+  (def: #export (within? segment char)
+    (All [a] (-> Segment Char Bit))
+    (interval.within? (:representation segment) char))
+  )
+
+(structure: #export equivalence
+  (Equivalence Segment)
+
+  (def: (= reference subject)
+    (and (n.= (..start reference) (..start subject))
+         (n.= (..end reference) (..end subject)))))
+
+(structure: #export hash
+  (Hash Segment)
+
+  (def: &equivalence ..equivalence)
+  (def: (hash value)
+    (i64.or (i64.left-shift 32 (..start value))
+            (..end value))))
+
+(template [<name> <start> <end>]
+  [(def: #export <name> Segment (..segment (hex <start>) (hex <end>)))]
+
+  ## Normal segments
+  [basic-latin "0000" "007F"]
+  [latin-1-supplement "00A0" "00FF"]
+  [latin-extended-a "0100" "017F"]
+  [latin-extended-b "0180" "024F"]
+  [ipa-extensions "0250" "02AF"]
+  [spacing-modifier-letters "02B0" "02FF"]
+  [combining-diacritical-marks "0300" "036F"]
+  [greek-and-coptic "0370" "03FF"]
+  [cyrillic "0400" "04FF"]
+  [cyrillic-supplementary "0500" "052F"]
+  [armenian "0530" "058F"]
+  [hebrew "0590" "05FF"]
+  [arabic "0600" "06FF"]
+  [syriac "0700" "074F"]
+  [thaana "0780" "07BF"]
+  [devanagari "0900" "097F"]
+  [bengali "0980" "09FF"]
+  [gurmukhi "0A00" "0A7F"]
+  [gujarati "0A80" "0AFF"]
+  [oriya "0B00" "0B7F"]
+  [tamil "0B80" "0BFF"]
+  [telugu "0C00" "0C7F"]
+  [kannada "0C80" "0CFF"]
+  [malayalam "0D00" "0D7F"]
+  [sinhala "0D80" "0DFF"]
+  [thai "0E00" "0E7F"]
+  [lao "0E80" "0EFF"]
+  [tibetan "0F00" "0FFF"]
+  [myanmar "1000" "109F"]
+  [georgian "10A0" "10FF"]
+  [hangul-jamo "1100" "11FF"]
+  [ethiopic "1200" "137F"]
+  [cherokee "13A0" "13FF"]
+  [unified-canadian-aboriginal-syllabics "1400" "167F"]
+  [ogham "1680" "169F"]
+  [runic "16A0" "16FF"]
+  [tagalog "1700" "171F"]
+  [hanunoo "1720" "173F"]
+  [buhid "1740" "175F"]
+  [tagbanwa "1760" "177F"]
+  [khmer "1780" "17FF"]
+  [mongolian "1800" "18AF"]
+  [limbu "1900" "194F"]
+  [tai-le "1950" "197F"]
+  [khmer-symbols "19E0" "19FF"]
+  [phonetic-extensions "1D00" "1D7F"]
+  [latin-extended-additional "1E00" "1EFF"]
+  [greek-extended "1F00" "1FFF"]
+  [general-punctuation "2000" "206F"]
+  [superscripts-and-subscripts "2070" "209F"]
+  [currency-symbols "20A0" "20CF"]
+  [combining-diacritical-marks-for-symbols "20D0" "20FF"]
+  [letterlike-symbols "2100" "214F"]
+  [number-forms "2150" "218F"]
+  [arrows "2190" "21FF"]
+  [mathematical-operators "2200" "22FF"]
+  [miscellaneous-technical "2300" "23FF"]
+  [control-pictures "2400" "243F"]
+  [optical-character-recognition "2440" "245F"]
+  [enclosed-alphanumerics "2460" "24FF"]
+  [box-drawing "2500" "257F"]
+  [block-elements "2580" "259F"]
+  [geometric-shapes "25A0" "25FF"]
+  [miscellaneous-symbols "2600" "26FF"]
+  [dingbats "2700" "27BF"]
+  [miscellaneous-mathematical-symbols-a "27C0" "27EF"]
+  [supplemental-arrows-a "27F0" "27FF"]
+  [braille-patterns "2800" "28FF"]
+  [supplemental-arrows-b "2900" "297F"]
+  [miscellaneous-mathematical-symbols-b "2980" "29FF"]
+  [supplemental-mathematical-operators "2A00" "2AFF"]
+  [miscellaneous-symbols-and-arrows "2B00" "2BFF"]
+  [cjk-radicals-supplement "2E80" "2EFF"]
+  [kangxi-radicals "2F00" "2FDF"]
+  [ideographic-description-characters "2FF0" "2FFF"]
+  [cjk-symbols-and-punctuation "3000" "303F"]
+  [hiragana "3040" "309F"]
+  [katakana "30A0" "30FF"]
+  [bopomofo "3100" "312F"]
+  [hangul-compatibility-jamo "3130" "318F"]
+  [kanbun "3190" "319F"]
+  [bopomofo-extended "31A0" "31BF"]
+  [katakana-phonetic-extensions "31F0" "31FF"]
+  [enclosed-cjk-letters-and-months "3200" "32FF"]
+  [cjk-compatibility "3300" "33FF"]
+  [cjk-unified-ideographs-extension-a "3400" "4DBF"]
+  [yijing-hexagram-symbols "4DC0" "4DFF"]
+  [cjk-unified-ideographs "4E00" "9FFF"]
+  [yi-syllables "A000" "A48F"]
+  [yi-radicals "A490" "A4CF"]
+  [hangul-syllables "AC00" "D7AF"]
+  [high-surrogates "D800" "DB7F"]
+  [high-private-use-surrogates "DB80" "DBFF"]
+  [low-surrogates "DC00" "DFFF"]
+  [private-use-area "E000" "F8FF"]
+  [cjk-compatibility-ideographs "F900" "FAFF"]
+  [alphabetic-presentation-forms "FB00" "FB4F"]
+  [arabic-presentation-forms-a "FB50" "FDFF"]
+  [variation-selectors "FE00" "FE0F"]
+  [combining-half-marks "FE20" "FE2F"]
+  [cjk-compatibility-forms "FE30" "FE4F"]
+  [small-form-variants "FE50" "FE6F"]
+  [arabic-presentation-forms-b "FE70" "FEFF"]
+  [halfwidth-and-fullwidth-forms "FF00" "FFEF"]
+  [specials "FFF0" "FFFF"]
+  ## [linear-b-syllabary "10000" "1007F"]
+  ## [linear-b-ideograms "10080" "100FF"]
+  ## [aegean-numbers "10100" "1013F"]
+  ## [old-italic "10300" "1032F"]
+  ## [gothic "10330" "1034F"]
+  ## [ugaritic "10380" "1039F"]
+  ## [deseret "10400" "1044F"]
+  ## [shavian "10450" "1047F"]
+  ## [osmanya "10480" "104AF"]
+  ## [cypriot-syllabary "10800" "1083F"]
+  ## [byzantine-musical-symbols "1D000" "1D0FF"]
+  ## [musical-symbols "1D100" "1D1FF"]
+  ## [tai-xuan-jing-symbols "1D300" "1D35F"]
+  ## [mathematical-alphanumeric-symbols "1D400" "1D7FF"]
+  ## [cjk-unified-ideographs-extension-b "20000" "2A6DF"]
+  ## [cjk-compatibility-ideographs-supplement "2F800" "2FA1F"]
+  ## [tags "E0000" "E007F"]
+
+  ## Specialized segments
+  [basic-latin/decimal "0030" "0039"]
+  [basic-latin/upper-alpha "0041" "005A"]
+  [basic-latin/lower-alpha "0061" "007A"]
+  )