diff options
author | Eduardo Julián | 2021-07-14 14:44:53 -0400 |
---|---|---|
committer | GitHub | 2021-07-14 14:44:53 -0400 |
commit | 89ca40f2f101b2b38187eab5cf905371cd47eb57 (patch) | |
tree | f05fd1677a70988c6b39c07e52d031d86eff28f1 /stdlib/source/library/lux/data/text/unicode/block.lux | |
parent | 2431e767a09894c2f685911ba7f1ba0b7de2a165 (diff) | |
parent | 8252bdb938a0284dd12e7365b4eb84b5357bacac (diff) |
Merge pull request #58 from LuxLang/hierarchy_normalization
Hierarchy normalization
Diffstat (limited to 'stdlib/source/library/lux/data/text/unicode/block.lux')
-rw-r--r-- | stdlib/source/library/lux/data/text/unicode/block.lux | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/text/unicode/block.lux b/stdlib/source/library/lux/data/text/unicode/block.lux new file mode 100644 index 000000000..24ddb34e2 --- /dev/null +++ b/stdlib/source/library/lux/data/text/unicode/block.lux @@ -0,0 +1,205 @@ +(.module: + [library + [lux #* + [abstract + [equivalence (#+ Equivalence)] + [hash (#+ Hash)] + [monoid (#+ Monoid)] + ["." interval (#+ Interval)]] + [math + [number (#+ hex) + ["n" nat ("#\." interval)] + ["." i64]]] + [type + abstract]]] + [/// (#+ Char)]) + +(abstract: #export Block + (Interval Char) + + (implementation: #export monoid + (Monoid Block) + + (def: identity + (:abstraction (interval.between n.enum n\top n\bottom))) + (def: (compose left right) + (let [left (:representation left) + right (:representation right)] + (:abstraction + (interval.between n.enum + (n.min (\ left bottom) + (\ right bottom)) + (n.max (\ left top) + (\ right top))))))) + + (def: #export (block start end) + (-> Char Char Block) + (:abstraction (interval.between n.enum (n.min start end) (n.max start end)))) + + (template [<name> <slot>] + [(def: #export <name> + (-> Block Char) + (|>> :representation (get@ <slot>)))] + + [start #interval.bottom] + [end #interval.top] + ) + + (def: #export (size block) + (-> Block Nat) + (let [start (get@ #interval.bottom (:representation block)) + end (get@ #interval.top (:representation block))] + (|> end (n.- start) inc))) + + (def: #export (within? block char) + (All [a] (-> Block Char Bit)) + (interval.within? (:representation block) char)) + ) + +(implementation: #export equivalence + (Equivalence Block) + + (def: (= reference subject) + (and (n.= (..start reference) (..start subject)) + (n.= (..end reference) (..end subject))))) + +(implementation: #export hash + (Hash Block) + + (def: &equivalence ..equivalence) + (def: (hash value) + (i64.or (i64.left_shift 32 (..start value)) + (..end value)))) + +(template [<name> <start> <end>] + [(def: #export <name> Block (..block (hex <start>) (hex <end>)))] + + ## Normal blocks + [basic_latin "0000" "007F"] + [latin_1_supplement "00A0" "00FF"] + [latin_extended_a "0100" "017F"] + [latin_extended_b "0180" "024F"] + [ipa_extensions "0250" "02AF"] + [spacing_modifier_letters "02B0" "02FF"] + [combining_diacritical_marks "0300" "036F"] + [greek_and_coptic "0370" "03FF"] + [cyrillic "0400" "04FF"] + [cyrillic_supplementary "0500" "052F"] + [armenian "0530" "058F"] + [hebrew "0590" "05FF"] + [arabic "0600" "06FF"] + [syriac "0700" "074F"] + [thaana "0780" "07BF"] + [devanagari "0900" "097F"] + [bengali "0980" "09FF"] + [gurmukhi "0A00" "0A7F"] + [gujarati "0A80" "0AFF"] + [oriya "0B00" "0B7F"] + [tamil "0B80" "0BFF"] + [telugu "0C00" "0C7F"] + [kannada "0C80" "0CFF"] + [malayalam "0D00" "0D7F"] + [sinhala "0D80" "0DFF"] + [thai "0E00" "0E7F"] + [lao "0E80" "0EFF"] + [tibetan "0F00" "0FFF"] + [myanmar "1000" "109F"] + [georgian "10A0" "10FF"] + [hangul_jamo "1100" "11FF"] + [ethiopic "1200" "137F"] + [cherokee "13A0" "13FF"] + [unified_canadian_aboriginal_syllabics "1400" "167F"] + [ogham "1680" "169F"] + [runic "16A0" "16FF"] + [tagalog "1700" "171F"] + [hanunoo "1720" "173F"] + [buhid "1740" "175F"] + [tagbanwa "1760" "177F"] + [khmer "1780" "17FF"] + [mongolian "1800" "18AF"] + [limbu "1900" "194F"] + [tai_le "1950" "197F"] + [khmer_symbols "19E0" "19FF"] + [phonetic_extensions "1D00" "1D7F"] + [latin_extended_additional "1E00" "1EFF"] + [greek_extended "1F00" "1FFF"] + [general_punctuation "2000" "206F"] + [superscripts_and_subscripts "2070" "209F"] + [currency_symbols "20A0" "20CF"] + [combining_diacritical_marks_for_symbols "20D0" "20FF"] + [letterlike_symbols "2100" "214F"] + [number_forms "2150" "218F"] + [arrows "2190" "21FF"] + [mathematical_operators "2200" "22FF"] + [miscellaneous_technical "2300" "23FF"] + [control_pictures "2400" "243F"] + [optical_character_recognition "2440" "245F"] + [enclosed_alphanumerics "2460" "24FF"] + [box_drawing "2500" "257F"] + [block_elements "2580" "259F"] + [geometric_shapes "25A0" "25FF"] + [miscellaneous_symbols "2600" "26FF"] + [dingbats "2700" "27BF"] + [miscellaneous_mathematical_symbols_a "27C0" "27EF"] + [supplemental_arrows_a "27F0" "27FF"] + [braille_patterns "2800" "28FF"] + [supplemental_arrows_b "2900" "297F"] + [miscellaneous_mathematical_symbols_b "2980" "29FF"] + [supplemental_mathematical_operators "2A00" "2AFF"] + [miscellaneous_symbols_and_arrows "2B00" "2BFF"] + [cjk_radicals_supplement "2E80" "2EFF"] + [kangxi_radicals "2F00" "2FDF"] + [ideographic_description_characters "2FF0" "2FFF"] + [cjk_symbols_and_punctuation "3000" "303F"] + [hiragana "3040" "309F"] + [katakana "30A0" "30FF"] + [bopomofo "3100" "312F"] + [hangul_compatibility_jamo "3130" "318F"] + [kanbun "3190" "319F"] + [bopomofo_extended "31A0" "31BF"] + [katakana_phonetic_extensions "31F0" "31FF"] + [enclosed_cjk_letters_and_months "3200" "32FF"] + [cjk_compatibility "3300" "33FF"] + [cjk_unified_ideographs_extension_a "3400" "4DBF"] + [yijing_hexagram_symbols "4DC0" "4DFF"] + [cjk_unified_ideographs "4E00" "9FFF"] + [yi_syllables "A000" "A48F"] + [yi_radicals "A490" "A4CF"] + [hangul_syllables "AC00" "D7AF"] + [high_surrogates "D800" "DB7F"] + [high_private_use_surrogates "DB80" "DBFF"] + [low_surrogates "DC00" "DFFF"] + [private_use_area "E000" "F8FF"] + [cjk_compatibility_ideographs "F900" "FAFF"] + [alphabetic_presentation_forms "FB00" "FB4F"] + [arabic_presentation_forms_a "FB50" "FDFF"] + [variation_selectors "FE00" "FE0F"] + [combining_half_marks "FE20" "FE2F"] + [cjk_compatibility_forms "FE30" "FE4F"] + [small_form_variants "FE50" "FE6F"] + [arabic_presentation_forms_b "FE70" "FEFF"] + [halfwidth_and_fullwidth_forms "FF00" "FFEF"] + [specials "FFF0" "FFFF"] + ## [linear_b_syllabary "10000" "1007F"] + ## [linear_b_ideograms "10080" "100FF"] + ## [aegean_numbers "10100" "1013F"] + ## [old_italic "10300" "1032F"] + ## [gothic "10330" "1034F"] + ## [ugaritic "10380" "1039F"] + ## [deseret "10400" "1044F"] + ## [shavian "10450" "1047F"] + ## [osmanya "10480" "104AF"] + ## [cypriot_syllabary "10800" "1083F"] + ## [byzantine_musical_symbols "1D000" "1D0FF"] + ## [musical_symbols "1D100" "1D1FF"] + ## [tai_xuan_jing_symbols "1D300" "1D35F"] + ## [mathematical_alphanumeric_symbols "1D400" "1D7FF"] + ## [cjk_unified_ideographs_extension_b "20000" "2A6DF"] + ## [cjk_compatibility_ideographs_supplement "2F800" "2FA1F"] + ## [tags "E0000" "E007F"] + + ## Specialized blocks + [basic_latin/decimal "0030" "0039"] + [basic_latin/upper "0041" "005A"] + [basic_latin/lower "0061" "007A"] + ) |