diff options
Diffstat (limited to 'stdlib/source/library/lux/data/text/unicode/set.lux')
-rw-r--r-- | stdlib/source/library/lux/data/text/unicode/set.lux | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/text/unicode/set.lux b/stdlib/source/library/lux/data/text/unicode/set.lux new file mode 100644 index 000000000..2c48aed41 --- /dev/null +++ b/stdlib/source/library/lux/data/text/unicode/set.lux @@ -0,0 +1,240 @@ +(.module: + [library + [lux #* + [abstract + [equivalence (#+ Equivalence)]] + [data + [collection + ["." list ("#\." fold functor)] + ["." set ("#\." equivalence)] + ["." tree #_ + ["#" finger (#+ Tree)]]]] + [type (#+ :by_example) + abstract]]] + ["." / #_ + ["/#" // #_ + [// (#+ Char)] + ["#." block (#+ Block)]]]) + +(def: builder + (tree.builder //block.monoid)) + +(def: :@: + (:by_example [@] + (tree.Builder @ Block) + ..builder + + @)) + +(abstract: #export Set + (Tree :@: Block []) + + (def: #export (compose left right) + (-> Set Set Set) + (:abstraction + (\ builder branch + (:representation left) + (:representation right)))) + + (def: (singleton block) + (-> Block Set) + (:abstraction + (\ builder leaf block []))) + + (def: #export (set [head tail]) + (-> [Block (List Block)] Set) + (list\fold (: (-> Block Set Set) + (function (_ block set) + (..compose (..singleton block) set))) + (..singleton head) + tail)) + + (def: character/0 + Set + (..set [//block.basic_latin + (list //block.latin_1_supplement + //block.latin_extended_a + //block.latin_extended_b + //block.ipa_extensions + //block.spacing_modifier_letters + //block.combining_diacritical_marks + //block.greek_and_coptic + //block.cyrillic + //block.cyrillic_supplementary + //block.armenian + //block.hebrew + //block.arabic + //block.syriac + //block.thaana + //block.devanagari + //block.bengali + //block.gurmukhi + //block.gujarati + //block.oriya + //block.tamil + //block.telugu + //block.kannada + //block.malayalam + //block.sinhala + //block.thai + //block.lao + //block.tibetan + //block.myanmar + //block.georgian)])) + + (def: character/1 + Set + (..set [//block.hangul_jamo + (list //block.ethiopic + //block.cherokee + //block.unified_canadian_aboriginal_syllabics + //block.ogham + //block.runic + //block.tagalog + //block.hanunoo + //block.buhid + //block.tagbanwa + //block.khmer + //block.mongolian + //block.limbu + //block.tai_le + //block.khmer_symbols + //block.phonetic_extensions + //block.latin_extended_additional + //block.greek_extended + //block.general_punctuation + //block.superscripts_and_subscripts + //block.currency_symbols + //block.combining_diacritical_marks_for_symbols + //block.letterlike_symbols + //block.number_forms + //block.arrows + //block.mathematical_operators + //block.miscellaneous_technical + //block.control_pictures + //block.optical_character_recognition + //block.enclosed_alphanumerics + //block.box_drawing)])) + + (def: character/2 + Set + (..set [//block.block_elements + (list //block.geometric_shapes + //block.miscellaneous_symbols + //block.dingbats + //block.miscellaneous_mathematical_symbols_a + //block.supplemental_arrows_a + //block.braille_patterns + //block.supplemental_arrows_b + //block.miscellaneous_mathematical_symbols_b + //block.supplemental_mathematical_operators + //block.miscellaneous_symbols_and_arrows + //block.cjk_radicals_supplement + //block.kangxi_radicals + //block.ideographic_description_characters + //block.cjk_symbols_and_punctuation + //block.hiragana + //block.katakana + //block.bopomofo + //block.hangul_compatibility_jamo + //block.kanbun + //block.bopomofo_extended + //block.katakana_phonetic_extensions + //block.enclosed_cjk_letters_and_months + //block.cjk_compatibility + //block.cjk_unified_ideographs_extension_a + //block.yijing_hexagram_symbols + //block.cjk_unified_ideographs + //block.yi_syllables + //block.yi_radicals + //block.hangul_syllables + )])) + + (def: #export character + Set + ($_ ..compose + ..character/0 + ..character/1 + ..character/2 + )) + + (def: #export non_character + Set + (..set [//block.high_surrogates + (list //block.high_private_use_surrogates + //block.low_surrogates + //block.private_use_area + //block.cjk_compatibility_ideographs + //block.alphabetic_presentation_forms + //block.arabic_presentation_forms_a + //block.variation_selectors + //block.combining_half_marks + //block.cjk_compatibility_forms + //block.small_form_variants + //block.arabic_presentation_forms_b + //block.halfwidth_and_fullwidth_forms + //block.specials + ## //block.linear_b_syllabary + ## //block.linear_b_ideograms + ## //block.aegean_numbers + ## //block.old_italic + ## //block.gothic + ## //block.ugaritic + ## //block.deseret + ## //block.shavian + ## //block.osmanya + ## //block.cypriot_syllabary + ## //block.byzantine_musical_symbols + ## //block.musical_symbols + ## //block.tai_xuan_jing_symbols + ## //block.mathematical_alphanumeric_symbols + ## //block.cjk_unified_ideographs_extension_b + ## //block.cjk_compatibility_ideographs_supplement + ## //block.tags + )])) + + (def: #export full + Set + ($_ ..compose + ..character + ..non_character + )) + + (def: #export (range set) + (-> Set [Char Char]) + (let [tag (tree.tag (:representation set))] + [(//block.start tag) + (//block.end tag)])) + + (def: #export (member? set character) + (-> Set Char Bit) + (loop [tree (:representation set)] + (if (//block.within? (tree.tag tree) character) + (case (tree.root tree) + (0 #0 _) + true + + (0 #1 left right) + (or (recur left) + (recur right))) + false))) + + (implementation: #export equivalence + (Equivalence Set) + + (def: (= reference subject) + (set\= (set.from_list //block.hash (tree.tags (:representation reference))) + (set.from_list //block.hash (tree.tags (:representation subject)))))) + ) + +(template [<name> <blocks>] + [(def: #export <name> + (..set <blocks>))] + + [ascii [//block.basic_latin (list)]] + [ascii/alpha [//block.basic_latin/upper (list //block.basic_latin/lower)]] + [ascii/alpha_num [//block.basic_latin/upper (list //block.basic_latin/lower //block.basic_latin/decimal)]] + [ascii/numeric [//block.basic_latin/decimal (list)]] + [ascii/upper [//block.basic_latin/upper (list)]] + [ascii/lower [//block.basic_latin/lower (list)]] + ) |