aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/lux
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- stdlib/source/lux/control/concurrency/frp.lux | 2
-rw-r--r-- stdlib/source/lux/data/collection/tree/finger.lux | 23
-rw-r--r-- stdlib/source/lux/data/maybe.lux | 15
-rw-r--r-- stdlib/source/lux/data/text/unicode.lux | 454
-rw-r--r-- stdlib/source/lux/data/text/unicode/segment.lux | 204
5 files changed, 378 insertions, 320 deletions
diff --git a/stdlib/source/lux/control/concurrency/frp.lux b/stdlib/source/lux/control/concurrency/frp.lux
index 4709a0cad..aea0b082a 100644
--- a/stdlib/source/lux/control/concurrency/frp.lux
+++ b/stdlib/source/lux/control/concurrency/frp.lux
@@ -161,7 +161,7 @@
(recur tail)
#.None
- (recur tail))
+ (wrap []))
#.None
(wrap [])))))
diff --git a/stdlib/source/lux/data/collection/tree/finger.lux b/stdlib/source/lux/data/collection/tree/finger.lux
index c3e20ce08..c18ff7251 100644
--- a/stdlib/source/lux/data/collection/tree/finger.lux
+++ b/stdlib/source/lux/data/collection/tree/finger.lux
@@ -3,6 +3,9 @@
[abstract
[predicate (#+ Predicate)]
["." monoid (#+ Monoid)]]
+ [data
+ [collection
+ ["." list ("#\." monoid)]]]
[type (#+ :by-example)
[abstract (#+ abstract: :abstraction :representation)]]])
@@ -55,6 +58,26 @@
(0 #1 [left right])
(value left)))
+ ## Collects the tag stored at every leaf of the tree into a list.
+ (def: #export (tags tree)
+ (All [@ t v] (-> (Tree @ t v) (List t)))
+ (case (get@ #root (:representation tree))
+ ## Leaf: only the tag kept alongside the leaf is needed, so its value is ignored.
+ (0 #0 _)
+ (list (get@ #tag (:representation tree)))
+
+ ## Branch: combine the tags gathered from both sub-trees using the list monoid.
+ (0 #1 [left right])
+ (list\compose (tags left)
+ (tags right))))
+
+ ## Gathers the value held by every leaf of the tree into a list.
+ (def: #export (values tree)
+ (All [@ t v] (-> (Tree @ t v) (List v)))
+ (case (|> tree :representation (get@ #root))
+ ## A leaf contributes its single value.
+ (0 #0 leaf)
+ (list leaf)
+
+ ## A branch contributes the values of both sub-trees, combined with the list monoid.
+ (0 #1 [lefts rights])
+ (list\compose (values lefts)
+ (values rights))))
+
(def: #export (search predicate tree)
(All [@ t v] (-> (Predicate t) (Tree @ t v) (Maybe v)))
(let [[monoid tag root] (:representation tree)]
diff --git a/stdlib/source/lux/data/maybe.lux b/stdlib/source/lux/data/maybe.lux
index 7b6f3ace4..6584eaf6a 100644
--- a/stdlib/source/lux/data/maybe.lux
+++ b/stdlib/source/lux/data/maybe.lux
@@ -125,14 +125,13 @@
+20)}
(case tokens
(^ (list else maybe))
- (let [g!temp (: Code [location.dummy (#.Identifier ["" ""])])
- code (` (case (~ maybe)
- (#.Some (~ g!temp))
- (~ g!temp)
-
- #.None
- (~ else)))]
- (#.Right [state (list code)]))
+ (let [g!temp (: Code [location.dummy (#.Identifier ["" ""])])]
+ (#.Right [state (list (` (case (~ maybe)
+ (#.Some (~ g!temp))
+ (~ g!temp)
+
+ #.None
+ (~ else))))]))
_
(#.Left "Wrong syntax for default")))
diff --git a/stdlib/source/lux/data/text/unicode.lux b/stdlib/source/lux/data/text/unicode.lux
index 00c67f2c1..2aad089b9 100644
--- a/stdlib/source/lux/data/text/unicode.lux
+++ b/stdlib/source/lux/data/text/unicode.lux
@@ -1,196 +1,21 @@
(.module:
[lux #*
[abstract
- [monoid (#+ Monoid)]
- ["." interval (#+ Interval)]]
+ [equivalence (#+ Equivalence)]]
[data
- [number (#+ hex)
- ["n" nat ("#\." interval)]]
[collection
["." list ("#\." fold functor)]
+ ["." set ("#\." equivalence)]
["." tree #_
["#" finger (#+ Tree)]]]]
[type (#+ :by-example)
abstract]]
- [// (#+ Char)])
-
-(abstract: #export Segment
- (Interval Char)
-
- (structure: monoid
- (Monoid Segment)
-
- (def: identity
- (:abstraction (interval.between n.enum n\top n\bottom)))
- (def: (compose left right)
- (let [left (:representation left)
- right (:representation right)]
- (:abstraction
- (interval.between n.enum
- (n.min (\ left bottom)
- (\ right bottom))
- (n.max (\ left top)
- (\ right top)))))))
-
- (def: #export (segment start end)
- (-> Char Char Segment)
- (:abstraction (interval.between n.enum (n.min start end) (n.max start end))))
-
- (template [<name> <slot>]
- [(def: #export <name>
- (-> Segment Char)
- (|>> :representation (get@ <slot>)))]
-
- [start #interval.bottom]
- [end #interval.top]
- )
-
- (def: #export (size segment)
- (-> Segment Nat)
- (let [start (get@ #interval.bottom (:representation segment))
- end (get@ #interval.top (:representation segment))]
- (|> end (n.- start) inc)))
-
- (def: #export (within? segment char)
- (All [a] (-> Segment Char Bit))
- (interval.within? (:representation segment) char))
- )
-
-(template [<name> <start> <end>]
- [(def: #export <name> Segment (..segment (hex <start>) (hex <end>)))]
-
- ## Normal segments
- [basic-latin "0000" "007F"]
- [latin-1-supplement "00A0" "00FF"]
- [latin-extended-a "0100" "017F"]
- [latin-extended-b "0180" "024F"]
- [ipa-extensions "0250" "02AF"]
- [spacing-modifier-letters "02B0" "02FF"]
- [combining-diacritical-marks "0300" "036F"]
- [greek-and-coptic "0370" "03FF"]
- [cyrillic "0400" "04FF"]
- [cyrillic-supplementary "0500" "052F"]
- [armenian "0530" "058F"]
- [hebrew "0590" "05FF"]
- [arabic "0600" "06FF"]
- [syriac "0700" "074F"]
- [thaana "0780" "07BF"]
- [devanagari "0900" "097F"]
- [bengali "0980" "09FF"]
- [gurmukhi "0A00" "0A7F"]
- [gujarati "0A80" "0AFF"]
- [oriya "0B00" "0B7F"]
- [tamil "0B80" "0BFF"]
- [telugu "0C00" "0C7F"]
- [kannada "0C80" "0CFF"]
- [malayalam "0D00" "0D7F"]
- [sinhala "0D80" "0DFF"]
- [thai "0E00" "0E7F"]
- [lao "0E80" "0EFF"]
- [tibetan "0F00" "0FFF"]
- [myanmar "1000" "109F"]
- [georgian "10A0" "10FF"]
- [hangul-jamo "1100" "11FF"]
- [ethiopic "1200" "137F"]
- [cherokee "13A0" "13FF"]
- [unified-canadian-aboriginal-syllabics "1400" "167F"]
- [ogham "1680" "169F"]
- [runic "16A0" "16FF"]
- [tagalog "1700" "171F"]
- [hanunoo "1720" "173F"]
- [buhid "1740" "175F"]
- [tagbanwa "1760" "177F"]
- [khmer "1780" "17FF"]
- [mongolian "1800" "18AF"]
- [limbu "1900" "194F"]
- [tai-le "1950" "197F"]
- [khmer-symbols "19E0" "19FF"]
- [phonetic-extensions "1D00" "1D7F"]
- [latin-extended-additional "1E00" "1EFF"]
- [greek-extended "1F00" "1FFF"]
- [general-punctuation "2000" "206F"]
- [superscripts-and-subscripts "2070" "209F"]
- [currency-symbols "20A0" "20CF"]
- [combining-diacritical-marks-for-symbols "20D0" "20FF"]
- [letterlike-symbols "2100" "214F"]
- [number-forms "2150" "218F"]
- [arrows "2190" "21FF"]
- [mathematical-operators "2200" "22FF"]
- [miscellaneous-technical "2300" "23FF"]
- [control-pictures "2400" "243F"]
- [optical-character-recognition "2440" "245F"]
- [enclosed-alphanumerics "2460" "24FF"]
- [box-drawing "2500" "257F"]
- [block-elements "2580" "259F"]
- [geometric-shapes "25A0" "25FF"]
- [miscellaneous-symbols "2600" "26FF"]
- [dingbats "2700" "27BF"]
- [miscellaneous-mathematical-symbols-a "27C0" "27EF"]
- [supplemental-arrows-a "27F0" "27FF"]
- [braille-patterns "2800" "28FF"]
- [supplemental-arrows-b "2900" "297F"]
- [miscellaneous-mathematical-symbols-b "2980" "29FF"]
- [supplemental-mathematical-operators "2A00" "2AFF"]
- [miscellaneous-symbols-and-arrows "2B00" "2BFF"]
- [cjk-radicals-supplement "2E80" "2EFF"]
- [kangxi-radicals "2F00" "2FDF"]
- [ideographic-description-characters "2FF0" "2FFF"]
- [cjk-symbols-and-punctuation "3000" "303F"]
- [hiragana "3040" "309F"]
- [katakana "30A0" "30FF"]
- [bopomofo "3100" "312F"]
- [hangul-compatibility-jamo "3130" "318F"]
- [kanbun "3190" "319F"]
- [bopomofo-extended "31A0" "31BF"]
- [katakana-phonetic-extensions "31F0" "31FF"]
- [enclosed-cjk-letters-and-months "3200" "32FF"]
- [cjk-compatibility "3300" "33FF"]
- [cjk-unified-ideographs-extension-a "3400" "4DBF"]
- [yijing-hexagram-symbols "4DC0" "4DFF"]
- [cjk-unified-ideographs "4E00" "9FFF"]
- [yi-syllables "A000" "A48F"]
- [yi-radicals "A490" "A4CF"]
- [hangul-syllables "AC00" "D7AF"]
- [high-surrogates "D800" "DB7F"]
- [high-private-use-surrogates "DB80" "DBFF"]
- [low-surrogates "DC00" "DFFF"]
- [private-use-area "E000" "F8FF"]
- [cjk-compatibility-ideographs "F900" "FAFF"]
- [alphabetic-presentation-forms "FB00" "FB4F"]
- [arabic-presentation-forms-a "FB50" "FDFF"]
- [variation-selectors "FE00" "FE0F"]
- [combining-half-marks "FE20" "FE2F"]
- [cjk-compatibility-forms "FE30" "FE4F"]
- [small-form-variants "FE50" "FE6F"]
- [arabic-presentation-forms-b "FE70" "FEFF"]
- [halfwidth-and-fullwidth-forms "FF00" "FFEF"]
- [specials "FFF0" "FFFF"]
- ## [linear-b-syllabary "10000" "1007F"]
- ## [linear-b-ideograms "10080" "100FF"]
- ## [aegean-numbers "10100" "1013F"]
- ## [old-italic "10300" "1032F"]
- ## [gothic "10330" "1034F"]
- ## [ugaritic "10380" "1039F"]
- ## [deseret "10400" "1044F"]
- ## [shavian "10450" "1047F"]
- ## [osmanya "10480" "104AF"]
- ## [cypriot-syllabary "10800" "1083F"]
- ## [byzantine-musical-symbols "1D000" "1D0FF"]
- ## [musical-symbols "1D100" "1D1FF"]
- ## [tai-xuan-jing-symbols "1D300" "1D35F"]
- ## [mathematical-alphanumeric-symbols "1D400" "1D7FF"]
- ## [cjk-unified-ideographs-extension-b "20000" "2A6DF"]
- ## [cjk-compatibility-ideographs-supplement "2F800" "2FA1F"]
- ## [tags "E0000" "E007F"]
-
- ## Specialized segments
- [basic-latin/decimal "0030" "0039"]
- [basic-latin/upper-alpha "0041" "005A"]
- [basic-latin/lower-alpha "0061" "007A"]
- )
+ ["." / #_
+ ["#." segment (#+ Segment)]
+ [// (#+ Char)]])
(def: builder
- (tree.builder ..monoid))
+ (tree.builder /segment.monoid))
(def: :@:
(:by-example [@]
@@ -218,131 +43,131 @@
(list\fold ..compose (..singleton head) (list\map ..singleton tail)))
(def: half/0
- (..set [basic-latin
- (list latin-1-supplement
- latin-extended-a
- latin-extended-b
- ipa-extensions
- spacing-modifier-letters
- combining-diacritical-marks
- greek-and-coptic
- cyrillic
- cyrillic-supplementary
- armenian
- hebrew
- arabic
- syriac
- thaana
- devanagari
- bengali
- gurmukhi
- gujarati
- oriya
- tamil
- telugu
- kannada
- malayalam
- sinhala
- thai
- lao
- tibetan
- myanmar
- georgian
- hangul-jamo
- ethiopic
- cherokee
- unified-canadian-aboriginal-syllabics
- ogham
- runic
- tagalog
- hanunoo
- buhid
- tagbanwa
- khmer
- mongolian
- limbu
- tai-le
- khmer-symbols
- phonetic-extensions
- latin-extended-additional
- greek-extended
- general-punctuation
- superscripts-and-subscripts
- currency-symbols
- combining-diacritical-marks-for-symbols
- letterlike-symbols
- number-forms
- arrows
- mathematical-operators
- miscellaneous-technical
- control-pictures
- optical-character-recognition
- enclosed-alphanumerics
- box-drawing
+ (..set [/segment.basic-latin
+ (list /segment.latin-1-supplement
+ /segment.latin-extended-a
+ /segment.latin-extended-b
+ /segment.ipa-extensions
+ /segment.spacing-modifier-letters
+ /segment.combining-diacritical-marks
+ /segment.greek-and-coptic
+ /segment.cyrillic
+ /segment.cyrillic-supplementary
+ /segment.armenian
+ /segment.hebrew
+ /segment.arabic
+ /segment.syriac
+ /segment.thaana
+ /segment.devanagari
+ /segment.bengali
+ /segment.gurmukhi
+ /segment.gujarati
+ /segment.oriya
+ /segment.tamil
+ /segment.telugu
+ /segment.kannada
+ /segment.malayalam
+ /segment.sinhala
+ /segment.thai
+ /segment.lao
+ /segment.tibetan
+ /segment.myanmar
+ /segment.georgian
+ /segment.hangul-jamo
+ /segment.ethiopic
+ /segment.cherokee
+ /segment.unified-canadian-aboriginal-syllabics
+ /segment.ogham
+ /segment.runic
+ /segment.tagalog
+ /segment.hanunoo
+ /segment.buhid
+ /segment.tagbanwa
+ /segment.khmer
+ /segment.mongolian
+ /segment.limbu
+ /segment.tai-le
+ /segment.khmer-symbols
+ /segment.phonetic-extensions
+ /segment.latin-extended-additional
+ /segment.greek-extended
+ /segment.general-punctuation
+ /segment.superscripts-and-subscripts
+ /segment.currency-symbols
+ /segment.combining-diacritical-marks-for-symbols
+ /segment.letterlike-symbols
+ /segment.number-forms
+ /segment.arrows
+ /segment.mathematical-operators
+ /segment.miscellaneous-technical
+ /segment.control-pictures
+ /segment.optical-character-recognition
+ /segment.enclosed-alphanumerics
+ /segment.box-drawing
)]))
(def: half/1
- (..set [block-elements
- (list geometric-shapes
- miscellaneous-symbols
- dingbats
- miscellaneous-mathematical-symbols-a
- supplemental-arrows-a
- braille-patterns
- supplemental-arrows-b
- miscellaneous-mathematical-symbols-b
- supplemental-mathematical-operators
- miscellaneous-symbols-and-arrows
- cjk-radicals-supplement
- kangxi-radicals
- ideographic-description-characters
- cjk-symbols-and-punctuation
- hiragana
- katakana
- bopomofo
- hangul-compatibility-jamo
- kanbun
- bopomofo-extended
- katakana-phonetic-extensions
- enclosed-cjk-letters-and-months
- cjk-compatibility
- cjk-unified-ideographs-extension-a
- yijing-hexagram-symbols
- cjk-unified-ideographs
- yi-syllables
- yi-radicals
- hangul-syllables
- ## high-surrogates
- ## high-private-use-surrogates
- ## low-surrogates
- ## private-use-area
- cjk-compatibility-ideographs
- alphabetic-presentation-forms
- arabic-presentation-forms-a
- variation-selectors
- combining-half-marks
- cjk-compatibility-forms
- small-form-variants
- arabic-presentation-forms-b
- halfwidth-and-fullwidth-forms
- specials
- ## linear-b-syllabary
- ## linear-b-ideograms
- ## aegean-numbers
- ## old-italic
- ## gothic
- ## ugaritic
- ## deseret
- ## shavian
- ## osmanya
- ## cypriot-syllabary
- ## byzantine-musical-symbols
- ## musical-symbols
- ## tai-xuan-jing-symbols
- ## mathematical-alphanumeric-symbols
- ## cjk-unified-ideographs-extension-b
- ## cjk-compatibility-ideographs-supplement
- ## tags
+ (..set [/segment.block-elements
+ (list /segment.geometric-shapes
+ /segment.miscellaneous-symbols
+ /segment.dingbats
+ /segment.miscellaneous-mathematical-symbols-a
+ /segment.supplemental-arrows-a
+ /segment.braille-patterns
+ /segment.supplemental-arrows-b
+ /segment.miscellaneous-mathematical-symbols-b
+ /segment.supplemental-mathematical-operators
+ /segment.miscellaneous-symbols-and-arrows
+ /segment.cjk-radicals-supplement
+ /segment.kangxi-radicals
+ /segment.ideographic-description-characters
+ /segment.cjk-symbols-and-punctuation
+ /segment.hiragana
+ /segment.katakana
+ /segment.bopomofo
+ /segment.hangul-compatibility-jamo
+ /segment.kanbun
+ /segment.bopomofo-extended
+ /segment.katakana-phonetic-extensions
+ /segment.enclosed-cjk-letters-and-months
+ /segment.cjk-compatibility
+ /segment.cjk-unified-ideographs-extension-a
+ /segment.yijing-hexagram-symbols
+ /segment.cjk-unified-ideographs
+ /segment.yi-syllables
+ /segment.yi-radicals
+ /segment.hangul-syllables
+ ## /segment.high-surrogates
+ ## /segment.high-private-use-surrogates
+ ## /segment.low-surrogates
+ ## /segment.private-use-area
+ /segment.cjk-compatibility-ideographs
+ /segment.alphabetic-presentation-forms
+ /segment.arabic-presentation-forms-a
+ /segment.variation-selectors
+ /segment.combining-half-marks
+ /segment.cjk-compatibility-forms
+ /segment.small-form-variants
+ /segment.arabic-presentation-forms-b
+ /segment.halfwidth-and-fullwidth-forms
+ /segment.specials
+ ## /segment.linear-b-syllabary
+ ## /segment.linear-b-ideograms
+ ## /segment.aegean-numbers
+ ## /segment.old-italic
+ ## /segment.gothic
+ ## /segment.ugaritic
+ ## /segment.deseret
+ ## /segment.shavian
+ ## /segment.osmanya
+ ## /segment.cypriot-syllabary
+ ## /segment.byzantine-musical-symbols
+ ## /segment.musical-symbols
+ ## /segment.tai-xuan-jing-symbols
+ ## /segment.mathematical-alphanumeric-symbols
+ ## /segment.cjk-unified-ideographs-extension-b
+ ## /segment.cjk-compatibility-ideographs-supplement
+ ## /segment.tags
)]))
(def: #export full
@@ -351,13 +176,13 @@
(def: #export (range set)
(-> Set [Char Char])
(let [tag (tree.tag (:representation set))]
- [(..start tag)
- (..end tag)]))
+ [(/segment.start tag)
+ (/segment.end tag)]))
(def: #export (member? set character)
(-> Set Char Bit)
(loop [tree (:representation set)]
- (if (..within? (tree.tag tree) character)
+ (if (/segment.within? (tree.tag tree) character)
(case (tree.root tree)
(0 #0 _)
true
@@ -366,15 +191,22 @@
(or (recur left)
(recur right)))
false)))
+
+ ## Two sets are equal when the segments tagging their trees form equal sets.
+ (structure: #export equivalence
+ (Equivalence Set)
+
+ (def: (= reference subject)
+ (let [reference-segments (|> reference :representation tree.tags (set.from-list /segment.hash))
+ subject-segments (|> subject :representation tree.tags (set.from-list /segment.hash))]
+ (set\= reference-segments subject-segments))))
)
(template [<name> <segments>]
[(def: #export <name>
(..set <segments>))]
- [ascii [basic-latin (list)]]
- [ascii/alpha [basic-latin/upper-alpha (list basic-latin/lower-alpha)]]
- [ascii/alpha-num [basic-latin/upper-alpha (list basic-latin/lower-alpha basic-latin/decimal)]]
- [ascii/upper-alpha [basic-latin/upper-alpha (list)]]
- [ascii/lower-alpha [basic-latin/lower-alpha (list)]]
+ [ascii [/segment.basic-latin (list)]]
+ [ascii/alpha [/segment.basic-latin/upper-alpha (list /segment.basic-latin/lower-alpha)]]
+ [ascii/alpha-num [/segment.basic-latin/upper-alpha (list /segment.basic-latin/lower-alpha /segment.basic-latin/decimal)]]
+ [ascii/upper-alpha [/segment.basic-latin/upper-alpha (list)]]
+ [ascii/lower-alpha [/segment.basic-latin/lower-alpha (list)]]
)
diff --git a/stdlib/source/lux/data/text/unicode/segment.lux b/stdlib/source/lux/data/text/unicode/segment.lux
new file mode 100644
index 000000000..a2507cc1e
--- /dev/null
+++ b/stdlib/source/lux/data/text/unicode/segment.lux
@@ -0,0 +1,204 @@
+(.module:
+ [lux #*
+ [abstract
+ [equivalence (#+ Equivalence)]
+ [hash (#+ Hash)]
+ [monoid (#+ Monoid)]
+ ["." interval (#+ Interval)]]
+ [data
+ [number (#+ hex)
+ ["n" nat ("#\." interval)]
+ ["." i64]]]
+ [type
+ abstract]]
+ [/// (#+ Char)])
+
+ ## A span of characters, represented as an interval over Char.
+ (abstract: #export Segment
+ (Interval Char)
+
+ ## Composing two segments yields the interval running from the smaller
+ ## of their bottoms to the larger of their tops.
+ (structure: #export monoid
+ (Monoid Segment)
+
+ ## The identity is built with n\top as its bottom and n\bottom as its top.
+ ## NOTE(review): presumably these are the largest and smallest Nat, so that
+ ## the n.min/n.max in compose always pick the other operand's bounds -- confirm.
+ (def: identity
+ (:abstraction (interval.between n.enum n\top n\bottom)))
+ (def: (compose left right)
+ (let [left (:representation left)
+ right (:representation right)]
+ (:abstraction
+ (interval.between n.enum
+ (n.min (\ left bottom)
+ (\ right bottom))
+ (n.max (\ left top)
+ (\ right top)))))))
+
+ ## Builds a segment from two characters given in either order:
+ ## the smaller becomes the bottom and the larger becomes the top.
+ (def: #export (segment start end)
+ (-> Char Char Segment)
+ (:abstraction (interval.between n.enum (n.min start end) (n.max start end))))
+
+ ## Accessors for the two bounds of a segment.
+ (template [<name> <slot>]
+ [(def: #export <name>
+ (-> Segment Char)
+ (|>> :representation (get@ <slot>)))]
+
+ [start #interval.bottom]
+ [end #interval.top]
+ )
+
+ ## Number of characters covered by the segment; both bounds are counted,
+ ## hence the increment after subtracting.
+ (def: #export (size segment)
+ (-> Segment Nat)
+ (let [start (get@ #interval.bottom (:representation segment))
+ end (get@ #interval.top (:representation segment))]
+ (|> end (n.- start) inc)))
+
+ ## Tests whether the character lies within the segment's interval.
+ ## The signature carries no type variables: the former (All [a] ...) wrapper
+ ## bound a variable that was never used.
+ (def: #export (within? segment char)
+ (-> Segment Char Bit)
+ (interval.within? (:representation segment) char))
+ )
+
+ ## Segments are equal when both their starting and their ending characters coincide.
+ (structure: #export equivalence
+ (Equivalence Segment)
+
+ (def: (= reference subject)
+ ## The ends are only compared once the starts are known to match.
+ (if (n.= (..start reference) (..start subject))
+ (n.= (..end reference) (..end subject))
+ false)))
+
+ ## Hashes a segment by combining both of its bounds into a single number.
+ (structure: #export hash
+ (Hash Segment)
+
+ (def: &equivalence ..equivalence)
+ (def: (hash value)
+ ## The start is shifted left by 32 bits and then OR-ed with the end.
+ (let [shifted-start (i64.left-shift 32 (..start value))
+ plain-end (..end value)]
+ (i64.or shifted-start plain-end))))
+
+(template [<name> <start> <end>]
+ [(def: #export <name> Segment (..segment (hex <start>) (hex <end>)))]
+
+ ## Normal segments
+ [basic-latin "0000" "007F"]
+ ## The Unicode block "Latin-1 Supplement" spans U+0080..U+00FF (it includes the C1 controls).
+ [latin-1-supplement "0080" "00FF"]
+ [latin-extended-a "0100" "017F"]
+ [latin-extended-b "0180" "024F"]
+ [ipa-extensions "0250" "02AF"]
+ [spacing-modifier-letters "02B0" "02FF"]
+ [combining-diacritical-marks "0300" "036F"]
+ [greek-and-coptic "0370" "03FF"]
+ [cyrillic "0400" "04FF"]
+ [cyrillic-supplementary "0500" "052F"]
+ [armenian "0530" "058F"]
+ [hebrew "0590" "05FF"]
+ [arabic "0600" "06FF"]
+ [syriac "0700" "074F"]
+ [thaana "0780" "07BF"]
+ [devanagari "0900" "097F"]
+ [bengali "0980" "09FF"]
+ [gurmukhi "0A00" "0A7F"]
+ [gujarati "0A80" "0AFF"]
+ [oriya "0B00" "0B7F"]
+ [tamil "0B80" "0BFF"]
+ [telugu "0C00" "0C7F"]
+ [kannada "0C80" "0CFF"]
+ [malayalam "0D00" "0D7F"]
+ [sinhala "0D80" "0DFF"]
+ [thai "0E00" "0E7F"]
+ [lao "0E80" "0EFF"]
+ [tibetan "0F00" "0FFF"]
+ [myanmar "1000" "109F"]
+ [georgian "10A0" "10FF"]
+ [hangul-jamo "1100" "11FF"]
+ [ethiopic "1200" "137F"]
+ [cherokee "13A0" "13FF"]
+ [unified-canadian-aboriginal-syllabics "1400" "167F"]
+ [ogham "1680" "169F"]
+ [runic "16A0" "16FF"]
+ [tagalog "1700" "171F"]
+ [hanunoo "1720" "173F"]
+ [buhid "1740" "175F"]
+ [tagbanwa "1760" "177F"]
+ [khmer "1780" "17FF"]
+ [mongolian "1800" "18AF"]
+ [limbu "1900" "194F"]
+ [tai-le "1950" "197F"]
+ [khmer-symbols "19E0" "19FF"]
+ [phonetic-extensions "1D00" "1D7F"]
+ [latin-extended-additional "1E00" "1EFF"]
+ [greek-extended "1F00" "1FFF"]
+ [general-punctuation "2000" "206F"]
+ [superscripts-and-subscripts "2070" "209F"]
+ [currency-symbols "20A0" "20CF"]
+ [combining-diacritical-marks-for-symbols "20D0" "20FF"]
+ [letterlike-symbols "2100" "214F"]
+ [number-forms "2150" "218F"]
+ [arrows "2190" "21FF"]
+ [mathematical-operators "2200" "22FF"]
+ [miscellaneous-technical "2300" "23FF"]
+ [control-pictures "2400" "243F"]
+ [optical-character-recognition "2440" "245F"]
+ [enclosed-alphanumerics "2460" "24FF"]
+ [box-drawing "2500" "257F"]
+ [block-elements "2580" "259F"]
+ [geometric-shapes "25A0" "25FF"]
+ [miscellaneous-symbols "2600" "26FF"]
+ [dingbats "2700" "27BF"]
+ [miscellaneous-mathematical-symbols-a "27C0" "27EF"]
+ [supplemental-arrows-a "27F0" "27FF"]
+ [braille-patterns "2800" "28FF"]
+ [supplemental-arrows-b "2900" "297F"]
+ [miscellaneous-mathematical-symbols-b "2980" "29FF"]
+ [supplemental-mathematical-operators "2A00" "2AFF"]
+ [miscellaneous-symbols-and-arrows "2B00" "2BFF"]
+ [cjk-radicals-supplement "2E80" "2EFF"]
+ [kangxi-radicals "2F00" "2FDF"]
+ [ideographic-description-characters "2FF0" "2FFF"]
+ [cjk-symbols-and-punctuation "3000" "303F"]
+ [hiragana "3040" "309F"]
+ [katakana "30A0" "30FF"]
+ [bopomofo "3100" "312F"]
+ [hangul-compatibility-jamo "3130" "318F"]
+ [kanbun "3190" "319F"]
+ [bopomofo-extended "31A0" "31BF"]
+ [katakana-phonetic-extensions "31F0" "31FF"]
+ [enclosed-cjk-letters-and-months "3200" "32FF"]
+ [cjk-compatibility "3300" "33FF"]
+ [cjk-unified-ideographs-extension-a "3400" "4DBF"]
+ [yijing-hexagram-symbols "4DC0" "4DFF"]
+ [cjk-unified-ideographs "4E00" "9FFF"]
+ [yi-syllables "A000" "A48F"]
+ [yi-radicals "A490" "A4CF"]
+ [hangul-syllables "AC00" "D7AF"]
+ [high-surrogates "D800" "DB7F"]
+ [high-private-use-surrogates "DB80" "DBFF"]
+ [low-surrogates "DC00" "DFFF"]
+ [private-use-area "E000" "F8FF"]
+ [cjk-compatibility-ideographs "F900" "FAFF"]
+ [alphabetic-presentation-forms "FB00" "FB4F"]
+ [arabic-presentation-forms-a "FB50" "FDFF"]
+ [variation-selectors "FE00" "FE0F"]
+ [combining-half-marks "FE20" "FE2F"]
+ [cjk-compatibility-forms "FE30" "FE4F"]
+ [small-form-variants "FE50" "FE6F"]
+ [arabic-presentation-forms-b "FE70" "FEFF"]
+ [halfwidth-and-fullwidth-forms "FF00" "FFEF"]
+ [specials "FFF0" "FFFF"]
+ ## [linear-b-syllabary "10000" "1007F"]
+ ## [linear-b-ideograms "10080" "100FF"]
+ ## [aegean-numbers "10100" "1013F"]
+ ## [old-italic "10300" "1032F"]
+ ## [gothic "10330" "1034F"]
+ ## [ugaritic "10380" "1039F"]
+ ## [deseret "10400" "1044F"]
+ ## [shavian "10450" "1047F"]
+ ## [osmanya "10480" "104AF"]
+ ## [cypriot-syllabary "10800" "1083F"]
+ ## [byzantine-musical-symbols "1D000" "1D0FF"]
+ ## [musical-symbols "1D100" "1D1FF"]
+ ## [tai-xuan-jing-symbols "1D300" "1D35F"]
+ ## [mathematical-alphanumeric-symbols "1D400" "1D7FF"]
+ ## [cjk-unified-ideographs-extension-b "20000" "2A6DF"]
+ ## [cjk-compatibility-ideographs-supplement "2F800" "2FA1F"]
+ ## [tags "E0000" "E007F"]
+
+ ## Specialized segments
+ [basic-latin/decimal "0030" "0039"]
+ [basic-latin/upper-alpha "0041" "005A"]
+ [basic-latin/lower-alpha "0061" "007A"]
+ )