aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEduardo Julian2018-05-13 12:59:13 -0400
committerEduardo Julian2018-05-13 12:59:13 -0400
commitbb2ec42843ba0f13adafe1f2f4a7b2820fbcaafa (patch)
tree679a19d763813e6b61aa6dfc05eacd817d971f22
parente1a46ec687d5da858912cf3ee32e61a8d3bfc985 (diff)
- Added dedicated unicode support for text.
- Minor refactoring.
Diffstat (limited to '')
-rw-r--r--stdlib/source/lux/control/predicate.lux29
-rw-r--r--stdlib/source/lux/data/coll/queue/priority.lux48
-rw-r--r--stdlib/source/lux/data/coll/tree/finger.lux51
-rw-r--r--stdlib/source/lux/data/text/unicode.lux338
-rw-r--r--stdlib/source/lux/math/random.lux109
-rw-r--r--stdlib/test/test/lux.lux4
-rw-r--r--stdlib/test/test/lux/cli.lux2
-rw-r--r--stdlib/test/test/lux/data/format/json.lux12
-rw-r--r--stdlib/test/test/lux/data/format/xml.lux2
-rw-r--r--stdlib/test/test/lux/data/ident.lux2
-rw-r--r--stdlib/test/test/lux/data/text.lux14
-rw-r--r--stdlib/test/test/lux/data/text/lexer.lux4
-rw-r--r--stdlib/test/test/lux/data/text/regex.lux6
-rw-r--r--stdlib/test/test/lux/lang/syntax.lux6
-rw-r--r--stdlib/test/test/lux/lang/type.lux2
-rw-r--r--stdlib/test/test/lux/lang/type/check.lux2
-rw-r--r--stdlib/test/test/lux/macro/poly/equality.lux4
17 files changed, 466 insertions, 169 deletions
diff --git a/stdlib/source/lux/control/predicate.lux b/stdlib/source/lux/control/predicate.lux
index a113339b8..45ed4c984 100644
--- a/stdlib/source/lux/control/predicate.lux
+++ b/stdlib/source/lux/control/predicate.lux
@@ -1,21 +1,36 @@
(.module:
lux
- (lux (data (coll (set ["set" unordered #+ Set])))))
+ (lux (control [monoid #+ Monoid])
+ (data (coll (set ["set" unordered #+ Set])))
+ [function]))
(type: #export (Predicate a)
(-> a Bool))
(alias: Pred Predicate)
-(do-template [<name> <combo>]
- [(def: #export (<name> left right)
+(do-template [<identity-name> <identity-value> <composition-name> <composition>]
+ [(def: #export <identity-name>
+ (All [a] (Pred a))
+ (function.const <identity-value>))
+
+ (def: #export (<composition-name> left right)
(All [a] (-> (Pred a) (Pred a) (Pred a)))
(function (_ value)
- (<combo> (left value)
- (right value))))]
+ (<composition> (left value)
+ (right value))))]
+
+ [none false union or]
+ [all true intersection and]
+ )
+
+(do-template [<name> <identity> <composition>]
+ [(struct: #export <name> (All [a] (Monoid (Pred a)))
+ (def: identity <identity>)
+ (def: compose <composition>))]
- [union or]
- [intersection and]
+ [Union@Monoid none union]
+ [Intersection@Monoid all intersection]
)
(def: #export (complement predicate)
diff --git a/stdlib/source/lux/data/coll/queue/priority.lux b/stdlib/source/lux/data/coll/queue/priority.lux
index 1c9c51075..a1e765fc3 100644
--- a/stdlib/source/lux/data/coll/queue/priority.lux
+++ b/stdlib/source/lux/data/coll/queue/priority.lux
@@ -2,14 +2,14 @@
lux
(lux (control [equality #+ Eq]
[monad #+ do Monad])
- (data (coll (tree ["F" finger]))
+ (data (coll (tree [finger #+ Tree]))
[number "nat/" Interval<Nat>]
[maybe])))
(type: #export Priority Nat)
(type: #export (Queue a)
- (Maybe (F.Fingers Priority a)))
+ (Maybe (Tree Priority a)))
(def: #export max Priority nat/top)
(def: #export min Priority nat/bottom)
@@ -22,7 +22,7 @@
(All [a] (-> (Queue a) (Maybe a)))
(do maybe.Monad<Maybe>
[fingers queue]
- (wrap (maybe.assume (F.search (n/= (F.tag fingers)) fingers)))))
+ (wrap (maybe.assume (finger.search (n/= (finger.tag fingers)) fingers)))))
(def: #export (size queue)
(All [a] (-> (Queue a) Nat))
@@ -31,12 +31,12 @@
+0
(#.Some fingers)
- (loop [node (get@ #F.tree fingers)]
+ (loop [node (get@ #finger.node fingers)]
(case node
- (#F.Leaf _ _)
+ (#finger.Leaf _ _)
+1
- (#F.Branch _ left right)
+ (#finger.Branch _ left right)
(n/+ (recur left) (recur right))))))
(def: #export (member? Eq<a> queue member)
@@ -46,12 +46,12 @@
false
(#.Some fingers)
- (loop [node (get@ #F.tree fingers)]
+ (loop [node (get@ #finger.node fingers)]
(case node
- (#F.Leaf _ reference)
+ (#finger.Leaf _ reference)
(:: Eq<a> = reference member)
- (#F.Branch _ left right)
+ (#finger.Branch _ left right)
(or (recur left)
(recur right))))))
@@ -59,44 +59,44 @@
(All [a] (-> (Queue a) (Queue a)))
(do maybe.Monad<Maybe>
[fingers queue
- #let [highest-priority (F.tag fingers)]
- node' (loop [node (get@ #F.tree fingers)]
+ #let [highest-priority (finger.tag fingers)]
+ node' (loop [node (get@ #finger.node fingers)]
(case node
- (#F.Leaf priority reference)
+ (#finger.Leaf priority reference)
(if (n/= highest-priority priority)
#.None
(#.Some node))
- (#F.Branch priority left right)
- (if (n/= highest-priority (F.tag (set@ #F.tree left fingers)))
+ (#finger.Branch priority left right)
+ (if (n/= highest-priority (finger.tag (set@ #finger.node left fingers)))
(case (recur left)
#.None
(#.Some right)
(#.Some =left)
- (|> (F.branch (set@ #F.tree =left fingers)
- (set@ #F.tree right fingers))
- (get@ #F.tree)
+ (|> (finger.branch (set@ #finger.node =left fingers)
+ (set@ #finger.node right fingers))
+ (get@ #finger.node)
#.Some))
(case (recur right)
#.None
(#.Some left)
(#.Some =right)
- (|> (F.branch (set@ #F.tree left fingers)
- (set@ #F.tree =right fingers))
- (get@ #F.tree)
+ (|> (finger.branch (set@ #finger.node left fingers)
+ (set@ #finger.node =right fingers))
+ (get@ #finger.node)
#.Some))
)))]
- (wrap (set@ #F.tree node' fingers))))
+ (wrap (set@ #finger.node node' fingers))))
(def: #export (push priority value queue)
(All [a] (-> Priority a (Queue a) (Queue a)))
- (let [addition {#F.monoid number.Max@Monoid<Nat>
- #F.tree (#F.Leaf priority value)}]
+ (let [addition {#finger.monoid number.Max@Monoid<Nat>
+ #finger.node (#finger.Leaf priority value)}]
(case queue
#.None
(#.Some addition)
(#.Some fingers)
- (#.Some (F.branch fingers addition)))))
+ (#.Some (finger.branch fingers addition)))))
diff --git a/stdlib/source/lux/data/coll/tree/finger.lux b/stdlib/source/lux/data/coll/tree/finger.lux
index 3cf904c3f..ea1ff0eee 100644
--- a/stdlib/source/lux/data/coll/tree/finger.lux
+++ b/stdlib/source/lux/data/coll/tree/finger.lux
@@ -7,46 +7,55 @@
(#Leaf m a)
(#Branch m (Node m a) (Node m a)))
-(type: #export (Fingers m a)
+(type: #export (Tree m a)
{#monoid (m.Monoid m)
- #tree (Node m a)})
+ #node (Node m a)})
-(def: #export (tag fingers)
- (All [m a] (-> (Fingers m a) m))
- (case (get@ #tree fingers)
+(def: #export (tag tree)
+ (All [m a] (-> (Tree m a) m))
+ (case (get@ #node tree)
(^or (#Leaf tag _) (#Branch tag _ _))
tag))
-(def: #export (value fingers)
- (All [m a] (-> (Fingers m a) a))
- (case (get@ #tree fingers)
+(def: #export (value tree)
+ (All [m a] (-> (Tree m a) a))
+ (case (get@ #node tree)
(#Leaf tag value)
value
(#Branch tag left right)
- (value (set@ #tree left fingers))))
+ (value (set@ #node left tree))))
(def: #export (branch left right)
- (All [m a] (-> (Fingers m a) (Fingers m a) (Fingers m a)))
+ (All [m a] (-> (Tree m a) (Tree m a) (Tree m a)))
(let [Monoid<m> (get@ #monoid right)]
{#monoid Monoid<m>
- #tree (#Branch (:: Monoid<m> compose (tag left) (tag right))
- (get@ #tree left)
- (get@ #tree right))}))
-
-(def: #export (search pred fingers)
- (All [m a] (-> (-> m Bool) (Fingers m a) (Maybe a)))
- (let [tag/compose (get@ [#monoid #m.compose] fingers)]
- (if (pred (tag fingers))
- (loop [_tag (get@ [#monoid #m.identity] fingers)
- _node (get@ #tree fingers)]
+ #node (#Branch (:: Monoid<m> compose (tag left) (tag right))
+ (get@ #node left)
+ (get@ #node right))}))
+
+(def: #export (search pred tree)
+ (All [m a] (-> (-> m Bool) (Tree m a) (Maybe a)))
+ (let [tag/compose (get@ [#monoid #m.compose] tree)]
+ (if (pred (tag tree))
+ (loop [_tag (get@ [#monoid #m.identity] tree)
+ _node (get@ #node tree)]
(case _node
(#Leaf _ value)
(#.Some value)
(#Branch _ left right)
- (let [shifted-tag (tag/compose _tag (tag (set@ #tree left fingers)))]
+ (let [shifted-tag (tag/compose _tag (tag (set@ #node left tree)))]
(if (pred shifted-tag)
(recur _tag left)
(recur shifted-tag right)))))
#.None)))
+
+(def: #export (found? pred tree)
+ (All [m a] (-> (-> m Bool) (Tree m a) Bool))
+ (case (search pred tree)
+ (#.Some _)
+ true
+
+ #.None
+ false))
diff --git a/stdlib/source/lux/data/text/unicode.lux b/stdlib/source/lux/data/text/unicode.lux
new file mode 100644
index 000000000..7b1eb0fa9
--- /dev/null
+++ b/stdlib/source/lux/data/text/unicode.lux
@@ -0,0 +1,338 @@
+(.module:
+ lux
+ (lux (control [interval #+ Interval]
+ [monoid #+ Monoid])
+ (data [number #+ hex "nat/" Interval<Nat>]
+ (coll [list]
+ (tree [finger #+ Tree])))
+ (type abstract)))
+
+(type: #export Char Nat)
+
+(abstract: #export Segment
+ {}
+ (Interval Char)
+
+ (def: empty (@abstraction (interval.between number.Enum<Nat> nat/top nat/bottom)))
+
+ (struct: _ (Monoid Segment)
+ (def: identity ..empty)
+ (def: (compose left right)
+ (let [left (@representation left)
+ right (@representation right)]
+ (@abstraction
+ (interval.between number.Enum<Nat>
+ (n/min (:: left bottom)
+ (:: right bottom))
+ (n/max (:: left top)
+ (:: right top)))))))
+
+ (def: #export (segment start end)
+ (-> Char Char Segment)
+ (@abstraction (interval.between number.Enum<Nat> (n/min start end) (n/max start end))))
+
+ (do-template [<name> <slot>]
+ [(def: #export <name>
+ (-> Segment Char)
+ (|>> @representation (get@ <slot>)))]
+
+ [start #interval.bottom]
+ [end #interval.top]
+ )
+
+ (def: #export (size segment)
+ (-> Segment Nat)
+ (let [start (get@ #interval.bottom (@representation segment))
+ end (get@ #interval.top (@representation segment))]
+ (|> end (n/- start) inc)))
+
+ (def: #export (within? segment char)
+ (All [a] (-> Segment Char Bool))
+ (interval.within? (@representation segment) char))
+ )
+
+(do-template [<name> <start> <end>]
+ [(def: #export <name> Segment (..segment (hex <start>) (hex <end>)))]
+
+ [basic-latin "+0000" "+007F"]
+ [latin-1-supplement "+00A0" "+00FF"]
+ [latin-extended-a "+0100" "+017F"]
+ [latin-extended-b "+0180" "+024F"]
+ [ipa-extensions "+0250" "+02AF"]
+ [spacing-modifier-letters "+02B0" "+02FF"]
+ [combining-diacritical-marks "+0300" "+036F"]
+ [greek-and-coptic "+0370" "+03FF"]
+ [cyrillic "+0400" "+04FF"]
+ [cyrillic-supplementary "+0500" "+052F"]
+ [armenian "+0530" "+058F"]
+ [hebrew "+0590" "+05FF"]
+ [arabic "+0600" "+06FF"]
+ [syriac "+0700" "+074F"]
+ [thaana "+0780" "+07BF"]
+ [devanagari "+0900" "+097F"]
+ [bengali "+0980" "+09FF"]
+ [gurmukhi "+0A00" "+0A7F"]
+ [gujarati "+0A80" "+0AFF"]
+ [oriya "+0B00" "+0B7F"]
+ [tamil "+0B80" "+0BFF"]
+ [telugu "+0C00" "+0C7F"]
+ [kannada "+0C80" "+0CFF"]
+ [malayalam "+0D00" "+0D7F"]
+ [sinhala "+0D80" "+0DFF"]
+ [thai "+0E00" "+0E7F"]
+ [lao "+0E80" "+0EFF"]
+ [tibetan "+0F00" "+0FFF"]
+ [myanmar "+1000" "+109F"]
+ [georgian "+10A0" "+10FF"]
+ [hangul-jamo "+1100" "+11FF"]
+ [ethiopic "+1200" "+137F"]
+ [cherokee "+13A0" "+13FF"]
+ [unified-canadian-aboriginal-syllabics "+1400" "+167F"]
+ [ogham "+1680" "+169F"]
+ [runic "+16A0" "+16FF"]
+ [tagalog "+1700" "+171F"]
+ [hanunoo "+1720" "+173F"]
+ [buhid "+1740" "+175F"]
+ [tagbanwa "+1760" "+177F"]
+ [khmer "+1780" "+17FF"]
+ [mongolian "+1800" "+18AF"]
+ [limbu "+1900" "+194F"]
+ [tai-le "+1950" "+197F"]
+ [khmer-symbols "+19E0" "+19FF"]
+ [phonetic-extensions "+1D00" "+1D7F"]
+ [latin-extended-additional "+1E00" "+1EFF"]
+ [greek-extended "+1F00" "+1FFF"]
+ [general-punctuation "+2000" "+206F"]
+ [superscripts-and-subscripts "+2070" "+209F"]
+ [currency-symbols "+20A0" "+20CF"]
+ [combining-diacritical-marks-for-symbols "+20D0" "+20FF"]
+ [letterlike-symbols "+2100" "+214F"]
+ [number-forms "+2150" "+218F"]
+ [arrows "+2190" "+21FF"]
+ [mathematical-operators "+2200" "+22FF"]
+ [miscellaneous-technical "+2300" "+23FF"]
+ [control-pictures "+2400" "+243F"]
+ [optical-character-recognition "+2440" "+245F"]
+ [enclosed-alphanumerics "+2460" "+24FF"]
+ [box-drawing "+2500" "+257F"]
+ [block-elements "+2580" "+259F"]
+ [geometric-shapes "+25A0" "+25FF"]
+ [miscellaneous-symbols "+2600" "+26FF"]
+ [dingbats "+2700" "+27BF"]
+ [miscellaneous-mathematical-symbols-a "+27C0" "+27EF"]
+ [supplemental-arrows-a "+27F0" "+27FF"]
+ [braille-patterns "+2800" "+28FF"]
+ [supplemental-arrows-b "+2900" "+297F"]
+ [miscellaneous-mathematical-symbols-b "+2980" "+29FF"]
+ [supplemental-mathematical-operators "+2A00" "+2AFF"]
+ [miscellaneous-symbols-and-arrows "+2B00" "+2BFF"]
+ [cjk-radicals-supplement "+2E80" "+2EFF"]
+ [kangxi-radicals "+2F00" "+2FDF"]
+ [ideographic-description-characters "+2FF0" "+2FFF"]
+ [cjk-symbols-and-punctuation "+3000" "+303F"]
+ [hiragana "+3040" "+309F"]
+ [katakana "+30A0" "+30FF"]
+ [bopomofo "+3100" "+312F"]
+ [hangul-compatibility-jamo "+3130" "+318F"]
+ [kanbun "+3190" "+319F"]
+ [bopomofo-extended "+31A0" "+31BF"]
+ [katakana-phonetic-extensions "+31F0" "+31FF"]
+ [enclosed-cjk-letters-and-months "+3200" "+32FF"]
+ [cjk-compatibility "+3300" "+33FF"]
+ [cjk-unified-ideographs-extension-a "+3400" "+4DBF"]
+ [yijing-hexagram-symbols "+4DC0" "+4DFF"]
+ [cjk-unified-ideographs "+4E00" "+9FFF"]
+ [yi-syllables "+A000" "+A48F"]
+ [yi-radicals "+A490" "+A4CF"]
+ [hangul-syllables "+AC00" "+D7AF"]
+ [high-surrogates "+D800" "+DB7F"]
+ [high-private-use-surrogates "+DB80" "+DBFF"]
+ [low-surrogates "+DC00" "+DFFF"]
+ [private-use-area "+E000" "+F8FF"]
+ [cjk-compatibility-ideographs "+F900" "+FAFF"]
+ [alphabetic-presentation-forms "+FB00" "+FB4F"]
+ [arabic-presentation-forms-a "+FB50" "+FDFF"]
+ [variation-selectors "+FE00" "+FE0F"]
+ [combining-half-marks "+FE20" "+FE2F"]
+ [cjk-compatibility-forms "+FE30" "+FE4F"]
+ [small-form-variants "+FE50" "+FE6F"]
+ [arabic-presentation-forms-b "+FE70" "+FEFF"]
+ [halfwidth-and-fullwidth-forms "+FF00" "+FFEF"]
+ [specials "+FFF0" "+FFFF"]
+ [linear-b-syllabary "+10000" "+1007F"]
+ [linear-b-ideograms "+10080" "+100FF"]
+ [aegean-numbers "+10100" "+1013F"]
+ [old-italic "+10300" "+1032F"]
+ [gothic "+10330" "+1034F"]
+ [ugaritic "+10380" "+1039F"]
+ [deseret "+10400" "+1044F"]
+ [shavian "+10450" "+1047F"]
+ [osmanya "+10480" "+104AF"]
+ [cypriot-syllabary "+10800" "+1083F"]
+ [byzantine-musical-symbols "+1D000" "+1D0FF"]
+ [musical-symbols "+1D100" "+1D1FF"]
+ [tai-xuan-jing-symbols "+1D300" "+1D35F"]
+ [mathematical-alphanumeric-symbols "+1D400" "+1D7FF"]
+ [cjk-unified-ideographs-extension-b "+20000" "+2A6DF"]
+ [cjk-compatibility-ideographs-supplement "+2F800" "+2FA1F"]
+ [tags "+E0000" "+E007F"]
+ )
+
+(type: #export Set (Tree Segment []))
+
+(def: (singleton segment)
+ (-> Segment Set)
+ {#finger.monoid Monoid<Segment>
+ #finger.node (#finger.Leaf segment [])})
+
+(def: #export (set segments)
+ (-> (List Segment) Set)
+ (case segments
+ (^ (list))
+ (..singleton (:: Monoid<Segment> identity))
+
+ (^ (list singleton))
+ (..singleton singleton)
+
+ (^ (list left right))
+ (..singleton (:: Monoid<Segment> compose left right))
+
+ _
+ (let [[sides extra] (n//% +2 (list.size segments))
+ [left+ right+] (list.split (n/+ sides extra) segments)]
+ (finger.branch (set left+)
+ (set right+)))))
+
+(def: half/0
+ (List Segment)
+ (list basic-latin
+ latin-1-supplement
+ latin-extended-a
+ latin-extended-b
+ ipa-extensions
+ spacing-modifier-letters
+ combining-diacritical-marks
+ greek-and-coptic
+ cyrillic
+ cyrillic-supplementary
+ armenian
+ hebrew
+ arabic
+ syriac
+ thaana
+ devanagari
+ bengali
+ gurmukhi
+ gujarati
+ oriya
+ tamil
+ telugu
+ kannada
+ malayalam
+ sinhala
+ thai
+ lao
+ tibetan
+ myanmar
+ georgian
+ hangul-jamo
+ ethiopic
+ cherokee
+ unified-canadian-aboriginal-syllabics
+ ogham
+ runic
+ tagalog
+ hanunoo
+ buhid
+ tagbanwa
+ khmer
+ mongolian
+ limbu
+ tai-le
+ khmer-symbols
+ phonetic-extensions
+ latin-extended-additional
+ greek-extended
+ general-punctuation
+ superscripts-and-subscripts
+ currency-symbols
+ combining-diacritical-marks-for-symbols
+ letterlike-symbols
+ number-forms
+ arrows
+ mathematical-operators
+ miscellaneous-technical
+ control-pictures
+ optical-character-recognition
+ enclosed-alphanumerics
+ box-drawing
+ ))
+
+(def: half/1
+ (List Segment)
+ (list block-elements
+ geometric-shapes
+ miscellaneous-symbols
+ dingbats
+ miscellaneous-mathematical-symbols-a
+ supplemental-arrows-a
+ braille-patterns
+ supplemental-arrows-b
+ miscellaneous-mathematical-symbols-b
+ supplemental-mathematical-operators
+ miscellaneous-symbols-and-arrows
+ cjk-radicals-supplement
+ kangxi-radicals
+ ideographic-description-characters
+ cjk-symbols-and-punctuation
+ hiragana
+ katakana
+ bopomofo
+ hangul-compatibility-jamo
+ kanbun
+ bopomofo-extended
+ katakana-phonetic-extensions
+ enclosed-cjk-letters-and-months
+ cjk-compatibility
+ cjk-unified-ideographs-extension-a
+ yijing-hexagram-symbols
+ cjk-unified-ideographs
+ yi-syllables
+ yi-radicals
+ hangul-syllables
+ high-surrogates
+ high-private-use-surrogates
+ low-surrogates
+ private-use-area
+ cjk-compatibility-ideographs
+ alphabetic-presentation-forms
+ arabic-presentation-forms-a
+ variation-selectors
+ combining-half-marks
+ cjk-compatibility-forms
+ small-form-variants
+ arabic-presentation-forms-b
+ halfwidth-and-fullwidth-forms
+ specials
+ linear-b-syllabary
+ linear-b-ideograms
+ aegean-numbers
+ old-italic
+ gothic
+ ugaritic
+ deseret
+ shavian
+ osmanya
+ cypriot-syllabary
+ byzantine-musical-symbols
+ musical-symbols
+ tai-xuan-jing-symbols
+ mathematical-alphanumeric-symbols
+ cjk-unified-ideographs-extension-b
+ cjk-compatibility-ideographs-supplement
+ tags
+ ))
+
+(def: #export full
+ Set
+ (finger.branch (set half/0) (set half/1)))
diff --git a/stdlib/source/lux/math/random.lux b/stdlib/source/lux/math/random.lux
index 484574c82..cdb65971c 100644
--- a/stdlib/source/lux/math/random.lux
+++ b/stdlib/source/lux/math/random.lux
@@ -1,11 +1,12 @@
(.module: {#.doc "Pseudo-random number generation (PRNG) algorithms."}
- [lux #- list i64 nat int deg]
+ [lux #- list i64 nat int deg char]
(lux (control [functor #+ Functor]
[apply #+ Apply]
[monad #+ do Monad]
hash)
(data [bit]
[text "text/" Monoid<Text>]
+ (text [unicode #+ Char Segment])
[product]
[maybe]
[number #+ hex]
@@ -17,7 +18,8 @@
[queue #+ Queue]
(set ["set" unordered #+ Set])
[stack #+ Stack]
- [sequence #+ Sequence]))
+ [sequence #+ Sequence]
+ (tree [finger #+ Tree])))
))
(type: #export #rec PRNG
@@ -95,99 +97,32 @@
(Random Frac)
(:: Monad<Random> map number.bits-to-frac nat))
-(def: #export (text' char-gen size)
- (-> (Random Nat) Nat (Random Text))
+(def: #export (char set)
+ (-> unicode.Set (Random Char))
+ (let [summary (finger.tag set)
+ start (unicode.start summary)
+ size (unicode.size summary)
+ in-range (: (-> Char Char)
+ (|>> (n/% size) (n/+ start)))]
+ (|> nat
+ (:: Monad<Random> map in-range)
+ (..filter (function (_ char)
+ (finger.found? (function (_ segment)
+ (unicode.within? segment char))
+ set))))))
+
+(def: #export (text char-gen size)
+ (-> (Random Char) Nat (Random Text))
(if (n/= +0 size)
(:: Monad<Random> wrap "")
(do Monad<Random>
[x char-gen
- xs (text' char-gen (dec size))]
+ xs (text char-gen (dec size))]
(wrap (text/compose (text.from-code x) xs)))))
-(type: Region [Nat Nat])
-
-(do-template [<name> <from> <to>]
- [(def: <name> Region [(hex <from>) (hex <to>)])]
-
- [Thaana "+0780" "+07BF"]
- [Khmer-Symbols "+19E0" "+19FF"]
- [Phonetic-Extensions "+1D00" "+1D7F"]
- [Hangul-Syllables "+AC00" "+D7AF"]
-
- [Cypriot-Syllabary "+10800" "+1083F"]
- [Tai-Xuan-Jing-Symbols "+1D300" "+1D35F"]
- [Mathematical-Alphanumeric-Symbols "+1D400" "+1D7FF"]
- [CJK-Unified-Ideographs-Extension-B "+20000" "+2A6DF"]
- [CJK-Compatibility-Ideographs-Supplement "+2F800" "+2FA1F"]
- )
-
-(def: (within? [from to] char)
- (-> Region Nat Bool)
- (and (n/>= from char)
- (n/<= to char)))
-
-(def: unicode-ceiling (|> CJK-Compatibility-Ideographs-Supplement product.right inc))
-
(def: #export unicode
- (Random Nat)
- (|> ..nat
- (:: Monad<Random> map (n/% unicode-ceiling))
- (..filter (function (_ raw)
- ## From "Basic Latin" to "Syriac"
- (or (n/<= (hex "+074F") raw)
- (within? Thaana raw)
- ## From "Devanagari" to "Ethiopic"
- (and (n/>= (hex "+0900") raw)
- (n/<= (hex "+137F") raw))
- ## From "Cherokee" to "Mongolian"
- (and (n/>= (hex "+13A0") raw)
- (n/<= (hex "+18AF") raw))
- ## From "Limbu" to "Tai Le"
- (and (n/>= (hex "+1900") raw)
- (n/<= (hex "+197F") raw))
- (within? Khmer-Symbols raw)
- (within? Phonetic-Extensions raw)
- ## From "Latin Extended Additional" to "Miscellaneous Symbols and Arrows"
- (and (n/>= (hex "+1E00") raw)
- (n/<= (hex "+2BFF") raw))
- ## From "CJK Radicals Supplement" to "Kangxi Radicals"
- (and (n/>= (hex "+2E80") raw)
- (n/<= (hex "+2FDF") raw))
- ## From "Ideographic Description Characters" to "Bopomofo Extended"
- (and (n/>= (hex "+2FF0") raw)
- (n/<= (hex "+31BF") raw))
- ## From "Katakana Phonetic Extensions" to "CJK Unified Ideographs"
- (and (n/>= (hex "+31F0") raw)
- (n/<= (hex "+9FAF") raw))
- ## From "Yi Syllables" to "Yi Radicals"
- (and (n/>= (hex "+A000") raw)
- (n/<= (hex "+A4CF") raw))
- (within? Hangul-Syllables raw)
- ## From "CJK Compatibility Ideographs" to "Arabic Presentation Forms-A"
- (and (n/>= (hex "+F900") raw)
- (n/<= (hex "+FDFF") raw))
- ## From "Combining Half Marks" to "Halfwidth and Fullwidth Forms"
- (and (n/>= (hex "+FE20") raw)
- (n/<= (hex "+FFEF") raw))
- ## From "Linear B Syllabary" to "Aegean Numbers"
- (and (n/>= (hex "+10000") raw)
- (n/<= (hex "+1013F") raw))
- ## From "Old Italic" to "Osmanya"
- (and (n/>= (hex "+10300") raw)
- (n/<= (hex "+104AF") raw))
- (within? Cypriot-Syllabary raw)
- ## From "Byzantine Musical Symbols" to "Musical Symbols"
- (and (n/>= (hex "+1D000") raw)
- (n/<= (hex "+1D1FF") raw))
- (within? Tai-Xuan-Jing-Symbols raw)
- (within? Mathematical-Alphanumeric-Symbols raw)
- (within? CJK-Unified-Ideographs-Extension-B raw)
- (within? CJK-Compatibility-Ideographs-Supplement raw)
- )))))
-
-(def: #export (text size)
(-> Nat (Random Text))
- (text' unicode size))
+ (text (char unicode.full)))
(do-template [<name> <type> <ctor> <gen>]
[(def: #export <name>
diff --git a/stdlib/test/test/lux.lux b/stdlib/test/test/lux.lux
index 1f0a6e115..3c731ebc4 100644
--- a/stdlib/test/test/lux.lux
+++ b/stdlib/test/test/lux.lux
@@ -15,8 +15,8 @@
(<| (times +100)
(do @
[size (|> r.nat (:: @ map (|>> (n/% +100) (n/max +10))))
- x (r.text size)
- y (r.text size)]
+ x (r.unicode size)
+ y (r.unicode size)]
($_ seq
(test "Every value is identical to itself, and the 'id' function doesn't change values in any way."
(and (is? x x)
diff --git a/stdlib/test/test/lux/cli.lux b/stdlib/test/test/lux/cli.lux
index b5f251c57..4aed244b2 100644
--- a/stdlib/test/test/lux/cli.lux
+++ b/stdlib/test/test/lux/cli.lux
@@ -22,7 +22,7 @@
#let [(^open "Nat/") number.Codec<Text,Nat>
gen-arg (:: @ map Nat/encode r.nat)]
yes gen-arg
- #let [gen-ignore (|> (r.text +5) (r.filter (|>> (text/= yes) not)))]
+ #let [gen-ignore (|> (r.unicode +5) (r.filter (|>> (text/= yes) not)))]
no gen-ignore
pre-ignore (r.list +5 gen-ignore)
post-ignore (r.list +5 gen-ignore)]
diff --git a/stdlib/test/test/lux/data/format/json.lux b/stdlib/test/test/lux/data/format/json.lux
index 43b0851c0..a8a117a04 100644
--- a/stdlib/test/test/lux/data/format/json.lux
+++ b/stdlib/test/test/lux/data/format/json.lux
@@ -42,9 +42,9 @@
(:: @ wrap [])
r.bool
(|> r.frac (:: @ map (f/* 1_000_000.0)))
- (r.text size)
+ (r.unicode size)
(r.sequence size gen-json)
- (r.dict text.Hash<Text> size (r.text size) gen-json)
+ (r.dict text.Hash<Text> size (r.unicode size) gen-json)
)))))
(context: "JSON"
@@ -110,12 +110,12 @@
($_ r.seq
r.bool
r.frac
- (r.text size)
+ (r.unicode size)
(r.maybe r.frac)
(r.list size r.frac)
- (r.dict text.Hash<Text> size (r.text size) r.frac)
- ## ($_ r.alt r.bool (r.text size) r.frac)
- ## ($_ r.seq r.bool r.frac (r.text size))
+ (r.dict text.Hash<Text> size (r.unicode size) r.frac)
+ ## ($_ r.alt r.bool (r.unicode size) r.frac)
+ ## ($_ r.seq r.bool r.frac (r.unicode size))
gen-recursive
## _instant.instant
_duration.duration
diff --git a/stdlib/test/test/lux/data/format/xml.lux b/stdlib/test/test/lux/data/format/xml.lux
index d70b911dc..b7377ab64 100644
--- a/stdlib/test/test/lux/data/format/xml.lux
+++ b/stdlib/test/test/lux/data/format/xml.lux
@@ -37,7 +37,7 @@
(-> Nat Nat (r.Random Text))
(do r.Monad<Random>
[size (size^ bottom top)]
- (r.text' xml-char^ size)))
+ (r.text xml-char^ size)))
(def: xml-identifier^
(r.Random Ident)
diff --git a/stdlib/test/test/lux/data/ident.lux b/stdlib/test/test/lux/data/ident.lux
index d6732619e..5e39bad7f 100644
--- a/stdlib/test/test/lux/data/ident.lux
+++ b/stdlib/test/test/lux/data/ident.lux
@@ -11,7 +11,7 @@
(def: (gen-part size)
(-> Nat (r.Random Text))
- (|> (r.text size) (r.filter (|>> (text.contains? ".") not))))
+ (|> (r.unicode size) (r.filter (|>> (text.contains? ".") not))))
(context: "Idents"
(<| (times +100)
diff --git a/stdlib/test/test/lux/data/text.lux b/stdlib/test/test/lux/data/text.lux
index 2430c9919..99c750f74 100644
--- a/stdlib/test/test/lux/data/text.lux
+++ b/stdlib/test/test/lux/data/text.lux
@@ -14,7 +14,7 @@
(<| (times +100)
(do @
[size (:: @ map (n/% +100) r.nat)
- sample (r.text size)]
+ sample (r.unicode size)]
(test "" (or (and (n/= +0 size)
(&.empty? sample))
(n/= size (&.size sample)))))))
@@ -29,7 +29,7 @@
(do @
[size bounded-size
idx (:: @ map (n/% size) r.nat)
- sample (r.text size)]
+ sample (r.unicode size)]
(test "" (|> sample
(&.nth idx)
(case> (^multi (#.Some char)
@@ -58,8 +58,8 @@
(do @
[sizeL bounded-size
sizeR bounded-size
- sampleL (r.text sizeL)
- sampleR (r.text sizeR)
+ sampleL (r.unicode sizeL)
+ sampleR (r.unicode sizeR)
#let [sample (&.concat (list sampleL sampleR))
fake-sample (&.join-with " " (list sampleL sampleR))
dup-sample (&.join-with "" (list sampleL sampleR))
@@ -105,9 +105,9 @@
## can make text replacement work improperly.
## Because of that, I restrict the charset.
normal-char-gen (|> r.nat (:: @ map (|>> (n/% +128) (n/max +1))))]
- sep1 (r.text' normal-char-gen +1)
- sep2 (r.text' normal-char-gen +1)
- #let [part-gen (|> (r.text' normal-char-gen sizeP)
+ sep1 (r.text normal-char-gen +1)
+ sep2 (r.text normal-char-gen +1)
+ #let [part-gen (|> (r.text normal-char-gen sizeP)
(r.filter (|>> (&.contains? sep1) not)))]
parts (r.list sizeL part-gen)
#let [sample1 (&.concat (list.interpose sep1 parts))
diff --git a/stdlib/test/test/lux/data/text/lexer.lux b/stdlib/test/test/lux/data/text/lexer.lux
index 8706e2f2e..58e142a98 100644
--- a/stdlib/test/test/lux/data/text/lexer.lux
+++ b/stdlib/test/test/lux/data/text/lexer.lux
@@ -73,8 +73,8 @@
(<| (times +100)
(do @
[size (|> r.nat (:: @ map (|>> (n/% +100) (n/max +10))))
- sample (r.text size)
- non-sample (|> (r.text size)
+ sample (r.unicode size)
+ non-sample (|> (r.unicode size)
(r.filter (|>> (text/= sample) not)))]
($_ seq
(test "Can find literal text fragments."
diff --git a/stdlib/test/test/lux/data/text/regex.lux b/stdlib/test/test/lux/data/text/regex.lux
index 7323aeb79..7bc3082a1 100644
--- a/stdlib/test/test/lux/data/text/regex.lux
+++ b/stdlib/test/test/lux/data/text/regex.lux
@@ -269,9 +269,9 @@
(context: "Pattern-matching"
(<| (times +100)
(do @
- [sample1 (r.text +3)
- sample2 (r.text +3)
- sample3 (r.text +4)]
+ [sample1 (r.unicode +3)
+ sample2 (r.unicode +3)
+ sample3 (r.unicode +4)]
(case (format sample1 "-" sample2 "-" sample3)
(&.^regex "(.{3})-(.{3})-(.{4})"
[_ match1 match2 match3])
diff --git a/stdlib/test/test/lux/lang/syntax.lux b/stdlib/test/test/lux/lang/syntax.lux
index 01679b27a..0645bfe25 100644
--- a/stdlib/test/test/lux/lang/syntax.lux
+++ b/stdlib/test/test/lux/lang/syntax.lux
@@ -33,7 +33,7 @@
(not (text.contains? (text.from-code sample)
invalid-range)))))]
size (|> r.nat (:: @ map (|>> (n/% +20) (n/max +1))))]
- (r.text' char-gen size)))
+ (r.text char-gen size)))
(def: ident^
(r.Random Ident)
@@ -52,7 +52,7 @@
($_ r.either
(do r.Monad<Random>
[size (|> r.nat (r/map (n/% +20)))]
- (|> (r.text size) (r/map code.text)))
+ (|> (r.unicode size) (r/map code.text)))
(|> ident^ (r/map code.symbol))
(|> ident^ (r/map code.tag))))
simple^ (: (r.Random Code)
@@ -152,7 +152,7 @@
(n/= (char ")") value))))))]
(do r.Monad<Random>
[size (|> r.nat (r/map (n/% +20)))]
- (r.text' char-gen size))))
+ (r.text char-gen size))))
(def: comment^
(r.Random Text)
diff --git a/stdlib/test/test/lux/lang/type.lux b/stdlib/test/test/lux/lang/type.lux
index 0c7fa84be..be888d321 100644
--- a/stdlib/test/test/lux/lang/type.lux
+++ b/stdlib/test/test/lux/lang/type.lux
@@ -17,7 +17,7 @@
(r.Random Text)
(do r.Monad<Random>
[size (|> r.nat (:: @ map (n/% +10)))]
- (r.text size)))
+ (r.unicode size)))
(def: gen-ident
(r.Random Ident)
diff --git a/stdlib/test/test/lux/lang/type/check.lux b/stdlib/test/test/lux/lang/type/check.lux
index 517b2561b..b384ad2ef 100644
--- a/stdlib/test/test/lux/lang/type/check.lux
+++ b/stdlib/test/test/lux/lang/type/check.lux
@@ -20,7 +20,7 @@
(r.Random Text)
(do r.Monad<Random>
[size (|> r.nat (:: @ map (n/% +10)))]
- (r.text size)))
+ (r.unicode size)))
(def: gen-ident
(r.Random Ident)
diff --git a/stdlib/test/test/lux/macro/poly/equality.lux b/stdlib/test/test/lux/macro/poly/equality.lux
index beb203bcb..832c72355 100644
--- a/stdlib/test/test/lux/macro/poly/equality.lux
+++ b/stdlib/test/test/lux/macro/poly/equality.lux
@@ -51,11 +51,11 @@
r.bool
gen-int
r.frac
- (r.text size)
+ (r.unicode size)
(r.maybe gen-int)
(r.list size gen-int)
($_ r.alt r.bool gen-int r.frac)
- ($_ r.seq gen-int r.frac (r.text size))
+ ($_ r.seq gen-int r.frac (r.unicode size))
gen-recursive)))
(derived: (&.Eq<?> Record))