From bb2ec42843ba0f13adafe1f2f4a7b2820fbcaafa Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Sun, 13 May 2018 12:59:13 -0400 Subject: - Added dedicated unicode support for text. - Minor refactoring. --- stdlib/source/lux/control/predicate.lux | 29 ++- stdlib/source/lux/data/coll/queue/priority.lux | 48 ++-- stdlib/source/lux/data/coll/tree/finger.lux | 51 ++-- stdlib/source/lux/data/text/unicode.lux | 338 +++++++++++++++++++++++++ stdlib/source/lux/math/random.lux | 109 ++------ stdlib/test/test/lux.lux | 4 +- stdlib/test/test/lux/cli.lux | 2 +- stdlib/test/test/lux/data/format/json.lux | 12 +- stdlib/test/test/lux/data/format/xml.lux | 2 +- stdlib/test/test/lux/data/ident.lux | 2 +- stdlib/test/test/lux/data/text.lux | 14 +- stdlib/test/test/lux/data/text/lexer.lux | 4 +- stdlib/test/test/lux/data/text/regex.lux | 6 +- stdlib/test/test/lux/lang/syntax.lux | 6 +- stdlib/test/test/lux/lang/type.lux | 2 +- stdlib/test/test/lux/lang/type/check.lux | 2 +- stdlib/test/test/lux/macro/poly/equality.lux | 4 +- 17 files changed, 466 insertions(+), 169 deletions(-) create mode 100644 stdlib/source/lux/data/text/unicode.lux diff --git a/stdlib/source/lux/control/predicate.lux b/stdlib/source/lux/control/predicate.lux index a113339b8..45ed4c984 100644 --- a/stdlib/source/lux/control/predicate.lux +++ b/stdlib/source/lux/control/predicate.lux @@ -1,21 +1,36 @@ (.module: lux - (lux (data (coll (set ["set" unordered #+ Set]))))) + (lux (control [monoid #+ Monoid]) + (data (coll (set ["set" unordered #+ Set]))) + [function])) (type: #export (Predicate a) (-> a Bool)) (alias: Pred Predicate) -(do-template [ ] - [(def: #export ( left right) +(do-template [ ] + [(def: #export + (All [a] (Pred a)) + (function.const )) + + (def: #export ( left right) (All [a] (-> (Pred a) (Pred a) (Pred a))) (function (_ value) - ( (left value) - (right value))))] + ( (left value) + (right value))))] + + [none false union or] + [all true intersection and] + ) + +(do-template [ ] + [(struct: #export (All [a] (Monoid (Pred a))) + (def: identity ) + (def: compose ))] - [union or] - [intersection and] + [Union@Monoid none union] + [Intersection@Monoid all intersection] ) (def: #export (complement predicate) diff --git a/stdlib/source/lux/data/coll/queue/priority.lux b/stdlib/source/lux/data/coll/queue/priority.lux index 1c9c51075..a1e765fc3 100644 --- a/stdlib/source/lux/data/coll/queue/priority.lux +++ b/stdlib/source/lux/data/coll/queue/priority.lux @@ -2,14 +2,14 @@ lux (lux (control [equality #+ Eq] [monad #+ do Monad]) - (data (coll (tree ["F" finger])) + (data (coll (tree [finger #+ Tree])) [number "nat/" Interval] [maybe]))) (type: #export Priority Nat) (type: #export (Queue a) - (Maybe (F.Fingers Priority a))) + (Maybe (Tree Priority a))) (def: #export max Priority nat/top) (def: #export min Priority nat/bottom) @@ -22,7 +22,7 @@ (All [a] (-> (Queue a) (Maybe a))) (do maybe.Monad [fingers queue] - (wrap (maybe.assume (F.search (n/= (F.tag fingers)) fingers))))) + (wrap (maybe.assume (finger.search (n/= (finger.tag fingers)) fingers))))) (def: #export (size queue) (All [a] (-> (Queue a) Nat)) @@ -31,12 +31,12 @@ +0 (#.Some fingers) - (loop [node (get@ #F.tree fingers)] + (loop [node (get@ #finger.node fingers)] (case node - (#F.Leaf _ _) + (#finger.Leaf _ _) +1 - (#F.Branch _ left right) + (#finger.Branch _ left right) (n/+ (recur left) (recur right)))))) (def: #export (member? Eq queue member) @@ -46,12 +46,12 @@ false (#.Some fingers) - (loop [node (get@ #F.tree fingers)] + (loop [node (get@ #finger.node fingers)] (case node - (#F.Leaf _ reference) + (#finger.Leaf _ reference) (:: Eq = reference member) - (#F.Branch _ left right) + (#finger.Branch _ left right) (or (recur left) (recur right)))))) @@ -59,44 +59,44 @@ (All [a] (-> (Queue a) (Queue a))) (do maybe.Monad [fingers queue - #let [highest-priority (F.tag fingers)] - node' (loop [node (get@ #F.tree fingers)] + #let [highest-priority (finger.tag fingers)] + node' (loop [node (get@ #finger.node fingers)] (case node - (#F.Leaf priority reference) + (#finger.Leaf priority reference) (if (n/= highest-priority priority) #.None (#.Some node)) - (#F.Branch priority left right) - (if (n/= highest-priority (F.tag (set@ #F.tree left fingers))) + (#finger.Branch priority left right) + (if (n/= highest-priority (finger.tag (set@ #finger.node left fingers))) (case (recur left) #.None (#.Some right) (#.Some =left) - (|> (F.branch (set@ #F.tree =left fingers) - (set@ #F.tree right fingers)) - (get@ #F.tree) + (|> (finger.branch (set@ #finger.node =left fingers) + (set@ #finger.node right fingers)) + (get@ #finger.node) #.Some)) (case (recur right) #.None (#.Some left) (#.Some =right) - (|> (F.branch (set@ #F.tree left fingers) - (set@ #F.tree =right fingers)) - (get@ #F.tree) + (|> (finger.branch (set@ #finger.node left fingers) + (set@ #finger.node =right fingers)) + (get@ #finger.node) #.Some)) )))] - (wrap (set@ #F.tree node' fingers)))) + (wrap (set@ #finger.node node' fingers)))) (def: #export (push priority value queue) (All [a] (-> Priority a (Queue a) (Queue a))) - (let [addition {#F.monoid number.Max@Monoid - #F.tree (#F.Leaf priority value)}] + (let [addition {#finger.monoid number.Max@Monoid + #finger.node (#finger.Leaf priority value)}] (case queue #.None (#.Some addition) (#.Some fingers) - (#.Some (F.branch fingers addition))))) + (#.Some (finger.branch fingers addition))))) diff --git a/stdlib/source/lux/data/coll/tree/finger.lux b/stdlib/source/lux/data/coll/tree/finger.lux index 3cf904c3f..ea1ff0eee 100644 --- a/stdlib/source/lux/data/coll/tree/finger.lux +++ b/stdlib/source/lux/data/coll/tree/finger.lux @@ -7,46 +7,55 @@ (#Leaf m a) (#Branch m (Node m a) (Node m a))) -(type: #export (Fingers m a) +(type: #export (Tree m a) {#monoid (m.Monoid m) - #tree (Node m a)}) + #node (Node m a)}) -(def: #export (tag fingers) - (All [m a] (-> (Fingers m a) m)) - (case (get@ #tree fingers) +(def: #export (tag tree) + (All [m a] (-> (Tree m a) m)) + (case (get@ #node tree) (^or (#Leaf tag _) (#Branch tag _ _)) tag)) -(def: #export (value fingers) - (All [m a] (-> (Fingers m a) a)) - (case (get@ #tree fingers) +(def: #export (value tree) + (All [m a] (-> (Tree m a) a)) + (case (get@ #node tree) (#Leaf tag value) value (#Branch tag left right) - (value (set@ #tree left fingers)))) + (value (set@ #node left tree)))) (def: #export (branch left right) - (All [m a] (-> (Fingers m a) (Fingers m a) (Fingers m a))) + (All [m a] (-> (Tree m a) (Tree m a) (Tree m a))) (let [Monoid (get@ #monoid right)] {#monoid Monoid - #tree (#Branch (:: Monoid compose (tag left) (tag right)) - (get@ #tree left) - (get@ #tree right))})) - -(def: #export (search pred fingers) - (All [m a] (-> (-> m Bool) (Fingers m a) (Maybe a))) - (let [tag/compose (get@ [#monoid #m.compose] fingers)] - (if (pred (tag fingers)) - (loop [_tag (get@ [#monoid #m.identity] fingers) - _node (get@ #tree fingers)] + #node (#Branch (:: Monoid compose (tag left) (tag right)) + (get@ #node left) + (get@ #node right))})) + +(def: #export (search pred tree) + (All [m a] (-> (-> m Bool) (Tree m a) (Maybe a))) + (let [tag/compose (get@ [#monoid #m.compose] tree)] + (if (pred (tag tree)) + (loop [_tag (get@ [#monoid #m.identity] tree) + _node (get@ #node tree)] (case _node (#Leaf _ value) (#.Some value) (#Branch _ left right) - (let [shifted-tag (tag/compose _tag (tag (set@ #tree left fingers)))] + (let [shifted-tag (tag/compose _tag (tag (set@ #node left tree)))] (if (pred shifted-tag) (recur _tag left) (recur shifted-tag right))))) #.None))) + +(def: #export (found? pred tree) + (All [m a] (-> (-> m Bool) (Tree m a) Bool)) + (case (search pred tree) + (#.Some _) + true + + #.None + false)) diff --git a/stdlib/source/lux/data/text/unicode.lux b/stdlib/source/lux/data/text/unicode.lux new file mode 100644 index 000000000..7b1eb0fa9 --- /dev/null +++ b/stdlib/source/lux/data/text/unicode.lux @@ -0,0 +1,338 @@ +(.module: + lux + (lux (control [interval #+ Interval] + [monoid #+ Monoid]) + (data [number #+ hex "nat/" Interval] + (coll [list] + (tree [finger #+ Tree]))) + (type abstract))) + +(type: #export Char Nat) + +(abstract: #export Segment + {} + (Interval Char) + + (def: empty (@abstraction (interval.between number.Enum nat/top nat/bottom))) + + (struct: _ (Monoid Segment) + (def: identity ..empty) + (def: (compose left right) + (let [left (@representation left) + right (@representation right)] + (@abstraction + (interval.between number.Enum + (n/min (:: left bottom) + (:: right bottom)) + (n/max (:: left top) + (:: right top))))))) + + (def: #export (segment start end) + (-> Char Char Segment) + (@abstraction (interval.between number.Enum (n/min start end) (n/max start end)))) + + (do-template [ ] + [(def: #export + (-> Segment Char) + (|>> @representation (get@ )))] + + [start #interval.bottom] + [end #interval.top] + ) + + (def: #export (size segment) + (-> Segment Nat) + (let [start (get@ #interval.bottom (@representation segment)) + end (get@ #interval.top (@representation segment))] + (|> end (n/- start) inc))) + + (def: #export (within? segment char) + (All [a] (-> Segment Char Bool)) + (interval.within? (@representation segment) char)) + ) + +(do-template [ ] + [(def: #export Segment (..segment (hex ) (hex )))] + + [basic-latin "+0000" "+007F"] + [latin-1-supplement "+00A0" "+00FF"] + [latin-extended-a "+0100" "+017F"] + [latin-extended-b "+0180" "+024F"] + [ipa-extensions "+0250" "+02AF"] + [spacing-modifier-letters "+02B0" "+02FF"] + [combining-diacritical-marks "+0300" "+036F"] + [greek-and-coptic "+0370" "+03FF"] + [cyrillic "+0400" "+04FF"] + [cyrillic-supplementary "+0500" "+052F"] + [armenian "+0530" "+058F"] + [hebrew "+0590" "+05FF"] + [arabic "+0600" "+06FF"] + [syriac "+0700" "+074F"] + [thaana "+0780" "+07BF"] + [devanagari "+0900" "+097F"] + [bengali "+0980" "+09FF"] + [gurmukhi "+0A00" "+0A7F"] + [gujarati "+0A80" "+0AFF"] + [oriya "+0B00" "+0B7F"] + [tamil "+0B80" "+0BFF"] + [telugu "+0C00" "+0C7F"] + [kannada "+0C80" "+0CFF"] + [malayalam "+0D00" "+0D7F"] + [sinhala "+0D80" "+0DFF"] + [thai "+0E00" "+0E7F"] + [lao "+0E80" "+0EFF"] + [tibetan "+0F00" "+0FFF"] + [myanmar "+1000" "+109F"] + [georgian "+10A0" "+10FF"] + [hangul-jamo "+1100" "+11FF"] + [ethiopic "+1200" "+137F"] + [cherokee "+13A0" "+13FF"] + [unified-canadian-aboriginal-syllabics "+1400" "+167F"] + [ogham "+1680" "+169F"] + [runic "+16A0" "+16FF"] + [tagalog "+1700" "+171F"] + [hanunoo "+1720" "+173F"] + [buhid "+1740" "+175F"] + [tagbanwa "+1760" "+177F"] + [khmer "+1780" "+17FF"] + [mongolian "+1800" "+18AF"] + [limbu "+1900" "+194F"] + [tai-le "+1950" "+197F"] + [khmer-symbols "+19E0" "+19FF"] + [phonetic-extensions "+1D00" "+1D7F"] + [latin-extended-additional "+1E00" "+1EFF"] + [greek-extended "+1F00" "+1FFF"] + [general-punctuation "+2000" "+206F"] + [superscripts-and-subscripts "+2070" "+209F"] + [currency-symbols "+20A0" "+20CF"] + [combining-diacritical-marks-for-symbols "+20D0" "+20FF"] + [letterlike-symbols "+2100" "+214F"] + [number-forms "+2150" "+218F"] + [arrows "+2190" "+21FF"] + [mathematical-operators "+2200" "+22FF"] + [miscellaneous-technical "+2300" "+23FF"] + [control-pictures "+2400" "+243F"] + [optical-character-recognition "+2440" "+245F"] + [enclosed-alphanumerics "+2460" "+24FF"] + [box-drawing "+2500" "+257F"] + [block-elements "+2580" "+259F"] + [geometric-shapes "+25A0" "+25FF"] + [miscellaneous-symbols "+2600" "+26FF"] + [dingbats "+2700" "+27BF"] + [miscellaneous-mathematical-symbols-a "+27C0" "+27EF"] + [supplemental-arrows-a "+27F0" "+27FF"] + [braille-patterns "+2800" "+28FF"] + [supplemental-arrows-b "+2900" "+297F"] + [miscellaneous-mathematical-symbols-b "+2980" "+29FF"] + [supplemental-mathematical-operators "+2A00" "+2AFF"] + [miscellaneous-symbols-and-arrows "+2B00" "+2BFF"] + [cjk-radicals-supplement "+2E80" "+2EFF"] + [kangxi-radicals "+2F00" "+2FDF"] + [ideographic-description-characters "+2FF0" "+2FFF"] + [cjk-symbols-and-punctuation "+3000" "+303F"] + [hiragana "+3040" "+309F"] + [katakana "+30A0" "+30FF"] + [bopomofo "+3100" "+312F"] + [hangul-compatibility-jamo "+3130" "+318F"] + [kanbun "+3190" "+319F"] + [bopomofo-extended "+31A0" "+31BF"] + [katakana-phonetic-extensions "+31F0" "+31FF"] + [enclosed-cjk-letters-and-months "+3200" "+32FF"] + [cjk-compatibility "+3300" "+33FF"] + [cjk-unified-ideographs-extension-a "+3400" "+4DBF"] + [yijing-hexagram-symbols "+4DC0" "+4DFF"] + [cjk-unified-ideographs "+4E00" "+9FFF"] + [yi-syllables "+A000" "+A48F"] + [yi-radicals "+A490" "+A4CF"] + [hangul-syllables "+AC00" "+D7AF"] + [high-surrogates "+D800" "+DB7F"] + [high-private-use-surrogates "+DB80" "+DBFF"] + [low-surrogates "+DC00" "+DFFF"] + [private-use-area "+E000" "+F8FF"] + [cjk-compatibility-ideographs "+F900" "+FAFF"] + [alphabetic-presentation-forms "+FB00" "+FB4F"] + [arabic-presentation-forms-a "+FB50" "+FDFF"] + [variation-selectors "+FE00" "+FE0F"] + [combining-half-marks "+FE20" "+FE2F"] + [cjk-compatibility-forms "+FE30" "+FE4F"] + [small-form-variants "+FE50" "+FE6F"] + [arabic-presentation-forms-b "+FE70" "+FEFF"] + [halfwidth-and-fullwidth-forms "+FF00" "+FFEF"] + [specials "+FFF0" "+FFFF"] + [linear-b-syllabary "+10000" "+1007F"] + [linear-b-ideograms "+10080" "+100FF"] + [aegean-numbers "+10100" "+1013F"] + [old-italic "+10300" "+1032F"] + [gothic "+10330" "+1034F"] + [ugaritic "+10380" "+1039F"] + [deseret "+10400" "+1044F"] + [shavian "+10450" "+1047F"] + [osmanya "+10480" "+104AF"] + [cypriot-syllabary "+10800" "+1083F"] + [byzantine-musical-symbols "+1D000" "+1D0FF"] + [musical-symbols "+1D100" "+1D1FF"] + [tai-xuan-jing-symbols "+1D300" "+1D35F"] + [mathematical-alphanumeric-symbols "+1D400" "+1D7FF"] + [cjk-unified-ideographs-extension-b "+20000" "+2A6DF"] + [cjk-compatibility-ideographs-supplement "+2F800" "+2FA1F"] + [tags "+E0000" "+E007F"] + ) + +(type: #export Set (Tree Segment [])) + +(def: (singleton segment) + (-> Segment Set) + {#finger.monoid Monoid + #finger.node (#finger.Leaf segment [])}) + +(def: #export (set segments) + (-> (List Segment) Set) + (case segments + (^ (list)) + (..singleton (:: Monoid identity)) + + (^ (list singleton)) + (..singleton singleton) + + (^ (list left right)) + (..singleton (:: Monoid compose left right)) + + _ + (let [[sides extra] (n//% +2 (list.size segments)) + [left+ right+] (list.split (n/+ sides extra) segments)] + (finger.branch (set left+) + (set right+))))) + +(def: half/0 + (List Segment) + (list basic-latin + latin-1-supplement + latin-extended-a + latin-extended-b + ipa-extensions + spacing-modifier-letters + combining-diacritical-marks + greek-and-coptic + cyrillic + cyrillic-supplementary + armenian + hebrew + arabic + syriac + thaana + devanagari + bengali + gurmukhi + gujarati + oriya + tamil + telugu + kannada + malayalam + sinhala + thai + lao + tibetan + myanmar + georgian + hangul-jamo + ethiopic + cherokee + unified-canadian-aboriginal-syllabics + ogham + runic + tagalog + hanunoo + buhid + tagbanwa + khmer + mongolian + limbu + tai-le + khmer-symbols + phonetic-extensions + latin-extended-additional + greek-extended + general-punctuation + superscripts-and-subscripts + currency-symbols + combining-diacritical-marks-for-symbols + letterlike-symbols + number-forms + arrows + mathematical-operators + miscellaneous-technical + control-pictures + optical-character-recognition + enclosed-alphanumerics + box-drawing + )) + +(def: half/1 + (List Segment) + (list block-elements + geometric-shapes + miscellaneous-symbols + dingbats + miscellaneous-mathematical-symbols-a + supplemental-arrows-a + braille-patterns + supplemental-arrows-b + miscellaneous-mathematical-symbols-b + supplemental-mathematical-operators + miscellaneous-symbols-and-arrows + cjk-radicals-supplement + kangxi-radicals + ideographic-description-characters + cjk-symbols-and-punctuation + hiragana + katakana + bopomofo + hangul-compatibility-jamo + kanbun + bopomofo-extended + katakana-phonetic-extensions + enclosed-cjk-letters-and-months + cjk-compatibility + cjk-unified-ideographs-extension-a + yijing-hexagram-symbols + cjk-unified-ideographs + yi-syllables + yi-radicals + hangul-syllables + high-surrogates + high-private-use-surrogates + low-surrogates + private-use-area + cjk-compatibility-ideographs + alphabetic-presentation-forms + arabic-presentation-forms-a + variation-selectors + combining-half-marks + cjk-compatibility-forms + small-form-variants + arabic-presentation-forms-b + halfwidth-and-fullwidth-forms + specials + linear-b-syllabary + linear-b-ideograms + aegean-numbers + old-italic + gothic + ugaritic + deseret + shavian + osmanya + cypriot-syllabary + byzantine-musical-symbols + musical-symbols + tai-xuan-jing-symbols + mathematical-alphanumeric-symbols + cjk-unified-ideographs-extension-b + cjk-compatibility-ideographs-supplement + tags + )) + +(def: #export full + Set + (finger.branch (set half/0) (set half/1))) diff --git a/stdlib/source/lux/math/random.lux b/stdlib/source/lux/math/random.lux index 484574c82..cdb65971c 100644 --- a/stdlib/source/lux/math/random.lux +++ b/stdlib/source/lux/math/random.lux @@ -1,11 +1,12 @@ (.module: {#.doc "Pseudo-random number generation (PRNG) algorithms."} - [lux #- list i64 nat int deg] + [lux #- list i64 nat int deg char] (lux (control [functor #+ Functor] [apply #+ Apply] [monad #+ do Monad] hash) (data [bit] [text "text/" Monoid] + (text [unicode #+ Char Segment]) [product] [maybe] [number #+ hex] @@ -17,7 +18,8 @@ [queue #+ Queue] (set ["set" unordered #+ Set]) [stack #+ Stack] - [sequence #+ Sequence])) + [sequence #+ Sequence] + (tree [finger #+ Tree]))) )) (type: #export #rec PRNG @@ -95,99 +97,32 @@ (Random Frac) (:: Monad map number.bits-to-frac nat)) -(def: #export (text' char-gen size) - (-> (Random Nat) Nat (Random Text)) +(def: #export (char set) + (-> unicode.Set (Random Char)) + (let [summary (finger.tag set) + start (unicode.start summary) + size (unicode.size summary) + in-range (: (-> Char Char) + (|>> (n/% size) (n/+ start)))] + (|> nat + (:: Monad map in-range) + (..filter (function (_ char) + (finger.found? (function (_ segment) + (unicode.within? segment char)) + set)))))) + +(def: #export (text char-gen size) + (-> (Random Char) Nat (Random Text)) (if (n/= +0 size) (:: Monad wrap "") (do Monad [x char-gen - xs (text' char-gen (dec size))] + xs (text char-gen (dec size))] (wrap (text/compose (text.from-code x) xs))))) -(type: Region [Nat Nat]) - -(do-template [ ] - [(def: Region [(hex ) (hex )])] - - [Thaana "+0780" "+07BF"] - [Khmer-Symbols "+19E0" "+19FF"] - [Phonetic-Extensions "+1D00" "+1D7F"] - [Hangul-Syllables "+AC00" "+D7AF"] - - [Cypriot-Syllabary "+10800" "+1083F"] - [Tai-Xuan-Jing-Symbols "+1D300" "+1D35F"] - [Mathematical-Alphanumeric-Symbols "+1D400" "+1D7FF"] - [CJK-Unified-Ideographs-Extension-B "+20000" "+2A6DF"] - [CJK-Compatibility-Ideographs-Supplement "+2F800" "+2FA1F"] - ) - -(def: (within? [from to] char) - (-> Region Nat Bool) - (and (n/>= from char) - (n/<= to char))) - -(def: unicode-ceiling (|> CJK-Compatibility-Ideographs-Supplement product.right inc)) - (def: #export unicode - (Random Nat) - (|> ..nat - (:: Monad map (n/% unicode-ceiling)) - (..filter (function (_ raw) - ## From "Basic Latin" to "Syriac" - (or (n/<= (hex "+074F") raw) - (within? Thaana raw) - ## From "Devanagari" to "Ethiopic" - (and (n/>= (hex "+0900") raw) - (n/<= (hex "+137F") raw)) - ## From "Cherokee" to "Mongolian" - (and (n/>= (hex "+13A0") raw) - (n/<= (hex "+18AF") raw)) - ## From "Limbu" to "Tai Le" - (and (n/>= (hex "+1900") raw) - (n/<= (hex "+197F") raw)) - (within? Khmer-Symbols raw) - (within? Phonetic-Extensions raw) - ## From "Latin Extended Additional" to "Miscellaneous Symbols and Arrows" - (and (n/>= (hex "+1E00") raw) - (n/<= (hex "+2BFF") raw)) - ## From "CJK Radicals Supplement" to "Kangxi Radicals" - (and (n/>= (hex "+2E80") raw) - (n/<= (hex "+2FDF") raw)) - ## From "Ideographic Description Characters" to "Bopomofo Extended" - (and (n/>= (hex "+2FF0") raw) - (n/<= (hex "+31BF") raw)) - ## From "Katakana Phonetic Extensions" to "CJK Unified Ideographs" - (and (n/>= (hex "+31F0") raw) - (n/<= (hex "+9FAF") raw)) - ## From "Yi Syllables" to "Yi Radicals" - (and (n/>= (hex "+A000") raw) - (n/<= (hex "+A4CF") raw)) - (within? Hangul-Syllables raw) - ## From "CJK Compatibility Ideographs" to "Arabic Presentation Forms-A" - (and (n/>= (hex "+F900") raw) - (n/<= (hex "+FDFF") raw)) - ## From "Combining Half Marks" to "Halfwidth and Fullwidth Forms" - (and (n/>= (hex "+FE20") raw) - (n/<= (hex "+FFEF") raw)) - ## From "Linear B Syllabary" to "Aegean Numbers" - (and (n/>= (hex "+10000") raw) - (n/<= (hex "+1013F") raw)) - ## From "Old Italic" to "Osmanya" - (and (n/>= (hex "+10300") raw) - (n/<= (hex "+104AF") raw)) - (within? Cypriot-Syllabary raw) - ## From "Byzantine Musical Symbols" to "Musical Symbols" - (and (n/>= (hex "+1D000") raw) - (n/<= (hex "+1D1FF") raw)) - (within? Tai-Xuan-Jing-Symbols raw) - (within? Mathematical-Alphanumeric-Symbols raw) - (within? CJK-Unified-Ideographs-Extension-B raw) - (within? CJK-Compatibility-Ideographs-Supplement raw) - ))))) - -(def: #export (text size) (-> Nat (Random Text)) - (text' unicode size)) + (text (char unicode.full))) (do-template [ ] [(def: #export diff --git a/stdlib/test/test/lux.lux b/stdlib/test/test/lux.lux index 1f0a6e115..3c731ebc4 100644 --- a/stdlib/test/test/lux.lux +++ b/stdlib/test/test/lux.lux @@ -15,8 +15,8 @@ (<| (times +100) (do @ [size (|> r.nat (:: @ map (|>> (n/% +100) (n/max +10)))) - x (r.text size) - y (r.text size)] + x (r.unicode size) + y (r.unicode size)] ($_ seq (test "Every value is identical to itself, and the 'id' function doesn't change values in any way." (and (is? x x) diff --git a/stdlib/test/test/lux/cli.lux b/stdlib/test/test/lux/cli.lux index b5f251c57..4aed244b2 100644 --- a/stdlib/test/test/lux/cli.lux +++ b/stdlib/test/test/lux/cli.lux @@ -22,7 +22,7 @@ #let [(^open "Nat/") number.Codec gen-arg (:: @ map Nat/encode r.nat)] yes gen-arg - #let [gen-ignore (|> (r.text +5) (r.filter (|>> (text/= yes) not)))] + #let [gen-ignore (|> (r.unicode +5) (r.filter (|>> (text/= yes) not)))] no gen-ignore pre-ignore (r.list +5 gen-ignore) post-ignore (r.list +5 gen-ignore)] diff --git a/stdlib/test/test/lux/data/format/json.lux b/stdlib/test/test/lux/data/format/json.lux index 43b0851c0..a8a117a04 100644 --- a/stdlib/test/test/lux/data/format/json.lux +++ b/stdlib/test/test/lux/data/format/json.lux @@ -42,9 +42,9 @@ (:: @ wrap []) r.bool (|> r.frac (:: @ map (f/* 1_000_000.0))) - (r.text size) + (r.unicode size) (r.sequence size gen-json) - (r.dict text.Hash size (r.text size) gen-json) + (r.dict text.Hash size (r.unicode size) gen-json) ))))) (context: "JSON" @@ -110,12 +110,12 @@ ($_ r.seq r.bool r.frac - (r.text size) + (r.unicode size) (r.maybe r.frac) (r.list size r.frac) - (r.dict text.Hash size (r.text size) r.frac) - ## ($_ r.alt r.bool (r.text size) r.frac) - ## ($_ r.seq r.bool r.frac (r.text size)) + (r.dict text.Hash size (r.unicode size) r.frac) + ## ($_ r.alt r.bool (r.unicode size) r.frac) + ## ($_ r.seq r.bool r.frac (r.unicode size)) gen-recursive ## _instant.instant _duration.duration diff --git a/stdlib/test/test/lux/data/format/xml.lux b/stdlib/test/test/lux/data/format/xml.lux index d70b911dc..b7377ab64 100644 --- a/stdlib/test/test/lux/data/format/xml.lux +++ b/stdlib/test/test/lux/data/format/xml.lux @@ -37,7 +37,7 @@ (-> Nat Nat (r.Random Text)) (do r.Monad [size (size^ bottom top)] - (r.text' xml-char^ size))) + (r.text xml-char^ size))) (def: xml-identifier^ (r.Random Ident) diff --git a/stdlib/test/test/lux/data/ident.lux b/stdlib/test/test/lux/data/ident.lux index d6732619e..5e39bad7f 100644 --- a/stdlib/test/test/lux/data/ident.lux +++ b/stdlib/test/test/lux/data/ident.lux @@ -11,7 +11,7 @@ (def: (gen-part size) (-> Nat (r.Random Text)) - (|> (r.text size) (r.filter (|>> (text.contains? ".") not)))) + (|> (r.unicode size) (r.filter (|>> (text.contains? ".") not)))) (context: "Idents" (<| (times +100) diff --git a/stdlib/test/test/lux/data/text.lux b/stdlib/test/test/lux/data/text.lux index 2430c9919..99c750f74 100644 --- a/stdlib/test/test/lux/data/text.lux +++ b/stdlib/test/test/lux/data/text.lux @@ -14,7 +14,7 @@ (<| (times +100) (do @ [size (:: @ map (n/% +100) r.nat) - sample (r.text size)] + sample (r.unicode size)] (test "" (or (and (n/= +0 size) (&.empty? sample)) (n/= size (&.size sample))))))) @@ -29,7 +29,7 @@ (do @ [size bounded-size idx (:: @ map (n/% size) r.nat) - sample (r.text size)] + sample (r.unicode size)] (test "" (|> sample (&.nth idx) (case> (^multi (#.Some char) @@ -58,8 +58,8 @@ (do @ [sizeL bounded-size sizeR bounded-size - sampleL (r.text sizeL) - sampleR (r.text sizeR) + sampleL (r.unicode sizeL) + sampleR (r.unicode sizeR) #let [sample (&.concat (list sampleL sampleR)) fake-sample (&.join-with " " (list sampleL sampleR)) dup-sample (&.join-with "" (list sampleL sampleR)) @@ -105,9 +105,9 @@ ## can make text replacement work improperly. ## Because of that, I restrict the charset. normal-char-gen (|> r.nat (:: @ map (|>> (n/% +128) (n/max +1))))] - sep1 (r.text' normal-char-gen +1) - sep2 (r.text' normal-char-gen +1) - #let [part-gen (|> (r.text' normal-char-gen sizeP) + sep1 (r.text normal-char-gen +1) + sep2 (r.text normal-char-gen +1) + #let [part-gen (|> (r.text normal-char-gen sizeP) (r.filter (|>> (&.contains? sep1) not)))] parts (r.list sizeL part-gen) #let [sample1 (&.concat (list.interpose sep1 parts)) diff --git a/stdlib/test/test/lux/data/text/lexer.lux b/stdlib/test/test/lux/data/text/lexer.lux index 8706e2f2e..58e142a98 100644 --- a/stdlib/test/test/lux/data/text/lexer.lux +++ b/stdlib/test/test/lux/data/text/lexer.lux @@ -73,8 +73,8 @@ (<| (times +100) (do @ [size (|> r.nat (:: @ map (|>> (n/% +100) (n/max +10)))) - sample (r.text size) - non-sample (|> (r.text size) + sample (r.unicode size) + non-sample (|> (r.unicode size) (r.filter (|>> (text/= sample) not)))] ($_ seq (test "Can find literal text fragments." diff --git a/stdlib/test/test/lux/data/text/regex.lux b/stdlib/test/test/lux/data/text/regex.lux index 7323aeb79..7bc3082a1 100644 --- a/stdlib/test/test/lux/data/text/regex.lux +++ b/stdlib/test/test/lux/data/text/regex.lux @@ -269,9 +269,9 @@ (context: "Pattern-matching" (<| (times +100) (do @ - [sample1 (r.text +3) - sample2 (r.text +3) - sample3 (r.text +4)] + [sample1 (r.unicode +3) + sample2 (r.unicode +3) + sample3 (r.unicode +4)] (case (format sample1 "-" sample2 "-" sample3) (&.^regex "(.{3})-(.{3})-(.{4})" [_ match1 match2 match3]) diff --git a/stdlib/test/test/lux/lang/syntax.lux b/stdlib/test/test/lux/lang/syntax.lux index 01679b27a..0645bfe25 100644 --- a/stdlib/test/test/lux/lang/syntax.lux +++ b/stdlib/test/test/lux/lang/syntax.lux @@ -33,7 +33,7 @@ (not (text.contains? (text.from-code sample) invalid-range)))))] size (|> r.nat (:: @ map (|>> (n/% +20) (n/max +1))))] - (r.text' char-gen size))) + (r.text char-gen size))) (def: ident^ (r.Random Ident) @@ -52,7 +52,7 @@ ($_ r.either (do r.Monad [size (|> r.nat (r/map (n/% +20)))] - (|> (r.text size) (r/map code.text))) + (|> (r.unicode size) (r/map code.text))) (|> ident^ (r/map code.symbol)) (|> ident^ (r/map code.tag)))) simple^ (: (r.Random Code) @@ -152,7 +152,7 @@ (n/= (char ")") value))))))] (do r.Monad [size (|> r.nat (r/map (n/% +20)))] - (r.text' char-gen size)))) + (r.text char-gen size)))) (def: comment^ (r.Random Text) diff --git a/stdlib/test/test/lux/lang/type.lux b/stdlib/test/test/lux/lang/type.lux index 0c7fa84be..be888d321 100644 --- a/stdlib/test/test/lux/lang/type.lux +++ b/stdlib/test/test/lux/lang/type.lux @@ -17,7 +17,7 @@ (r.Random Text) (do r.Monad [size (|> r.nat (:: @ map (n/% +10)))] - (r.text size))) + (r.unicode size))) (def: gen-ident (r.Random Ident) diff --git a/stdlib/test/test/lux/lang/type/check.lux b/stdlib/test/test/lux/lang/type/check.lux index 517b2561b..b384ad2ef 100644 --- a/stdlib/test/test/lux/lang/type/check.lux +++ b/stdlib/test/test/lux/lang/type/check.lux @@ -20,7 +20,7 @@ (r.Random Text) (do r.Monad [size (|> r.nat (:: @ map (n/% +10)))] - (r.text size))) + (r.unicode size))) (def: gen-ident (r.Random Ident) diff --git a/stdlib/test/test/lux/macro/poly/equality.lux b/stdlib/test/test/lux/macro/poly/equality.lux index beb203bcb..832c72355 100644 --- a/stdlib/test/test/lux/macro/poly/equality.lux +++ b/stdlib/test/test/lux/macro/poly/equality.lux @@ -51,11 +51,11 @@ r.bool gen-int r.frac - (r.text size) + (r.unicode size) (r.maybe gen-int) (r.list size gen-int) ($_ r.alt r.bool gen-int r.frac) - ($_ r.seq gen-int r.frac (r.text size)) + ($_ r.seq gen-int r.frac (r.unicode size)) gen-recursive))) (derived: (&.Eq Record)) -- cgit v1.2.3