diff options
Diffstat (limited to 'stdlib/source/lux/data/text')
-rw-r--r-- | stdlib/source/lux/data/text/regex.lux | 277 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/unicode.lux | 2 |
2 files changed, 140 insertions, 139 deletions
diff --git a/stdlib/source/lux/data/text/regex.lux b/stdlib/source/lux/data/text/regex.lux index 7c8395d71..af99c6f90 100644 --- a/stdlib/source/lux/data/text/regex.lux +++ b/stdlib/source/lux/data/text/regex.lux @@ -4,9 +4,9 @@ monad] [control ["." try] - ["p" parser ("#@." monad) - ["l" text (#+ Parser)] - ["s" code]]] + ["<>" parser ("#@." monad) + ["<t>" text (#+ Parser)] + ["<c>" code]]] [data ["." product] ["." maybe] @@ -22,101 +22,101 @@ (def: regex-char^ (Parser Text) - (l.none-of "\.|&()[]{}")) + (<t>.none-of "\.|&()[]{}")) (def: escaped-char^ (Parser Text) - (do p.monad - [? (l.this? "\")] + (do <>.monad + [? (<>.parses? (<t>.this "\"))] (if ? - l.any + <t>.any regex-char^))) (def: (refine^ refinement^ base^) (All [a] (-> (Parser a) (Parser Text) (Parser Text))) - (do p.monad + (do <>.monad [output base^ - _ (l.local output refinement^)] + _ (<t>.local output refinement^)] (wrap output))) (def: word^ (Parser Text) - (p.either l.alpha-num - (l.one-of "_"))) + (<>.either <t>.alpha-num + (<t>.one-of "_"))) (def: (copy reference) (-> Text (Parser Text)) - (p.after (l.this reference) (p@wrap reference))) + (<>.after (<t>.this reference) (<>@wrap reference))) (def: (join-text^ part^) (-> (Parser (List Text)) (Parser Text)) - (do p.monad + (do <>.monad [parts part^] (wrap (//.join-with "" parts)))) (def: name-char^ (Parser Text) - (l.none-of (format "[]{}()s#.<>" //.double-quote))) + (<t>.none-of (format "[]{}()s#.<>" //.double-quote))) (def: name-part^ (Parser Text) - (do p.monad - [head (refine^ (l.not l.decimal) + (do <>.monad + [head (refine^ (<t>.not <t>.decimal) name-char^) - tail (l.some name-char^)] + tail (<t>.some name-char^)] (wrap (format head tail)))) (def: (name^ current-module) (-> Text (Parser Name)) - ($_ p.either - (p.and (p@wrap current-module) (p.after (l.this "..") name-part^)) - (p.and name-part^ (p.after (l.this ".") name-part^)) - (p.and (p@wrap "lux") (p.after (l.this ".") name-part^)) - (p.and (p@wrap "") name-part^))) + ($_ <>.either + (<>.and (<>@wrap current-module) (<>.after (<t>.this "..") name-part^)) + (<>.and name-part^ (<>.after (<t>.this ".") name-part^)) + (<>.and (<>@wrap "lux") (<>.after (<t>.this ".") name-part^)) + (<>.and (<>@wrap "") name-part^))) (def: (re-var^ current-module) (-> Text (Parser Code)) - (do p.monad - [name (l.enclosed ["\@<" ">"] (name^ current-module))] + (do <>.monad + [name (<t>.enclosed ["\@<" ">"] (name^ current-module))] (wrap (` (: (Parser Text) (~ (code.identifier name))))))) (def: re-range^ (Parser Code) - (do {@ p.monad} + (do {@ <>.monad} [from (|> regex-char^ (:: @ map (|>> (//.nth 0) maybe.assume))) - _ (l.this "-") + _ (<t>.this "-") to (|> regex-char^ (:: @ map (|>> (//.nth 0) maybe.assume)))] - (wrap (` (l.range (~ (code.nat from)) (~ (code.nat to))))))) + (wrap (` (<t>.range (~ (code.nat from)) (~ (code.nat to))))))) (def: re-char^ (Parser Code) - (do p.monad + (do <>.monad [char escaped-char^] (wrap (` ((~! ..copy) (~ (code.text char))))))) (def: re-options^ (Parser Code) - (do p.monad - [options (l.many escaped-char^)] - (wrap (` (l.one-of (~ (code.text options))))))) + (do <>.monad + [options (<t>.many escaped-char^)] + (wrap (` (<t>.one-of (~ (code.text options))))))) (def: re-user-class^' (Parser Code) - (do p.monad - [negate? (p.maybe (l.this "^")) - parts (p.many ($_ p.either - re-range^ - re-options^))] + (do <>.monad + [negate? (<>.maybe (<t>.this "^")) + parts (<>.many ($_ <>.either + re-range^ + re-options^))] (wrap (case negate? - (#.Some _) (` (l.not ($_ p.either (~+ parts)))) - #.None (` ($_ p.either (~+ parts))))))) + (#.Some _) (` (<t>.not ($_ <>.either (~+ parts)))) + #.None (` ($_ <>.either (~+ parts))))))) (def: re-user-class^ (Parser Code) - (do p.monad + (do <>.monad [_ (wrap []) init re-user-class^' - rest (p.some (p.after (l.this "&&") (l.enclosed ["[" "]"] re-user-class^')))] + rest (<>.some (<>.after (<t>.this "&&") (<t>.enclosed ["[" "]"] re-user-class^')))] (wrap (list@fold (function (_ refinement base) (` ((~! refine^) (~ refinement) (~ base)))) init @@ -124,85 +124,85 @@ (def: blank^ (Parser Text) - (l.one-of (format " " //.tab))) + (<t>.one-of (format " " //.tab))) (def: ascii^ (Parser Text) - (l.range (hex "0") (hex "7F"))) + (<t>.range (hex "0") (hex "7F"))) (def: control^ (Parser Text) - (p.either (l.range (hex "0") (hex "1F")) - (l.one-of (//.from-code (hex "7F"))))) + (<>.either (<t>.range (hex "0") (hex "1F")) + (<t>.one-of (//.from-code (hex "7F"))))) (def: punct^ (Parser Text) - (l.one-of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~" - //.double-quote))) + (<t>.one-of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~" + //.double-quote))) (def: graph^ (Parser Text) - (p.either punct^ l.alpha-num)) + (<>.either punct^ <t>.alpha-num)) (def: print^ (Parser Text) - (p.either graph^ - (l.one-of (//.from-code (hex "20"))))) + (<>.either graph^ + (<t>.one-of (//.from-code (hex "20"))))) (def: re-system-class^ (Parser Code) - (do p.monad + (do <>.monad [] - ($_ p.either - (p.after (l.this ".") (wrap (` l.any))) - (p.after (l.this "\d") (wrap (` l.decimal))) - (p.after (l.this "\D") (wrap (` (l.not l.decimal)))) - (p.after (l.this "\s") (wrap (` l.space))) - (p.after (l.this "\S") (wrap (` (l.not l.space)))) - (p.after (l.this "\w") (wrap (` (~! word^)))) - (p.after (l.this "\W") (wrap (` (l.not (~! word^))))) - - (p.after (l.this "\p{Lower}") (wrap (` l.lower))) - (p.after (l.this "\p{Upper}") (wrap (` l.upper))) - (p.after (l.this "\p{Alpha}") (wrap (` l.alpha))) - (p.after (l.this "\p{Digit}") (wrap (` l.decimal))) - (p.after (l.this "\p{Alnum}") (wrap (` l.alpha-num))) - (p.after (l.this "\p{Space}") (wrap (` l.space))) - (p.after (l.this "\p{HexDigit}") (wrap (` l.hexadecimal))) - (p.after (l.this "\p{OctDigit}") (wrap (` l.octal))) - (p.after (l.this "\p{Blank}") (wrap (` (~! blank^)))) - (p.after (l.this "\p{ASCII}") (wrap (` (~! ascii^)))) - (p.after (l.this "\p{Contrl}") (wrap (` (~! control^)))) - (p.after (l.this "\p{Punct}") (wrap (` (~! punct^)))) - (p.after (l.this "\p{Graph}") (wrap (` (~! graph^)))) - (p.after (l.this "\p{Print}") (wrap (` (~! print^)))) + ($_ <>.either + (<>.after (<t>.this ".") (wrap (` <t>.any))) + (<>.after (<t>.this "\d") (wrap (` <t>.decimal))) + (<>.after (<t>.this "\D") (wrap (` (<t>.not <t>.decimal)))) + (<>.after (<t>.this "\s") (wrap (` <t>.space))) + (<>.after (<t>.this "\S") (wrap (` (<t>.not <t>.space)))) + (<>.after (<t>.this "\w") (wrap (` (~! word^)))) + (<>.after (<t>.this "\W") (wrap (` (<t>.not (~! word^))))) + + (<>.after (<t>.this "\p{Lower}") (wrap (` <t>.lower))) + (<>.after (<t>.this "\p{Upper}") (wrap (` <t>.upper))) + (<>.after (<t>.this "\p{Alpha}") (wrap (` <t>.alpha))) + (<>.after (<t>.this "\p{Digit}") (wrap (` <t>.decimal))) + (<>.after (<t>.this "\p{Alnum}") (wrap (` <t>.alpha-num))) + (<>.after (<t>.this "\p{Space}") (wrap (` <t>.space))) + (<>.after (<t>.this "\p{HexDigit}") (wrap (` <t>.hexadecimal))) + (<>.after (<t>.this "\p{OctDigit}") (wrap (` <t>.octal))) + (<>.after (<t>.this "\p{Blank}") (wrap (` (~! blank^)))) + (<>.after (<t>.this "\p{ASCII}") (wrap (` (~! ascii^)))) + (<>.after (<t>.this "\p{Contrl}") (wrap (` (~! control^)))) + (<>.after (<t>.this "\p{Punct}") (wrap (` (~! punct^)))) + (<>.after (<t>.this "\p{Graph}") (wrap (` (~! graph^)))) + (<>.after (<t>.this "\p{Print}") (wrap (` (~! print^)))) ))) (def: re-class^ (Parser Code) - (p.either re-system-class^ - (l.enclosed ["[" "]"] re-user-class^))) + (<>.either re-system-class^ + (<t>.enclosed ["[" "]"] re-user-class^))) (def: number^ (Parser Nat) - (|> (l.many l.decimal) - (p.codec n.decimal))) + (|> (<t>.many <t>.decimal) + (<>.codec n.decimal))) (def: re-back-reference^ (Parser Code) - (p.either (do p.monad - [_ (l.this "\") - id number^] - (wrap (` ((~! ..copy) (~ (code.identifier ["" (n@encode id)])))))) - (do p.monad - [_ (l.this "\k<") - captured-name name-part^ - _ (l.this ">")] - (wrap (` ((~! ..copy) (~ (code.identifier ["" captured-name])))))))) + (<>.either (do <>.monad + [_ (<t>.this "\") + id number^] + (wrap (` ((~! ..copy) (~ (code.identifier ["" (n@encode id)])))))) + (do <>.monad + [_ (<t>.this "\k<") + captured-name name-part^ + _ (<t>.this ">")] + (wrap (` ((~! ..copy) (~ (code.identifier ["" captured-name])))))))) (def: (re-simple^ current-module) (-> Text (Parser Code)) - ($_ p.either + ($_ <>.either re-class^ (re-var^ current-module) re-back-reference^ @@ -211,50 +211,50 @@ (def: (re-simple-quantified^ current-module) (-> Text (Parser Code)) - (do p.monad + (do <>.monad [base (re-simple^ current-module) - quantifier (l.one-of "?*+")] + quantifier (<t>.one-of "?*+")] (case quantifier "?" - (wrap (` (p.default "" (~ base)))) + (wrap (` (<>.default "" (~ base)))) "*" - (wrap (` ((~! join-text^) (p.some (~ base))))) + (wrap (` ((~! join-text^) (<>.some (~ base))))) ## "+" _ - (wrap (` ((~! join-text^) (p.many (~ base))))) + (wrap (` ((~! join-text^) (<>.many (~ base))))) ))) (def: (re-counted-quantified^ current-module) (-> Text (Parser Code)) - (do {@ p.monad} + (do {@ <>.monad} [base (re-simple^ current-module)] - (l.enclosed ["{" "}"] - ($_ p.either - (do @ - [[from to] (p.and number^ (p.after (l.this ",") number^))] - (wrap (` ((~! join-text^) (p.between (~ (code.nat from)) - (~ (code.nat to)) - (~ base)))))) - (do @ - [limit (p.after (l.this ",") number^)] - (wrap (` ((~! join-text^) (p.at-most (~ (code.nat limit)) (~ base)))))) - (do @ - [limit (p.before (l.this ",") number^)] - (wrap (` ((~! join-text^) (p.at-least (~ (code.nat limit)) (~ base)))))) - (do @ - [limit number^] - (wrap (` ((~! join-text^) (p.exactly (~ (code.nat limit)) (~ base)))))))))) + (<t>.enclosed ["{" "}"] + ($_ <>.either + (do @ + [[from to] (<>.and number^ (<>.after (<t>.this ",") number^))] + (wrap (` ((~! join-text^) (<>.between (~ (code.nat from)) + (~ (code.nat to)) + (~ base)))))) + (do @ + [limit (<>.after (<t>.this ",") number^)] + (wrap (` ((~! join-text^) (<>.at-most (~ (code.nat limit)) (~ base)))))) + (do @ + [limit (<>.before (<t>.this ",") number^)] + (wrap (` ((~! join-text^) (<>.at-least (~ (code.nat limit)) (~ base)))))) + (do @ + [limit number^] + (wrap (` ((~! join-text^) (<>.exactly (~ (code.nat limit)) (~ base)))))))))) (def: (re-quantified^ current-module) (-> Text (Parser Code)) - (p.either (re-simple-quantified^ current-module) - (re-counted-quantified^ current-module))) + (<>.either (re-simple-quantified^ current-module) + (re-counted-quantified^ current-module))) (def: (re-complex^ current-module) (-> Text (Parser Code)) - ($_ p.either + ($_ <>.either (re-quantified^ current-module) (re-simple^ current-module))) @@ -267,9 +267,9 @@ (-> Text (Parser [Re-Group Code])) Text (Parser [Nat Code])) - (do p.monad - [parts (p.many (p.or (re-complex^ current-module) - (re-scoped^ current-module))) + (do <>.monad + [parts (<>.many (<>.or (re-complex^ current-module) + (re-scoped^ current-module))) #let [g!total (code.identifier ["" "0total"]) g!temp (code.identifier ["" "0temp"]) [_ names steps] (list@fold (: (-> (Either Code [Re-Group Code]) @@ -307,7 +307,7 @@ (wrap [(if capturing? (list.size names) 0) - (` (do p.monad + (` (do <>.monad [(~ (' #let)) [(~ g!total) ""] (~+ (|> steps list.reverse list@join))] ((~ (' wrap)) [(~ g!total) (~+ (list.reverse names))])))]) @@ -315,7 +315,7 @@ (def: (unflatten^ lexer) (-> (Parser Text) (Parser [Text Any])) - (p.and lexer (:: p.monad wrap []))) + (<>.and lexer (:: <>.monad wrap []))) (def: (|||^ left right) (All [l r] (-> (Parser [Text l]) (Parser [Text r]) (Parser [Text (| l r)]))) @@ -358,10 +358,10 @@ (-> Text (Parser [Re-Group Code])) Text (Parser [Nat Code])) - (do p.monad + (do <>.monad [#let [sub^ (re-sequential^ capturing? re-scoped^ current-module)] head sub^ - tail (p.some (p.after (l.this "|") sub^))] + tail (<>.some (<>.after (<t>.this "|") sub^))] (if (list.empty? tail) (wrap head) (wrap [(list@fold n.max (product.left head) (list@map product.left tail)) @@ -373,33 +373,33 @@ (def: (re-scoped^ current-module) (-> Text (Parser [Re-Group Code])) - ($_ p.either - (do p.monad - [_ (l.this "(?:") + ($_ <>.either + (do <>.monad + [_ (<t>.this "(?:") [_ scoped] (re-alternative^ #0 re-scoped^ current-module) - _ (l.this ")")] + _ (<t>.this ")")] (wrap [#Non-Capturing scoped])) - (do p.monad + (do <>.monad [complex (re-complex^ current-module)] (wrap [#Non-Capturing complex])) - (do p.monad - [_ (l.this "(?<") + (do <>.monad + [_ (<t>.this "(?<") captured-name name-part^ - _ (l.this ">") + _ (<t>.this ">") [num-captures pattern] (re-alternative^ #1 re-scoped^ current-module) - _ (l.this ")")] + _ (<t>.this ")")] (wrap [(#Capturing [(#.Some captured-name) num-captures]) pattern])) - (do p.monad - [_ (l.this "(") + (do <>.monad + [_ (<t>.this "(") [num-captures pattern] (re-alternative^ #1 re-scoped^ current-module) - _ (l.this ")")] + _ (<t>.this ")")] (wrap [(#Capturing [#.None num-captures]) pattern])))) (def: (regex^ current-module) (-> Text (Parser Code)) - (:: p.monad map product.right (re-alternative^ #1 re-scoped^ current-module))) + (:: <>.monad map product.right (re-alternative^ #1 re-scoped^ current-module))) -(syntax: #export (regex {pattern s.text}) +(syntax: #export (regex {pattern <c>.text}) {#.doc (doc "Create lexers using regular-expression syntax." "For example:" @@ -460,9 +460,8 @@ )} (do macro.monad [current-module macro.current-module-name] - (case (l.run (p.before l.end - (regex^ current-module)) - pattern) + (case (<t>.run (regex^ current-module) + pattern) (#try.Failure error) (macro.fail (format "Error while parsing regular-expression:" //.new-line error)) @@ -471,9 +470,9 @@ (wrap (list regex)) ))) -(syntax: #export (^regex {[pattern bindings] (s.form (p.and s.text (p.maybe s.any)))} +(syntax: #export (^regex {[pattern bindings] (<c>.form (<>.and <c>.text (<>.maybe <c>.any)))} body - {branches (p.many s.any)}) + {branches (<>.many <c>.any)}) {#.doc (doc "Allows you to test text against regular expressions." (case some-text (^regex "(\d{3})-(\d{3})-(\d{4})" @@ -487,7 +486,7 @@ do-something-else))} (with-gensyms [g!temp] (wrap (list& (` (^multi (~ g!temp) - [((~! l.run) (..regex (~ (code.text pattern))) (~ g!temp)) + [((~! <t>.run) (..regex (~ (code.text pattern))) (~ g!temp)) (#try.Success (~ (maybe.default g!temp bindings)))])) body branches)))) diff --git a/stdlib/source/lux/data/text/unicode.lux b/stdlib/source/lux/data/text/unicode.lux index cbead2be1..6a4192b4c 100644 --- a/stdlib/source/lux/data/text/unicode.lux +++ b/stdlib/source/lux/data/text/unicode.lux @@ -184,6 +184,7 @@ [tags "E0000" "E007F"] ## Specialized segments + [basic-latin/decimal "0030" "0039"] [basic-latin/upper-alpha "0041" "005A"] [basic-latin/lower-alpha "0061" "007A"] ) @@ -352,6 +353,7 @@ [ascii (list basic-latin)] [ascii/alpha (list basic-latin/upper-alpha basic-latin/lower-alpha)] + [ascii/alpha-num (list basic-latin/upper-alpha basic-latin/lower-alpha basic-latin/decimal)] [ascii/upper-alpha (list basic-latin/upper-alpha)] [ascii/lower-alpha (list basic-latin/lower-alpha)] ) |