diff options
Diffstat (limited to 'stdlib/source/library/lux/data/text/regex.lux')
-rw-r--r-- | stdlib/source/library/lux/data/text/regex.lux | 186 |
1 files changed, 97 insertions, 89 deletions
diff --git a/stdlib/source/library/lux/data/text/regex.lux b/stdlib/source/library/lux/data/text/regex.lux index 63aca69fb..691fccad7 100644 --- a/stdlib/source/library/lux/data/text/regex.lux +++ b/stdlib/source/library/lux/data/text/regex.lux @@ -6,9 +6,10 @@ monad] [control ["." try] + ["." exception (#+ exception:)] ["<>" parser ("#\." monad) - ["<t>" text (#+ Parser)] - ["<c>" code]]] + ["<.>" text (#+ Parser)] + ["<.>" code]]] [data ["." product] ["." maybe] @@ -25,31 +26,31 @@ (def: regex_char^ (Parser Text) - (<t>.none_of "\.|&()[]{}")) + (<text>.none_of "\.|&()[]{}")) (def: escaped_char^ (Parser Text) (do <>.monad - [? (<>.parses? (<t>.this "\"))] + [? (<>.parses? (<text>.this "\"))] (if ? - <t>.any + <text>.any regex_char^))) (def: (refine^ refinement^ base^) (All [a] (-> (Parser a) (Parser Text) (Parser Text))) (do <>.monad [output base^ - _ (<t>.local output refinement^)] + _ (<text>.local output refinement^)] (in output))) (def: word^ (Parser Text) - (<>.either <t>.alpha_num - (<t>.one_of "_"))) + (<>.either <text>.alpha_num + (<text>.one_of "_"))) (def: (copy reference) (-> Text (Parser Text)) - (<>.after (<t>.this reference) (<>\in reference))) + (<>.after (<text>.this reference) (<>\in reference))) (def: (join_text^ part^) (-> (Parser (List Text)) (Parser Text)) @@ -59,37 +60,37 @@ (def: name_char^ (Parser Text) - (<t>.none_of (format "[]{}()s#.<>" //.double_quote))) + (<text>.none_of (format "[]{}()s#.<>" //.double_quote))) (def: name_part^ (Parser Text) (do <>.monad - [head (refine^ (<t>.not <t>.decimal) + [head (refine^ (<text>.not <text>.decimal) name_char^) - tail (<t>.some name_char^)] + tail (<text>.some name_char^)] (in (format head tail)))) (def: (name^ current_module) (-> Text (Parser Name)) ($_ <>.either - (<>.and (<>\in current_module) (<>.after (<t>.this "..") name_part^)) - (<>.and name_part^ (<>.after (<t>.this ".") name_part^)) - (<>.and (<>\in .prelude_module) (<>.after (<t>.this ".") name_part^)) + (<>.and (<>\in current_module) (<>.after (<text>.this "..") name_part^)) + (<>.and name_part^ (<>.after (<text>.this ".") name_part^)) + (<>.and (<>\in .prelude_module) (<>.after (<text>.this ".") name_part^)) (<>.and (<>\in "") name_part^))) (def: (re_var^ current_module) (-> Text (Parser Code)) (do <>.monad - [name (<t>.enclosed ["\@<" ">"] (name^ current_module))] + [name (<text>.enclosed ["\@<" ">"] (name^ current_module))] (in (` (: (Parser Text) (~ (code.identifier name))))))) (def: re_range^ (Parser Code) (do {! <>.monad} [from (|> regex_char^ (\ ! map (|>> (//.nth 0) maybe.assume))) - _ (<t>.this "-") + _ (<text>.this "-") to (|> regex_char^ (\ ! map (|>> (//.nth 0) maybe.assume)))] - (in (` (<t>.range (~ (code.nat from)) (~ (code.nat to))))))) + (in (` (<text>.range (~ (code.nat from)) (~ (code.nat to))))))) (def: re_char^ (Parser Code) @@ -100,18 +101,18 @@ (def: re_options^ (Parser Code) (do <>.monad - [options (<t>.many escaped_char^)] - (in (` (<t>.one_of (~ (code.text options))))))) + [options (<text>.many escaped_char^)] + (in (` (<text>.one_of (~ (code.text options))))))) (def: re_user_class^' (Parser Code) (do <>.monad - [negate? (<>.maybe (<t>.this "^")) + [negate? (<>.maybe (<text>.this "^")) parts (<>.many ($_ <>.either re_range^ re_options^))] (in (case negate? - (#.Some _) (` (<t>.not ($_ <>.either (~+ parts)))) + (#.Some _) (` (<text>.not ($_ <>.either (~+ parts)))) #.None (` ($_ <>.either (~+ parts))))))) (def: re_user_class^ @@ -119,7 +120,7 @@ (do <>.monad [_ (in []) init re_user_class^' - rest (<>.some (<>.after (<t>.this "&&") (<t>.enclosed ["[" "]"] re_user_class^')))] + rest (<>.some (<>.after (<text>.this "&&") (<text>.enclosed ["[" "]"] re_user_class^')))] (in (list\fold (function (_ refinement base) (` ((~! refine^) (~ refinement) (~ base)))) init @@ -127,80 +128,80 @@ (def: blank^ (Parser Text) - (<t>.one_of (format " " //.tab))) + (<text>.one_of (format " " //.tab))) (def: ascii^ (Parser Text) - (<t>.range (hex "0") (hex "7F"))) + (<text>.range (hex "0") (hex "7F"))) (def: control^ (Parser Text) - (<>.either (<t>.range (hex "0") (hex "1F")) - (<t>.one_of (//.of_code (hex "7F"))))) + (<>.either (<text>.range (hex "0") (hex "1F")) + (<text>.one_of (//.of_code (hex "7F"))))) (def: punct^ (Parser Text) - (<t>.one_of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~" - //.double_quote))) + (<text>.one_of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~" + //.double_quote))) (def: graph^ (Parser Text) - (<>.either punct^ <t>.alpha_num)) + (<>.either punct^ <text>.alpha_num)) (def: print^ (Parser Text) (<>.either graph^ - (<t>.one_of (//.of_code (hex "20"))))) + (<text>.one_of (//.of_code (hex "20"))))) (def: re_system_class^ (Parser Code) (do <>.monad [] ($_ <>.either - (<>.after (<t>.this ".") (in (` <t>.any))) - (<>.after (<t>.this "\d") (in (` <t>.decimal))) - (<>.after (<t>.this "\D") (in (` (<t>.not <t>.decimal)))) - (<>.after (<t>.this "\s") (in (` <t>.space))) - (<>.after (<t>.this "\S") (in (` (<t>.not <t>.space)))) - (<>.after (<t>.this "\w") (in (` (~! word^)))) - (<>.after (<t>.this "\W") (in (` (<t>.not (~! word^))))) - - (<>.after (<t>.this "\p{Lower}") (in (` <t>.lower))) - (<>.after (<t>.this "\p{Upper}") (in (` <t>.upper))) - (<>.after (<t>.this "\p{Alpha}") (in (` <t>.alpha))) - (<>.after (<t>.this "\p{Digit}") (in (` <t>.decimal))) - (<>.after (<t>.this "\p{Alnum}") (in (` <t>.alpha_num))) - (<>.after (<t>.this "\p{Space}") (in (` <t>.space))) - (<>.after (<t>.this "\p{HexDigit}") (in (` <t>.hexadecimal))) - (<>.after (<t>.this "\p{OctDigit}") (in (` <t>.octal))) - (<>.after (<t>.this "\p{Blank}") (in (` (~! blank^)))) - (<>.after (<t>.this "\p{ASCII}") (in (` (~! ascii^)))) - (<>.after (<t>.this "\p{Contrl}") (in (` (~! control^)))) - (<>.after (<t>.this "\p{Punct}") (in (` (~! punct^)))) - (<>.after (<t>.this "\p{Graph}") (in (` (~! graph^)))) - (<>.after (<t>.this "\p{Print}") (in (` (~! print^)))) + (<>.after (<text>.this ".") (in (` <text>.any))) + (<>.after (<text>.this "\d") (in (` <text>.decimal))) + (<>.after (<text>.this "\D") (in (` (<text>.not <text>.decimal)))) + (<>.after (<text>.this "\s") (in (` <text>.space))) + (<>.after (<text>.this "\S") (in (` (<text>.not <text>.space)))) + (<>.after (<text>.this "\w") (in (` (~! word^)))) + (<>.after (<text>.this "\W") (in (` (<text>.not (~! word^))))) + + (<>.after (<text>.this "\p{Lower}") (in (` <text>.lower))) + (<>.after (<text>.this "\p{Upper}") (in (` <text>.upper))) + (<>.after (<text>.this "\p{Alpha}") (in (` <text>.alpha))) + (<>.after (<text>.this "\p{Digit}") (in (` <text>.decimal))) + (<>.after (<text>.this "\p{Alnum}") (in (` <text>.alpha_num))) + (<>.after (<text>.this "\p{Space}") (in (` <text>.space))) + (<>.after (<text>.this "\p{HexDigit}") (in (` <text>.hexadecimal))) + (<>.after (<text>.this "\p{OctDigit}") (in (` <text>.octal))) + (<>.after (<text>.this "\p{Blank}") (in (` (~! blank^)))) + (<>.after (<text>.this "\p{ASCII}") (in (` (~! ascii^)))) + (<>.after (<text>.this "\p{Contrl}") (in (` (~! control^)))) + (<>.after (<text>.this "\p{Punct}") (in (` (~! punct^)))) + (<>.after (<text>.this "\p{Graph}") (in (` (~! graph^)))) + (<>.after (<text>.this "\p{Print}") (in (` (~! print^)))) ))) (def: re_class^ (Parser Code) (<>.either re_system_class^ - (<t>.enclosed ["[" "]"] re_user_class^))) + (<text>.enclosed ["[" "]"] re_user_class^))) (def: number^ (Parser Nat) - (|> (<t>.many <t>.decimal) + (|> (<text>.many <text>.decimal) (<>.codec n.decimal))) (def: re_back_reference^ (Parser Code) (<>.either (do <>.monad - [_ (<t>.this "\") + [_ (<text>.this "\") id number^] (in (` ((~! ..copy) (~ (code.identifier ["" (n\encode id)])))))) (do <>.monad - [_ (<t>.this "\k<") + [_ (<text>.this "\k<") captured_name name_part^ - _ (<t>.this ">")] + _ (<text>.this ">")] (in (` ((~! ..copy) (~ (code.identifier ["" captured_name])))))))) (def: (re_simple^ current_module) @@ -216,7 +217,7 @@ (-> Text (Parser Code)) (do <>.monad [base (re_simple^ current_module) - quantifier (<t>.one_of "?*+")] + quantifier (<text>.one_of "?*+")] (case quantifier "?" (in (` (<>.default "" (~ base)))) @@ -229,26 +230,33 @@ (in (` ((~! join_text^) (<>.many (~ base))))) ))) +(exception: #export (incorrect_quantification {from Nat} {to Nat}) + (exception.report + ["Input" (format (%.nat from) "," (%.nat to))] + ["Should be" (format (%.nat to) "," (%.nat from))])) + (def: (re_counted_quantified^ current_module) (-> Text (Parser Code)) (do {! <>.monad} [base (re_simple^ current_module)] - (<t>.enclosed ["{" "}"] - ($_ <>.either - (do ! - [[from to] (<>.and number^ (<>.after (<t>.this ",") number^))] - (in (` ((~! join_text^) (<>.between (~ (code.nat from)) - (~ (code.nat to)) - (~ base)))))) - (do ! - [limit (<>.after (<t>.this ",") number^)] - (in (` ((~! join_text^) (<>.at_most (~ (code.nat limit)) (~ base)))))) - (do ! - [limit (<>.before (<t>.this ",") number^)] - (in (` ((~! join_text^) (<>.at_least (~ (code.nat limit)) (~ base)))))) - (do ! - [limit number^] - (in (` ((~! join_text^) (<>.exactly (~ (code.nat limit)) (~ base)))))))))) + (<| (<text>.enclosed ["{" "}"]) + ($_ <>.either + (do ! + [[from to] (<>.and number^ (<>.after (<text>.this ",") number^)) + _ (<>.assert (exception.construct ..incorrect_quantification [from to]) + (n.<= to from))] + (in (` ((~! join_text^) (<>.between (~ (code.nat from)) + (~ (code.nat (n.- from to))) + (~ base)))))) + (do ! + [limit (<>.after (<text>.this ",") number^)] + (in (` ((~! join_text^) (<>.at_most (~ (code.nat limit)) (~ base)))))) + (do ! + [limit (<>.before (<text>.this ",") number^)] + (in (` ((~! join_text^) (<>.at_least (~ (code.nat limit)) (~ base)))))) + (do ! + [limit number^] + (in (` ((~! join_text^) (<>.exactly (~ (code.nat limit)) (~ base)))))))))) (def: (re_quantified^ current_module) (-> Text (Parser Code)) @@ -364,7 +372,7 @@ (do <>.monad [#let [sub^ (re_sequential^ capturing? re_scoped^ current_module)] head sub^ - tail (<>.some (<>.after (<t>.this "|") sub^))] + tail (<>.some (<>.after (<text>.this "|") sub^))] (if (list.empty? tail) (in head) (in [(list\fold n.max (product.left head) (list\map product.left tail)) @@ -378,31 +386,31 @@ (-> Text (Parser [Re_Group Code])) ($_ <>.either (do <>.monad - [_ (<t>.this "(?:") + [_ (<text>.this "(?:") [_ scoped] (re_alternative^ #0 re_scoped^ current_module) - _ (<t>.this ")")] + _ (<text>.this ")")] (in [#Non_Capturing scoped])) (do <>.monad [complex (re_complex^ current_module)] (in [#Non_Capturing complex])) (do <>.monad - [_ (<t>.this "(?<") + [_ (<text>.this "(?<") captured_name name_part^ - _ (<t>.this ">") + _ (<text>.this ">") [num_captures pattern] (re_alternative^ #1 re_scoped^ current_module) - _ (<t>.this ")")] + _ (<text>.this ")")] (in [(#Capturing [(#.Some captured_name) num_captures]) pattern])) (do <>.monad - [_ (<t>.this "(") + [_ (<text>.this "(") [num_captures pattern] (re_alternative^ #1 re_scoped^ current_module) - _ (<t>.this ")")] + _ (<text>.this ")")] (in [(#Capturing [#.None num_captures]) pattern])))) (def: (regex^ current_module) (-> Text (Parser Code)) (\ <>.monad map product.right (re_alternative^ #1 re_scoped^ current_module))) -(syntax: #export (regex {pattern <c>.text}) +(syntax: #export (regex {pattern <code>.text}) {#.doc (doc "Create lexers using regular-expression syntax." "For example:" @@ -463,8 +471,8 @@ )} (do meta.monad [current_module meta.current_module_name] - (case (<t>.run (regex^ current_module) - pattern) + (case (<text>.run (regex^ current_module) + pattern) (#try.Failure error) (meta.failure (format "Error while parsing regular-expression:" //.new_line error)) @@ -473,9 +481,9 @@ (in (list regex)) ))) -(syntax: #export (^regex {[pattern bindings] (<c>.form (<>.and <c>.text (<>.maybe <c>.any)))} +(syntax: #export (^regex {[pattern bindings] (<code>.form (<>.and <code>.text (<>.maybe <code>.any)))} body - {branches (<>.many <c>.any)}) + {branches (<>.many <code>.any)}) {#.doc (doc "Allows you to test text against regular expressions." (case some_text (^regex "(\d{3})-(\d{3})-(\d{4})" @@ -489,7 +497,7 @@ do_something_else))} (with_gensyms [g!temp] (in (list& (` (^multi (~ g!temp) - {((~! <t>.run) (..regex (~ (code.text pattern))) (~ g!temp)) + {((~! <text>.run) (..regex (~ (code.text pattern))) (~ g!temp)) (#try.Success (~ (maybe.default g!temp bindings)))})) body branches)))) |