aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/library/lux/data/text/regex.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/library/lux/data/text/regex.lux')
-rw-r--r--stdlib/source/library/lux/data/text/regex.lux186
1 files changed, 97 insertions, 89 deletions
diff --git a/stdlib/source/library/lux/data/text/regex.lux b/stdlib/source/library/lux/data/text/regex.lux
index 63aca69fb..691fccad7 100644
--- a/stdlib/source/library/lux/data/text/regex.lux
+++ b/stdlib/source/library/lux/data/text/regex.lux
@@ -6,9 +6,10 @@
monad]
[control
["." try]
+ ["." exception (#+ exception:)]
["<>" parser ("#\." monad)
- ["<t>" text (#+ Parser)]
- ["<c>" code]]]
+ ["<.>" text (#+ Parser)]
+ ["<.>" code]]]
[data
["." product]
["." maybe]
@@ -25,31 +26,31 @@
(def: regex_char^
(Parser Text)
- (<t>.none_of "\.|&()[]{}"))
+ (<text>.none_of "\.|&()[]{}"))
(def: escaped_char^
(Parser Text)
(do <>.monad
- [? (<>.parses? (<t>.this "\"))]
+ [? (<>.parses? (<text>.this "\"))]
(if ?
- <t>.any
+ <text>.any
regex_char^)))
(def: (refine^ refinement^ base^)
(All [a] (-> (Parser a) (Parser Text) (Parser Text)))
(do <>.monad
[output base^
- _ (<t>.local output refinement^)]
+ _ (<text>.local output refinement^)]
(in output)))
(def: word^
(Parser Text)
- (<>.either <t>.alpha_num
- (<t>.one_of "_")))
+ (<>.either <text>.alpha_num
+ (<text>.one_of "_")))
(def: (copy reference)
(-> Text (Parser Text))
- (<>.after (<t>.this reference) (<>\in reference)))
+ (<>.after (<text>.this reference) (<>\in reference)))
(def: (join_text^ part^)
(-> (Parser (List Text)) (Parser Text))
@@ -59,37 +60,37 @@
(def: name_char^
(Parser Text)
- (<t>.none_of (format "[]{}()s#.<>" //.double_quote)))
+ (<text>.none_of (format "[]{}()s#.<>" //.double_quote)))
(def: name_part^
(Parser Text)
(do <>.monad
- [head (refine^ (<t>.not <t>.decimal)
+ [head (refine^ (<text>.not <text>.decimal)
name_char^)
- tail (<t>.some name_char^)]
+ tail (<text>.some name_char^)]
(in (format head tail))))
(def: (name^ current_module)
(-> Text (Parser Name))
($_ <>.either
- (<>.and (<>\in current_module) (<>.after (<t>.this "..") name_part^))
- (<>.and name_part^ (<>.after (<t>.this ".") name_part^))
- (<>.and (<>\in .prelude_module) (<>.after (<t>.this ".") name_part^))
+ (<>.and (<>\in current_module) (<>.after (<text>.this "..") name_part^))
+ (<>.and name_part^ (<>.after (<text>.this ".") name_part^))
+ (<>.and (<>\in .prelude_module) (<>.after (<text>.this ".") name_part^))
(<>.and (<>\in "") name_part^)))
(def: (re_var^ current_module)
(-> Text (Parser Code))
(do <>.monad
- [name (<t>.enclosed ["\@<" ">"] (name^ current_module))]
+ [name (<text>.enclosed ["\@<" ">"] (name^ current_module))]
(in (` (: (Parser Text) (~ (code.identifier name)))))))
(def: re_range^
(Parser Code)
(do {! <>.monad}
[from (|> regex_char^ (\ ! map (|>> (//.nth 0) maybe.assume)))
- _ (<t>.this "-")
+ _ (<text>.this "-")
to (|> regex_char^ (\ ! map (|>> (//.nth 0) maybe.assume)))]
- (in (` (<t>.range (~ (code.nat from)) (~ (code.nat to)))))))
+ (in (` (<text>.range (~ (code.nat from)) (~ (code.nat to)))))))
(def: re_char^
(Parser Code)
@@ -100,18 +101,18 @@
(def: re_options^
(Parser Code)
(do <>.monad
- [options (<t>.many escaped_char^)]
- (in (` (<t>.one_of (~ (code.text options)))))))
+ [options (<text>.many escaped_char^)]
+ (in (` (<text>.one_of (~ (code.text options)))))))
(def: re_user_class^'
(Parser Code)
(do <>.monad
- [negate? (<>.maybe (<t>.this "^"))
+ [negate? (<>.maybe (<text>.this "^"))
parts (<>.many ($_ <>.either
re_range^
re_options^))]
(in (case negate?
- (#.Some _) (` (<t>.not ($_ <>.either (~+ parts))))
+ (#.Some _) (` (<text>.not ($_ <>.either (~+ parts))))
#.None (` ($_ <>.either (~+ parts)))))))
(def: re_user_class^
@@ -119,7 +120,7 @@
(do <>.monad
[_ (in [])
init re_user_class^'
- rest (<>.some (<>.after (<t>.this "&&") (<t>.enclosed ["[" "]"] re_user_class^')))]
+ rest (<>.some (<>.after (<text>.this "&&") (<text>.enclosed ["[" "]"] re_user_class^')))]
(in (list\fold (function (_ refinement base)
(` ((~! refine^) (~ refinement) (~ base))))
init
@@ -127,80 +128,80 @@
(def: blank^
(Parser Text)
- (<t>.one_of (format " " //.tab)))
+ (<text>.one_of (format " " //.tab)))
(def: ascii^
(Parser Text)
- (<t>.range (hex "0") (hex "7F")))
+ (<text>.range (hex "0") (hex "7F")))
(def: control^
(Parser Text)
- (<>.either (<t>.range (hex "0") (hex "1F"))
- (<t>.one_of (//.of_code (hex "7F")))))
+ (<>.either (<text>.range (hex "0") (hex "1F"))
+ (<text>.one_of (//.of_code (hex "7F")))))
(def: punct^
(Parser Text)
- (<t>.one_of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
- //.double_quote)))
+ (<text>.one_of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
+ //.double_quote)))
(def: graph^
(Parser Text)
- (<>.either punct^ <t>.alpha_num))
+ (<>.either punct^ <text>.alpha_num))
(def: print^
(Parser Text)
(<>.either graph^
- (<t>.one_of (//.of_code (hex "20")))))
+ (<text>.one_of (//.of_code (hex "20")))))
(def: re_system_class^
(Parser Code)
(do <>.monad
[]
($_ <>.either
- (<>.after (<t>.this ".") (in (` <t>.any)))
- (<>.after (<t>.this "\d") (in (` <t>.decimal)))
- (<>.after (<t>.this "\D") (in (` (<t>.not <t>.decimal))))
- (<>.after (<t>.this "\s") (in (` <t>.space)))
- (<>.after (<t>.this "\S") (in (` (<t>.not <t>.space))))
- (<>.after (<t>.this "\w") (in (` (~! word^))))
- (<>.after (<t>.this "\W") (in (` (<t>.not (~! word^)))))
-
- (<>.after (<t>.this "\p{Lower}") (in (` <t>.lower)))
- (<>.after (<t>.this "\p{Upper}") (in (` <t>.upper)))
- (<>.after (<t>.this "\p{Alpha}") (in (` <t>.alpha)))
- (<>.after (<t>.this "\p{Digit}") (in (` <t>.decimal)))
- (<>.after (<t>.this "\p{Alnum}") (in (` <t>.alpha_num)))
- (<>.after (<t>.this "\p{Space}") (in (` <t>.space)))
- (<>.after (<t>.this "\p{HexDigit}") (in (` <t>.hexadecimal)))
- (<>.after (<t>.this "\p{OctDigit}") (in (` <t>.octal)))
- (<>.after (<t>.this "\p{Blank}") (in (` (~! blank^))))
- (<>.after (<t>.this "\p{ASCII}") (in (` (~! ascii^))))
- (<>.after (<t>.this "\p{Contrl}") (in (` (~! control^))))
- (<>.after (<t>.this "\p{Punct}") (in (` (~! punct^))))
- (<>.after (<t>.this "\p{Graph}") (in (` (~! graph^))))
- (<>.after (<t>.this "\p{Print}") (in (` (~! print^))))
+ (<>.after (<text>.this ".") (in (` <text>.any)))
+ (<>.after (<text>.this "\d") (in (` <text>.decimal)))
+ (<>.after (<text>.this "\D") (in (` (<text>.not <text>.decimal))))
+ (<>.after (<text>.this "\s") (in (` <text>.space)))
+ (<>.after (<text>.this "\S") (in (` (<text>.not <text>.space))))
+ (<>.after (<text>.this "\w") (in (` (~! word^))))
+ (<>.after (<text>.this "\W") (in (` (<text>.not (~! word^)))))
+
+ (<>.after (<text>.this "\p{Lower}") (in (` <text>.lower)))
+ (<>.after (<text>.this "\p{Upper}") (in (` <text>.upper)))
+ (<>.after (<text>.this "\p{Alpha}") (in (` <text>.alpha)))
+ (<>.after (<text>.this "\p{Digit}") (in (` <text>.decimal)))
+ (<>.after (<text>.this "\p{Alnum}") (in (` <text>.alpha_num)))
+ (<>.after (<text>.this "\p{Space}") (in (` <text>.space)))
+ (<>.after (<text>.this "\p{HexDigit}") (in (` <text>.hexadecimal)))
+ (<>.after (<text>.this "\p{OctDigit}") (in (` <text>.octal)))
+ (<>.after (<text>.this "\p{Blank}") (in (` (~! blank^))))
+ (<>.after (<text>.this "\p{ASCII}") (in (` (~! ascii^))))
+ (<>.after (<text>.this "\p{Contrl}") (in (` (~! control^))))
+ (<>.after (<text>.this "\p{Punct}") (in (` (~! punct^))))
+ (<>.after (<text>.this "\p{Graph}") (in (` (~! graph^))))
+ (<>.after (<text>.this "\p{Print}") (in (` (~! print^))))
)))
(def: re_class^
(Parser Code)
(<>.either re_system_class^
- (<t>.enclosed ["[" "]"] re_user_class^)))
+ (<text>.enclosed ["[" "]"] re_user_class^)))
(def: number^
(Parser Nat)
- (|> (<t>.many <t>.decimal)
+ (|> (<text>.many <text>.decimal)
(<>.codec n.decimal)))
(def: re_back_reference^
(Parser Code)
(<>.either (do <>.monad
- [_ (<t>.this "\")
+ [_ (<text>.this "\")
id number^]
(in (` ((~! ..copy) (~ (code.identifier ["" (n\encode id)]))))))
(do <>.monad
- [_ (<t>.this "\k<")
+ [_ (<text>.this "\k<")
captured_name name_part^
- _ (<t>.this ">")]
+ _ (<text>.this ">")]
(in (` ((~! ..copy) (~ (code.identifier ["" captured_name]))))))))
(def: (re_simple^ current_module)
@@ -216,7 +217,7 @@
(-> Text (Parser Code))
(do <>.monad
[base (re_simple^ current_module)
- quantifier (<t>.one_of "?*+")]
+ quantifier (<text>.one_of "?*+")]
(case quantifier
"?"
(in (` (<>.default "" (~ base))))
@@ -229,26 +230,33 @@
(in (` ((~! join_text^) (<>.many (~ base)))))
)))
+(exception: #export (incorrect_quantification {from Nat} {to Nat})
+ (exception.report
+ ["Input" (format (%.nat from) "," (%.nat to))]
+ ["Should be" (format (%.nat to) "," (%.nat from))]))
+
(def: (re_counted_quantified^ current_module)
(-> Text (Parser Code))
(do {! <>.monad}
[base (re_simple^ current_module)]
- (<t>.enclosed ["{" "}"]
- ($_ <>.either
- (do !
- [[from to] (<>.and number^ (<>.after (<t>.this ",") number^))]
- (in (` ((~! join_text^) (<>.between (~ (code.nat from))
- (~ (code.nat to))
- (~ base))))))
- (do !
- [limit (<>.after (<t>.this ",") number^)]
- (in (` ((~! join_text^) (<>.at_most (~ (code.nat limit)) (~ base))))))
- (do !
- [limit (<>.before (<t>.this ",") number^)]
- (in (` ((~! join_text^) (<>.at_least (~ (code.nat limit)) (~ base))))))
- (do !
- [limit number^]
- (in (` ((~! join_text^) (<>.exactly (~ (code.nat limit)) (~ base))))))))))
+ (<| (<text>.enclosed ["{" "}"])
+ ($_ <>.either
+ (do !
+ [[from to] (<>.and number^ (<>.after (<text>.this ",") number^))
+ _ (<>.assert (exception.construct ..incorrect_quantification [from to])
+ (n.<= to from))]
+ (in (` ((~! join_text^) (<>.between (~ (code.nat from))
+ (~ (code.nat (n.- from to)))
+ (~ base))))))
+ (do !
+ [limit (<>.after (<text>.this ",") number^)]
+ (in (` ((~! join_text^) (<>.at_most (~ (code.nat limit)) (~ base))))))
+ (do !
+ [limit (<>.before (<text>.this ",") number^)]
+ (in (` ((~! join_text^) (<>.at_least (~ (code.nat limit)) (~ base))))))
+ (do !
+ [limit number^]
+ (in (` ((~! join_text^) (<>.exactly (~ (code.nat limit)) (~ base))))))))))
(def: (re_quantified^ current_module)
(-> Text (Parser Code))
@@ -364,7 +372,7 @@
(do <>.monad
[#let [sub^ (re_sequential^ capturing? re_scoped^ current_module)]
head sub^
- tail (<>.some (<>.after (<t>.this "|") sub^))]
+ tail (<>.some (<>.after (<text>.this "|") sub^))]
(if (list.empty? tail)
(in head)
(in [(list\fold n.max (product.left head) (list\map product.left tail))
@@ -378,31 +386,31 @@
(-> Text (Parser [Re_Group Code]))
($_ <>.either
(do <>.monad
- [_ (<t>.this "(?:")
+ [_ (<text>.this "(?:")
[_ scoped] (re_alternative^ #0 re_scoped^ current_module)
- _ (<t>.this ")")]
+ _ (<text>.this ")")]
(in [#Non_Capturing scoped]))
(do <>.monad
[complex (re_complex^ current_module)]
(in [#Non_Capturing complex]))
(do <>.monad
- [_ (<t>.this "(?<")
+ [_ (<text>.this "(?<")
captured_name name_part^
- _ (<t>.this ">")
+ _ (<text>.this ">")
[num_captures pattern] (re_alternative^ #1 re_scoped^ current_module)
- _ (<t>.this ")")]
+ _ (<text>.this ")")]
(in [(#Capturing [(#.Some captured_name) num_captures]) pattern]))
(do <>.monad
- [_ (<t>.this "(")
+ [_ (<text>.this "(")
[num_captures pattern] (re_alternative^ #1 re_scoped^ current_module)
- _ (<t>.this ")")]
+ _ (<text>.this ")")]
(in [(#Capturing [#.None num_captures]) pattern]))))
(def: (regex^ current_module)
(-> Text (Parser Code))
(\ <>.monad map product.right (re_alternative^ #1 re_scoped^ current_module)))
-(syntax: #export (regex {pattern <c>.text})
+(syntax: #export (regex {pattern <code>.text})
{#.doc (doc "Create lexers using regular-expression syntax."
"For example:"
@@ -463,8 +471,8 @@
)}
(do meta.monad
[current_module meta.current_module_name]
- (case (<t>.run (regex^ current_module)
- pattern)
+ (case (<text>.run (regex^ current_module)
+ pattern)
(#try.Failure error)
(meta.failure (format "Error while parsing regular-expression:" //.new_line
error))
@@ -473,9 +481,9 @@
(in (list regex))
)))
-(syntax: #export (^regex {[pattern bindings] (<c>.form (<>.and <c>.text (<>.maybe <c>.any)))}
+(syntax: #export (^regex {[pattern bindings] (<code>.form (<>.and <code>.text (<>.maybe <code>.any)))}
body
- {branches (<>.many <c>.any)})
+ {branches (<>.many <code>.any)})
{#.doc (doc "Allows you to test text against regular expressions."
(case some_text
(^regex "(\d{3})-(\d{3})-(\d{4})"
@@ -489,7 +497,7 @@
do_something_else))}
(with_gensyms [g!temp]
(in (list& (` (^multi (~ g!temp)
- {((~! <t>.run) (..regex (~ (code.text pattern))) (~ g!temp))
+ {((~! <text>.run) (..regex (~ (code.text pattern))) (~ g!temp))
(#try.Success (~ (maybe.default g!temp bindings)))}))
body
branches))))