aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/test/lux/data/text/regex.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/test/lux/data/text/regex.lux')
-rw-r--r--stdlib/source/test/lux/data/text/regex.lux466
1 files changed, 235 insertions, 231 deletions
diff --git a/stdlib/source/test/lux/data/text/regex.lux b/stdlib/source/test/lux/data/text/regex.lux
index ffa5612da..059adff84 100644
--- a/stdlib/source/test/lux/data/text/regex.lux
+++ b/stdlib/source/test/lux/data/text/regex.lux
@@ -1,36 +1,36 @@
(.module:
[lux #*
+ data/text/format
+ ["_" test (#+ Test)]
[control
[monad (#+ do Monad)]
pipe
["p" parser]]
[data
[number (#+ hex)]
- ["." text ("#;." equivalence)
- format
- ["." lexer (#+ Lexer)]
- ["&" regex]]]
+ ["." text ("#@." equivalence)
+ ["." lexer (#+ Lexer)]]]
[math
["r" random]]
[macro
["s" syntax (#+ syntax:)]]]
- lux/test)
+ {1
+ ["." /]})
-## [Utils]
(def: (should-pass regex input)
(-> (Lexer Text) Text Bit)
(|> (lexer.run input regex)
(case> (#.Right parsed)
- (text;= parsed input)
+ (text@= parsed input)
_
#0)))
-(def: (should-passT test regex input)
+(def: (text-should-pass test regex input)
(-> Text (Lexer Text) Text Bit)
(|> (lexer.run input regex)
(case> (#.Right parsed)
- (text;= test parsed)
+ (text@= test parsed)
_
#0)))
@@ -48,239 +48,243 @@
(~' _)
#0))))))
-## [Tests]
-(context: "Regular Expressions [Basics]"
- (test "Can parse character literals."
- (and (should-pass (&.regex "a") "a")
- (should-fail (&.regex "a") ".")
- (should-pass (&.regex "\.") ".")
- (should-fail (&.regex "\.") "a"))))
-
-(context: "Regular Expressions [System character classes]"
- ($_ seq
- (test "Can parse anything."
- (should-pass (&.regex ".") "a"))
-
- (test "Can parse digits."
- (and (should-pass (&.regex "\d") "0")
- (should-fail (&.regex "\d") "m")))
-
- (test "Can parse non digits."
- (and (should-pass (&.regex "\D") "m")
- (should-fail (&.regex "\D") "0")))
-
- (test "Can parse white-space."
- (and (should-pass (&.regex "\s") " ")
- (should-fail (&.regex "\s") "m")))
-
- (test "Can parse non white-space."
- (and (should-pass (&.regex "\S") "m")
- (should-fail (&.regex "\S") " ")))
-
- (test "Can parse word characters."
- (and (should-pass (&.regex "\w") "_")
- (should-fail (&.regex "\w") "^")))
-
- (test "Can parse non word characters."
- (and (should-pass (&.regex "\W") ".")
- (should-fail (&.regex "\W") "a")))
+(def: basics
+ Test
+ (_.test "Can parse character literals."
+ (and (should-pass (/.regex "a") "a")
+ (should-fail (/.regex "a") ".")
+ (should-pass (/.regex "\.") ".")
+ (should-fail (/.regex "\.") "a"))))
+
+(def: system-character-classes
+ Test
+ ($_ _.and
+ (_.test "Can parse anything."
+ (should-pass (/.regex ".") "a"))
+
+ (_.test "Can parse digits."
+ (and (should-pass (/.regex "\d") "0")
+ (should-fail (/.regex "\d") "m")))
+
+ (_.test "Can parse non digits."
+ (and (should-pass (/.regex "\D") "m")
+ (should-fail (/.regex "\D") "0")))
+
+ (_.test "Can parse white-space."
+ (and (should-pass (/.regex "\s") " ")
+ (should-fail (/.regex "\s") "m")))
+
+ (_.test "Can parse non white-space."
+ (and (should-pass (/.regex "\S") "m")
+ (should-fail (/.regex "\S") " ")))
+
+ (_.test "Can parse word characters."
+ (and (should-pass (/.regex "\w") "_")
+ (should-fail (/.regex "\w") "^")))
+
+ (_.test "Can parse non word characters."
+ (and (should-pass (/.regex "\W") ".")
+ (should-fail (/.regex "\W") "a")))
))
-(context: "Regular Expressions [Special system character classes : Part 1]"
- ($_ seq
- (test "Can parse using special character classes."
- (and (and (should-pass (&.regex "\p{Lower}") "m")
- (should-fail (&.regex "\p{Lower}") "M"))
-
- (and (should-pass (&.regex "\p{Upper}") "M")
- (should-fail (&.regex "\p{Upper}") "m"))
-
- (and (should-pass (&.regex "\p{Alpha}") "M")
- (should-fail (&.regex "\p{Alpha}") "0"))
-
- (and (should-pass (&.regex "\p{Digit}") "1")
- (should-fail (&.regex "\p{Digit}") "n"))
-
- (and (should-pass (&.regex "\p{Alnum}") "1")
- (should-fail (&.regex "\p{Alnum}") "."))
-
- (and (should-pass (&.regex "\p{Space}") " ")
- (should-fail (&.regex "\p{Space}") "."))
- ))
- ))
-
-(context: "Regular Expressions [Special system character classes : Part 2]"
- ($_ seq
- (test "Can parse using special character classes."
- (and (and (should-pass (&.regex "\p{HexDigit}") "a")
- (should-fail (&.regex "\p{HexDigit}") "."))
-
- (and (should-pass (&.regex "\p{OctDigit}") "6")
- (should-fail (&.regex "\p{OctDigit}") "."))
-
- (and (should-pass (&.regex "\p{Blank}") text.tab)
- (should-fail (&.regex "\p{Blank}") "."))
-
- (and (should-pass (&.regex "\p{ASCII}") text.tab)
- (should-fail (&.regex "\p{ASCII}") (text.from-code (hex "1234"))))
-
- (and (should-pass (&.regex "\p{Contrl}") (text.from-code (hex "12")))
- (should-fail (&.regex "\p{Contrl}") "a"))
-
- (and (should-pass (&.regex "\p{Punct}") "@")
- (should-fail (&.regex "\p{Punct}") "a"))
-
- (and (should-pass (&.regex "\p{Graph}") "@")
- (should-fail (&.regex "\p{Graph}") " "))
-
- (and (should-pass (&.regex "\p{Print}") (text.from-code (hex "20")))
- (should-fail (&.regex "\p{Print}") (text.from-code (hex "1234"))))
- ))
- ))
-
-(context: "Regular Expressions [Custom character classes : Part 1]"
- ($_ seq
- (test "Can parse using custom character classes."
- (and (should-pass (&.regex "[abc]") "a")
- (should-fail (&.regex "[abc]") "m")))
-
- (test "Can parse using character ranges."
- (and (should-pass (&.regex "[a-z]") "a")
- (should-pass (&.regex "[a-z]") "m")
- (should-pass (&.regex "[a-z]") "z")))
-
- (test "Can combine character ranges."
- (and (should-pass (&.regex "[a-zA-Z]") "a")
- (should-pass (&.regex "[a-zA-Z]") "m")
- (should-pass (&.regex "[a-zA-Z]") "z")
- (should-pass (&.regex "[a-zA-Z]") "A")
- (should-pass (&.regex "[a-zA-Z]") "M")
- (should-pass (&.regex "[a-zA-Z]") "Z")))
- ))
-
-(context: "Regular Expressions [Custom character classes : Part 2]"
- ($_ seq
- (test "Can negate custom character classes."
- (and (should-fail (&.regex "[^abc]") "a")
- (should-pass (&.regex "[^abc]") "m")))
-
- (test "Can negate character ranges.."
- (and (should-fail (&.regex "[^a-z]") "a")
- (should-pass (&.regex "[^a-z]") "0")))
-
- (test "Can parse negate combinations of character ranges."
- (and (should-fail (&.regex "[^a-zA-Z]") "a")
- (should-pass (&.regex "[^a-zA-Z]") "0")))
+(def: special-system-character-classes
+ Test
+ ($_ _.and
+ (_.test "Lower-case."
+ (and (should-pass (/.regex "\p{Lower}") "m")
+ (should-fail (/.regex "\p{Lower}") "M")))
+ (_.test "Upper-case."
+ (and (should-pass (/.regex "\p{Upper}") "M")
+ (should-fail (/.regex "\p{Upper}") "m")))
+ (_.test "Alphabetic."
+ (and (should-pass (/.regex "\p{Alpha}") "M")
+ (should-fail (/.regex "\p{Alpha}") "0")))
+ (_.test "Numeric digits."
+ (and (should-pass (/.regex "\p{Digit}") "1")
+ (should-fail (/.regex "\p{Digit}") "n")))
+ (_.test "Alphanumeric."
+ (and (should-pass (/.regex "\p{Alnum}") "1")
+ (should-fail (/.regex "\p{Alnum}") ".")))
+ (_.test "Whitespace."
+ (and (should-pass (/.regex "\p{Space}") " ")
+ (should-fail (/.regex "\p{Space}") ".")))
+ (_.test "Hexadecimal."
+ (and (should-pass (/.regex "\p{HexDigit}") "a")
+ (should-fail (/.regex "\p{HexDigit}") ".")))
+ (_.test "Octal."
+ (and (should-pass (/.regex "\p{OctDigit}") "6")
+ (should-fail (/.regex "\p{OctDigit}") ".")))
+ (_.test "Blank."
+ (and (should-pass (/.regex "\p{Blank}") text.tab)
+ (should-fail (/.regex "\p{Blank}") ".")))
+ (_.test "ASCII."
+ (and (should-pass (/.regex "\p{ASCII}") text.tab)
+ (should-fail (/.regex "\p{ASCII}") (text.from-code (hex "1234")))))
+ (_.test "Control characters."
+ (and (should-pass (/.regex "\p{Contrl}") (text.from-code (hex "12")))
+ (should-fail (/.regex "\p{Contrl}") "a")))
+ (_.test "Punctuation."
+ (and (should-pass (/.regex "\p{Punct}") "@")
+ (should-fail (/.regex "\p{Punct}") "a")))
+ (_.test "Graph."
+ (and (should-pass (/.regex "\p{Graph}") "@")
+ (should-fail (/.regex "\p{Graph}") " ")))
+ (_.test "Print."
+ (and (should-pass (/.regex "\p{Print}") (text.from-code (hex "20")))
+ (should-fail (/.regex "\p{Print}") (text.from-code (hex "1234")))))
))
-(context: "Regular Expressions [Custom character classes : Part 3]"
- ($_ seq
- (test "Can make custom character classes more specific."
- (and (let [RE (&.regex "[a-z&&[def]]")]
- (and (should-fail RE "a")
- (should-pass RE "d")))
-
- (let [RE (&.regex "[a-z&&[^bc]]")]
- (and (should-pass RE "a")
- (should-fail RE "b")))
-
- (let [RE (&.regex "[a-z&&[^m-p]]")]
- (and (should-pass RE "a")
- (should-fail RE "m")
- (should-fail RE "p")))))
+(def: custom-character-classes
+ Test
+ ($_ _.and
+ (_.test "Can parse using custom character classes."
+ (and (should-pass (/.regex "[abc]") "a")
+ (should-fail (/.regex "[abc]") "m")))
+ (_.test "Can parse using character ranges."
+ (and (should-pass (/.regex "[a-z]") "a")
+ (should-pass (/.regex "[a-z]") "m")
+ (should-pass (/.regex "[a-z]") "z")))
+ (_.test "Can combine character ranges."
+ (and (should-pass (/.regex "[a-zA-Z]") "a")
+ (should-pass (/.regex "[a-zA-Z]") "m")
+ (should-pass (/.regex "[a-zA-Z]") "z")
+ (should-pass (/.regex "[a-zA-Z]") "A")
+ (should-pass (/.regex "[a-zA-Z]") "M")
+ (should-pass (/.regex "[a-zA-Z]") "Z")))
+ (_.test "Can negate custom character classes."
+ (and (should-fail (/.regex "[^abc]") "a")
+ (should-pass (/.regex "[^abc]") "m")))
+ (_.test "Can negate character ranges.."
+ (and (should-fail (/.regex "[^a-z]") "a")
+ (should-pass (/.regex "[^a-z]") "0")))
+ (_.test "Can parse negate combinations of character ranges."
+ (and (should-fail (/.regex "[^a-zA-Z]") "a")
+ (should-pass (/.regex "[^a-zA-Z]") "0")))
+ (_.test "Can make custom character classes more specific."
+ (and (let [RE (/.regex "[a-z&&[def]]")]
+ (and (should-fail RE "a")
+ (should-pass RE "d")))
+ (let [RE (/.regex "[a-z&&[^bc]]")]
+ (and (should-pass RE "a")
+ (should-fail RE "b")))
+ (let [RE (/.regex "[a-z&&[^m-p]]")]
+ (and (should-pass RE "a")
+ (should-fail RE "m")
+ (should-fail RE "p")))))
))
-(context: "Regular Expressions [Reference]"
- (let [number (&.regex "\d+")]
- (test "Can build complex regexs by combining simpler ones."
- (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789"))))
-
-(context: "Regular Expressions [Fuzzy Quantifiers]"
- ($_ seq
- (test "Can sequentially combine patterns."
- (should-passT "aa" (&.regex "aa") "aa"))
-
- (test "Can match patterns optionally."
- (and (should-passT "a" (&.regex "a?") "a")
- (should-passT "" (&.regex "a?") "")))
-
- (test "Can match a pattern 0 or more times."
- (and (should-passT "aaa" (&.regex "a*") "aaa")
- (should-passT "" (&.regex "a*") "")))
-
- (test "Can match a pattern 1 or more times."
- (and (should-passT "aaa" (&.regex "a+") "aaa")
- (should-passT "a" (&.regex "a+") "a")
- (should-fail (&.regex "a+") "")))
+(def: references
+ Test
+ (let [number (/.regex "\d+")]
+ (_.test "Can build complex regexs by combining simpler ones."
+ (should-check ["809-345-6789" "809" "345" "6789"] (/.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789"))))
+
+(def: fuzzy-quantifiers
+ Test
+ ($_ _.and
+ (_.test "Can sequentially combine patterns."
+ (text-should-pass "aa" (/.regex "aa") "aa"))
+
+ (_.test "Can match patterns optionally."
+ (and (text-should-pass "a" (/.regex "a?") "a")
+ (text-should-pass "" (/.regex "a?") "")))
+
+ (_.test "Can match a pattern 0 or more times."
+ (and (text-should-pass "aaa" (/.regex "a*") "aaa")
+ (text-should-pass "" (/.regex "a*") "")))
+
+ (_.test "Can match a pattern 1 or more times."
+ (and (text-should-pass "aaa" (/.regex "a+") "aaa")
+ (text-should-pass "a" (/.regex "a+") "a")
+ (should-fail (/.regex "a+") "")))
))
-(context: "Regular Expressions [Crisp Quantifiers]"
- ($_ seq
- (test "Can match a pattern N times."
- (and (should-passT "aa" (&.regex "a{2}") "aa")
- (should-passT "a" (&.regex "a{1}") "a")
- (should-fail (&.regex "a{3}") "aa")))
-
- (test "Can match a pattern at-least N times."
- (and (should-passT "aa" (&.regex "a{1,}") "aa")
- (should-passT "aa" (&.regex "a{2,}") "aa")
- (should-fail (&.regex "a{3,}") "aa")))
-
- (test "Can match a pattern at-most N times."
- (and (should-passT "aa" (&.regex "a{,2}") "aa")
- (should-passT "aa" (&.regex "a{,3}") "aa")))
-
- (test "Can match a pattern between N and M times."
- (and (should-passT "a" (&.regex "a{1,2}") "a")
- (should-passT "aa" (&.regex "a{1,2}") "aa")))
+(def: crisp-quantifiers
+ Test
+ ($_ _.and
+ (_.test "Can match a pattern N times."
+ (and (text-should-pass "aa" (/.regex "a{2}") "aa")
+ (text-should-pass "a" (/.regex "a{1}") "a")
+ (should-fail (/.regex "a{3}") "aa")))
+
+ (_.test "Can match a pattern at-least N times."
+ (and (text-should-pass "aa" (/.regex "a{1,}") "aa")
+ (text-should-pass "aa" (/.regex "a{2,}") "aa")
+ (should-fail (/.regex "a{3,}") "aa")))
+
+ (_.test "Can match a pattern at-most N times."
+ (and (text-should-pass "aa" (/.regex "a{,2}") "aa")
+ (text-should-pass "aa" (/.regex "a{,3}") "aa")))
+
+ (_.test "Can match a pattern between N and M times."
+ (and (text-should-pass "a" (/.regex "a{1,2}") "a")
+ (text-should-pass "aa" (/.regex "a{1,2}") "aa")))
))
-(context: "Regular Expressions [Groups]"
- ($_ seq
- (test "Can extract groups of sub-matches specified in a pattern."
- (and (should-check ["abc" "b"] (&.regex "a(.)c") "abc")
- (should-check ["abbbbbc" "bbbbb"] (&.regex "a(b+)c") "abbbbbc")
- (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789")
- (should-check ["809-345-6789" "809" "6789"] (&.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789")
- (should-check ["809-809-6789" "809" "6789"] (&.regex "(\d{3})-\0-(\d{4})") "809-809-6789")
- (should-check ["809-809-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789")
- (should-check ["809-809-6789-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789")))
-
- (test "Can specify groups within groups."
- (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (&.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789"))
+(def: groups
+ Test
+ ($_ _.and
+ (_.test "Can extract groups of sub-matches specified in a pattern."
+ (and (should-check ["abc" "b"] (/.regex "a(.)c") "abc")
+ (should-check ["abbbbbc" "bbbbb"] (/.regex "a(b+)c") "abbbbbc")
+ (should-check ["809-345-6789" "809" "345" "6789"] (/.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789")
+ (should-check ["809-345-6789" "809" "6789"] (/.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789")
+ (should-check ["809-809-6789" "809" "6789"] (/.regex "(\d{3})-\0-(\d{4})") "809-809-6789")
+ (should-check ["809-809-6789" "809" "6789"] (/.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789")
+ (should-check ["809-809-6789-6789" "809" "6789"] (/.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789")))
+
+ (_.test "Can specify groups within groups."
+ (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (/.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789"))
))
-(context: "Regular Expressions [Alternation]"
- ($_ seq
- (test "Can specify alternative patterns."
- (and (should-check ["a" (0 [])] (&.regex "a|b") "a")
- (should-check ["b" (1 [])] (&.regex "a|b") "b")
- (should-fail (&.regex "a|b") "c")))
-
- (test "Can have groups within alternations."
- (and (should-check ["abc" (0 ["b" "c"])] (&.regex "a(.)(.)|b(.)(.)") "abc")
- (should-check ["bcd" (1 ["c" "d"])] (&.regex "a(.)(.)|b(.)(.)") "bcd")
- (should-fail (&.regex "a(.)(.)|b(.)(.)") "cde")
-
- (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])]
- (&.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d")
- "809-345-6789")))
+(def: alternation
+ Test
+ ($_ _.and
+ (_.test "Can specify alternative patterns."
+ (and (should-check ["a" (0 [])] (/.regex "a|b") "a")
+ (should-check ["b" (1 [])] (/.regex "a|b") "b")
+ (should-fail (/.regex "a|b") "c")))
+
+ (_.test "Can have groups within alternations."
+ (and (should-check ["abc" (0 ["b" "c"])] (/.regex "a(.)(.)|b(.)(.)") "abc")
+ (should-check ["bcd" (1 ["c" "d"])] (/.regex "a(.)(.)|b(.)(.)") "bcd")
+ (should-fail (/.regex "a(.)(.)|b(.)(.)") "cde")
+
+ (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])]
+ (/.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d")
+ "809-345-6789")))
))
-(context: "Pattern-matching"
- (<| (times 100)
- (do @
- [sample1 (r.unicode 3)
- sample2 (r.unicode 3)
- sample3 (r.unicode 4)]
- (case (format sample1 "-" sample2 "-" sample3)
- (&.^regex "(.{3})-(.{3})-(.{4})"
- [_ match1 match2 match3])
- (test "Can pattern-match using regular-expressions."
- (and (text;= sample1 match1)
- (text;= sample2 match2)
- (text;= sample3 match3)))
-
- _
- (test "Cannot pattern-match using regular-expressions."
- #0)))))
+(def: pattern-matching
+ Test
+ (do r.monad
+ [sample1 (r.unicode 3)
+ sample2 (r.unicode 3)
+ sample3 (r.unicode 4)]
+ (case (format sample1 "-" sample2 "-" sample3)
+ (/.^regex "(.{3})-(.{3})-(.{4})"
+ [_ match1 match2 match3])
+ (_.test "Can pattern-match using regular-expressions."
+ (and (text@= sample1 match1)
+ (text@= sample2 match2)
+ (text@= sample3 match3)))
+
+ _
+ (_.test "Cannot pattern-match using regular-expressions."
+ #0))))
+
+(def: #export test
+ Test
+ (<| (_.context (%name (name-of /.regex)))
+ ($_ _.and
+ ..basics
+ ..system-character-classes
+ ..special-system-character-classes
+ ..custom-character-classes
+ ..references
+ ..fuzzy-quantifiers
+ ..crisp-quantifiers
+ ..groups
+ ..alternation
+ ..pattern-matching
+ )))