aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/test/lux/data/text/regex.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/test/lux/data/text/regex.lux')
-rw-r--r--stdlib/source/test/lux/data/text/regex.lux286
1 files changed, 286 insertions, 0 deletions
diff --git a/stdlib/source/test/lux/data/text/regex.lux b/stdlib/source/test/lux/data/text/regex.lux
new file mode 100644
index 000000000..f6bc7d098
--- /dev/null
+++ b/stdlib/source/test/lux/data/text/regex.lux
@@ -0,0 +1,286 @@
+(.module:
+ [lux #*
+ [control
+ [monad (#+ do Monad)]
+ pipe
+ ["p" parser]]
+ [data
+ [number (#+ hex)]
+ ["." text ("text/." equivalence)
+ format
+ ["." lexer (#+ Lexer)]
+ ["&" regex]]]
+ [math
+ ["r" random]]
+ [macro
+ ["s" syntax (#+ syntax:)]]]
+ lux/test)
+
+## [Utils]
+(def: (should-pass regex input)
+ (-> (Lexer Text) Text Bit)
+ (|> (lexer.run input regex)
+ (case> (#.Right parsed)
+ (text/= parsed input)
+
+ _
+ #0)))
+
+(def: (should-passT test regex input)
+ (-> Text (Lexer Text) Text Bit)
+ (|> (lexer.run input regex)
+ (case> (#.Right parsed)
+ (text/= test parsed)
+
+ _
+ #0)))
+
+(def: (should-fail regex input)
+ (All [a] (-> (Lexer a) Text Bit))
+ (|> (lexer.run input regex)
+ (case> (#.Left _) #1 _ #0)))
+
+(syntax: (should-check pattern regex input)
+ (wrap (list (` (|> (lexer.run (~ input) (~ regex))
+ (case> (^ (#.Right (~ pattern)))
+ #1
+
+ (~' _)
+ #0))))))
+
+## [Tests]
+(context: "Regular Expressions [Basics]"
+ (test "Can parse character literals."
+ (and (should-pass (&.regex "a") "a")
+ (should-fail (&.regex "a") ".")
+ (should-pass (&.regex "\.") ".")
+ (should-fail (&.regex "\.") "a"))))
+
+(context: "Regular Expressions [System character classes]"
+ ($_ seq
+ (test "Can parse anything."
+ (should-pass (&.regex ".") "a"))
+
+ (test "Can parse digits."
+ (and (should-pass (&.regex "\d") "0")
+ (should-fail (&.regex "\d") "m")))
+
+ (test "Can parse non digits."
+ (and (should-pass (&.regex "\D") "m")
+ (should-fail (&.regex "\D") "0")))
+
+ (test "Can parse white-space."
+ (and (should-pass (&.regex "\s") " ")
+ (should-fail (&.regex "\s") "m")))
+
+ (test "Can parse non white-space."
+ (and (should-pass (&.regex "\S") "m")
+ (should-fail (&.regex "\S") " ")))
+
+ (test "Can parse word characters."
+ (and (should-pass (&.regex "\w") "_")
+ (should-fail (&.regex "\w") "^")))
+
+ (test "Can parse non word characters."
+ (and (should-pass (&.regex "\W") ".")
+ (should-fail (&.regex "\W") "a")))
+ ))
+
+(context: "Regular Expressions [Special system character classes : Part 1]"
+ ($_ seq
+ (test "Can parse using special character classes."
+ (and (and (should-pass (&.regex "\p{Lower}") "m")
+ (should-fail (&.regex "\p{Lower}") "M"))
+
+ (and (should-pass (&.regex "\p{Upper}") "M")
+ (should-fail (&.regex "\p{Upper}") "m"))
+
+ (and (should-pass (&.regex "\p{Alpha}") "M")
+ (should-fail (&.regex "\p{Alpha}") "0"))
+
+ (and (should-pass (&.regex "\p{Digit}") "1")
+ (should-fail (&.regex "\p{Digit}") "n"))
+
+ (and (should-pass (&.regex "\p{Alnum}") "1")
+ (should-fail (&.regex "\p{Alnum}") "."))
+
+ (and (should-pass (&.regex "\p{Space}") " ")
+ (should-fail (&.regex "\p{Space}") "."))
+ ))
+ ))
+
+(context: "Regular Expressions [Special system character classes : Part 2]"
+ ($_ seq
+ (test "Can parse using special character classes."
+ (and (and (should-pass (&.regex "\p{HexDigit}") "a")
+ (should-fail (&.regex "\p{HexDigit}") "."))
+
+ (and (should-pass (&.regex "\p{OctDigit}") "6")
+ (should-fail (&.regex "\p{OctDigit}") "."))
+
+ (and (should-pass (&.regex "\p{Blank}") text.tab)
+ (should-fail (&.regex "\p{Blank}") "."))
+
+ (and (should-pass (&.regex "\p{ASCII}") text.tab)
+ (should-fail (&.regex "\p{ASCII}") (text.from-code (hex "1234"))))
+
+ (and (should-pass (&.regex "\p{Contrl}") (text.from-code (hex "12")))
+ (should-fail (&.regex "\p{Contrl}") "a"))
+
+ (and (should-pass (&.regex "\p{Punct}") "@")
+ (should-fail (&.regex "\p{Punct}") "a"))
+
+ (and (should-pass (&.regex "\p{Graph}") "@")
+ (should-fail (&.regex "\p{Graph}") " "))
+
+ (and (should-pass (&.regex "\p{Print}") (text.from-code (hex "20")))
+ (should-fail (&.regex "\p{Print}") (text.from-code (hex "1234"))))
+ ))
+ ))
+
+(context: "Regular Expressions [Custom character classes : Part 1]"
+ ($_ seq
+ (test "Can parse using custom character classes."
+ (and (should-pass (&.regex "[abc]") "a")
+ (should-fail (&.regex "[abc]") "m")))
+
+ (test "Can parse using character ranges."
+ (and (should-pass (&.regex "[a-z]") "a")
+ (should-pass (&.regex "[a-z]") "m")
+ (should-pass (&.regex "[a-z]") "z")))
+
+ (test "Can combine character ranges."
+ (and (should-pass (&.regex "[a-zA-Z]") "a")
+ (should-pass (&.regex "[a-zA-Z]") "m")
+ (should-pass (&.regex "[a-zA-Z]") "z")
+ (should-pass (&.regex "[a-zA-Z]") "A")
+ (should-pass (&.regex "[a-zA-Z]") "M")
+ (should-pass (&.regex "[a-zA-Z]") "Z")))
+ ))
+
+(context: "Regular Expressions [Custom character classes : Part 2]"
+ ($_ seq
+ (test "Can negate custom character classes."
+ (and (should-fail (&.regex "[^abc]") "a")
+ (should-pass (&.regex "[^abc]") "m")))
+
+ (test "Can negate character ranges.."
+ (and (should-fail (&.regex "[^a-z]") "a")
+ (should-pass (&.regex "[^a-z]") "0")))
+
+ (test "Can parse negate combinations of character ranges."
+ (and (should-fail (&.regex "[^a-zA-Z]") "a")
+ (should-pass (&.regex "[^a-zA-Z]") "0")))
+ ))
+
+(context: "Regular Expressions [Custom character classes : Part 3]"
+ ($_ seq
+ (test "Can make custom character classes more specific."
+ (and (let [RE (&.regex "[a-z&&[def]]")]
+ (and (should-fail RE "a")
+ (should-pass RE "d")))
+
+ (let [RE (&.regex "[a-z&&[^bc]]")]
+ (and (should-pass RE "a")
+ (should-fail RE "b")))
+
+ (let [RE (&.regex "[a-z&&[^m-p]]")]
+ (and (should-pass RE "a")
+ (should-fail RE "m")
+ (should-fail RE "p")))))
+ ))
+
+(context: "Regular Expressions [Reference]"
+ (let [number (&.regex "\d+")]
+ (test "Can build complex regexs by combining simpler ones."
+ (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789"))))
+
+(context: "Regular Expressions [Fuzzy Quantifiers]"
+ ($_ seq
+ (test "Can sequentially combine patterns."
+ (should-passT "aa" (&.regex "aa") "aa"))
+
+ (test "Can match patterns optionally."
+ (and (should-passT "a" (&.regex "a?") "a")
+ (should-passT "" (&.regex "a?") "")))
+
+ (test "Can match a pattern 0 or more times."
+ (and (should-passT "aaa" (&.regex "a*") "aaa")
+ (should-passT "" (&.regex "a*") "")))
+
+ (test "Can match a pattern 1 or more times."
+ (and (should-passT "aaa" (&.regex "a+") "aaa")
+ (should-passT "a" (&.regex "a+") "a")
+ (should-fail (&.regex "a+") "")))
+ ))
+
+(context: "Regular Expressions [Crisp Quantifiers]"
+ ($_ seq
+ (test "Can match a pattern N times."
+ (and (should-passT "aa" (&.regex "a{2}") "aa")
+ (should-passT "a" (&.regex "a{1}") "a")
+ (should-fail (&.regex "a{3}") "aa")))
+
+ (test "Can match a pattern at-least N times."
+ (and (should-passT "aa" (&.regex "a{1,}") "aa")
+ (should-passT "aa" (&.regex "a{2,}") "aa")
+ (should-fail (&.regex "a{3,}") "aa")))
+
+ (test "Can match a pattern at-most N times."
+ (and (should-passT "aa" (&.regex "a{,2}") "aa")
+ (should-passT "aa" (&.regex "a{,3}") "aa")))
+
+ (test "Can match a pattern between N and M times."
+ (and (should-passT "a" (&.regex "a{1,2}") "a")
+ (should-passT "aa" (&.regex "a{1,2}") "aa")))
+ ))
+
+(context: "Regular Expressions [Groups]"
+ ($_ seq
+ (test "Can extract groups of sub-matches specified in a pattern."
+ (and (should-check ["abc" "b"] (&.regex "a(.)c") "abc")
+ (should-check ["abbbbbc" "bbbbb"] (&.regex "a(b+)c") "abbbbbc")
+ (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789")
+ (should-check ["809-345-6789" "809" "6789"] (&.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789")
+ (should-check ["809-809-6789" "809" "6789"] (&.regex "(\d{3})-\0-(\d{4})") "809-809-6789")
+ (should-check ["809-809-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789")
+ (should-check ["809-809-6789-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789")))
+
+ (test "Can specify groups within groups."
+ (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (&.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789"))
+ ))
+
+(context: "Regular Expressions [Alternation]"
+ ($_ seq
+ (test "Can specify alternative patterns."
+ (and (should-check ["a" (0 [])] (&.regex "a|b") "a")
+ (should-check ["b" (1 [])] (&.regex "a|b") "b")
+ (should-fail (&.regex "a|b") "c")))
+
+ (test "Can have groups within alternations."
+ (and (should-check ["abc" (0 ["b" "c"])] (&.regex "a(.)(.)|b(.)(.)") "abc")
+ (should-check ["bcd" (1 ["c" "d"])] (&.regex "a(.)(.)|b(.)(.)") "bcd")
+ (should-fail (&.regex "a(.)(.)|b(.)(.)") "cde")
+
+ (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])]
+ (&.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d")
+ "809-345-6789")))
+ ))
+
+(context: "Pattern-matching"
+ (<| (times 100)
+ (do @
+ [sample1 (r.unicode 3)
+ sample2 (r.unicode 3)
+ sample3 (r.unicode 4)]
+ (case (format sample1 "-" sample2 "-" sample3)
+ (&.^regex "(.{3})-(.{3})-(.{4})"
+ [_ match1 match2 match3])
+ (test "Can pattern-match using regular-expressions."
+ (and (text/= sample1 match1)
+ (text/= sample2 match2)
+ (text/= sample3 match3)))
+
+ _
+ (test "Cannot pattern-match using regular-expressions."
+ #0)))))