diff options
Diffstat (limited to 'stdlib/source/test/lux/data/text/regex.lux')
-rw-r--r-- | stdlib/source/test/lux/data/text/regex.lux | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/stdlib/source/test/lux/data/text/regex.lux b/stdlib/source/test/lux/data/text/regex.lux new file mode 100644 index 000000000..f6bc7d098 --- /dev/null +++ b/stdlib/source/test/lux/data/text/regex.lux @@ -0,0 +1,286 @@ +(.module: + [lux #* + [control + [monad (#+ do Monad)] + pipe + ["p" parser]] + [data + [number (#+ hex)] + ["." text ("text/." equivalence) + format + ["." lexer (#+ Lexer)] + ["&" regex]]] + [math + ["r" random]] + [macro + ["s" syntax (#+ syntax:)]]] + lux/test) + +## [Utils] +(def: (should-pass regex input) + (-> (Lexer Text) Text Bit) + (|> (lexer.run input regex) + (case> (#.Right parsed) + (text/= parsed input) + + _ + #0))) + +(def: (should-passT test regex input) + (-> Text (Lexer Text) Text Bit) + (|> (lexer.run input regex) + (case> (#.Right parsed) + (text/= test parsed) + + _ + #0))) + +(def: (should-fail regex input) + (All [a] (-> (Lexer a) Text Bit)) + (|> (lexer.run input regex) + (case> (#.Left _) #1 _ #0))) + +(syntax: (should-check pattern regex input) + (wrap (list (` (|> (lexer.run (~ input) (~ regex)) + (case> (^ (#.Right (~ pattern))) + #1 + + (~' _) + #0)))))) + +## [Tests] +(context: "Regular Expressions [Basics]" + (test "Can parse character literals." + (and (should-pass (&.regex "a") "a") + (should-fail (&.regex "a") ".") + (should-pass (&.regex "\.") ".") + (should-fail (&.regex "\.") "a")))) + +(context: "Regular Expressions [System character classes]" + ($_ seq + (test "Can parse anything." + (should-pass (&.regex ".") "a")) + + (test "Can parse digits." + (and (should-pass (&.regex "\d") "0") + (should-fail (&.regex "\d") "m"))) + + (test "Can parse non digits." + (and (should-pass (&.regex "\D") "m") + (should-fail (&.regex "\D") "0"))) + + (test "Can parse white-space." + (and (should-pass (&.regex "\s") " ") + (should-fail (&.regex "\s") "m"))) + + (test "Can parse non white-space." + (and (should-pass (&.regex "\S") "m") + (should-fail (&.regex "\S") " "))) + + (test "Can parse word characters." + (and (should-pass (&.regex "\w") "_") + (should-fail (&.regex "\w") "^"))) + + (test "Can parse non word characters." + (and (should-pass (&.regex "\W") ".") + (should-fail (&.regex "\W") "a"))) + )) + +(context: "Regular Expressions [Special system character classes : Part 1]" + ($_ seq + (test "Can parse using special character classes." + (and (and (should-pass (&.regex "\p{Lower}") "m") + (should-fail (&.regex "\p{Lower}") "M")) + + (and (should-pass (&.regex "\p{Upper}") "M") + (should-fail (&.regex "\p{Upper}") "m")) + + (and (should-pass (&.regex "\p{Alpha}") "M") + (should-fail (&.regex "\p{Alpha}") "0")) + + (and (should-pass (&.regex "\p{Digit}") "1") + (should-fail (&.regex "\p{Digit}") "n")) + + (and (should-pass (&.regex "\p{Alnum}") "1") + (should-fail (&.regex "\p{Alnum}") ".")) + + (and (should-pass (&.regex "\p{Space}") " ") + (should-fail (&.regex "\p{Space}") ".")) + )) + )) + +(context: "Regular Expressions [Special system character classes : Part 2]" + ($_ seq + (test "Can parse using special character classes." + (and (and (should-pass (&.regex "\p{HexDigit}") "a") + (should-fail (&.regex "\p{HexDigit}") ".")) + + (and (should-pass (&.regex "\p{OctDigit}") "6") + (should-fail (&.regex "\p{OctDigit}") ".")) + + (and (should-pass (&.regex "\p{Blank}") text.tab) + (should-fail (&.regex "\p{Blank}") ".")) + + (and (should-pass (&.regex "\p{ASCII}") text.tab) + (should-fail (&.regex "\p{ASCII}") (text.from-code (hex "1234")))) + + (and (should-pass (&.regex "\p{Contrl}") (text.from-code (hex "12"))) + (should-fail (&.regex "\p{Contrl}") "a")) + + (and (should-pass (&.regex "\p{Punct}") "@") + (should-fail (&.regex "\p{Punct}") "a")) + + (and (should-pass (&.regex "\p{Graph}") "@") + (should-fail (&.regex "\p{Graph}") " ")) + + (and (should-pass (&.regex "\p{Print}") (text.from-code (hex "20"))) + (should-fail (&.regex "\p{Print}") (text.from-code (hex "1234")))) + )) + )) + +(context: "Regular Expressions [Custom character classes : Part 1]" + ($_ seq + (test "Can parse using custom character classes." + (and (should-pass (&.regex "[abc]") "a") + (should-fail (&.regex "[abc]") "m"))) + + (test "Can parse using character ranges." + (and (should-pass (&.regex "[a-z]") "a") + (should-pass (&.regex "[a-z]") "m") + (should-pass (&.regex "[a-z]") "z"))) + + (test "Can combine character ranges." + (and (should-pass (&.regex "[a-zA-Z]") "a") + (should-pass (&.regex "[a-zA-Z]") "m") + (should-pass (&.regex "[a-zA-Z]") "z") + (should-pass (&.regex "[a-zA-Z]") "A") + (should-pass (&.regex "[a-zA-Z]") "M") + (should-pass (&.regex "[a-zA-Z]") "Z"))) + )) + +(context: "Regular Expressions [Custom character classes : Part 2]" + ($_ seq + (test "Can negate custom character classes." + (and (should-fail (&.regex "[^abc]") "a") + (should-pass (&.regex "[^abc]") "m"))) + + (test "Can negate character ranges.." + (and (should-fail (&.regex "[^a-z]") "a") + (should-pass (&.regex "[^a-z]") "0"))) + + (test "Can parse negate combinations of character ranges." + (and (should-fail (&.regex "[^a-zA-Z]") "a") + (should-pass (&.regex "[^a-zA-Z]") "0"))) + )) + +(context: "Regular Expressions [Custom character classes : Part 3]" + ($_ seq + (test "Can make custom character classes more specific." + (and (let [RE (&.regex "[a-z&&[def]]")] + (and (should-fail RE "a") + (should-pass RE "d"))) + + (let [RE (&.regex "[a-z&&[^bc]]")] + (and (should-pass RE "a") + (should-fail RE "b"))) + + (let [RE (&.regex "[a-z&&[^m-p]]")] + (and (should-pass RE "a") + (should-fail RE "m") + (should-fail RE "p"))))) + )) + +(context: "Regular Expressions [Reference]" + (let [number (&.regex "\d+")] + (test "Can build complex regexs by combining simpler ones." + (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789")))) + +(context: "Regular Expressions [Fuzzy Quantifiers]" + ($_ seq + (test "Can sequentially combine patterns." + (should-passT "aa" (&.regex "aa") "aa")) + + (test "Can match patterns optionally." + (and (should-passT "a" (&.regex "a?") "a") + (should-passT "" (&.regex "a?") ""))) + + (test "Can match a pattern 0 or more times." + (and (should-passT "aaa" (&.regex "a*") "aaa") + (should-passT "" (&.regex "a*") ""))) + + (test "Can match a pattern 1 or more times." + (and (should-passT "aaa" (&.regex "a+") "aaa") + (should-passT "a" (&.regex "a+") "a") + (should-fail (&.regex "a+") ""))) + )) + +(context: "Regular Expressions [Crisp Quantifiers]" + ($_ seq + (test "Can match a pattern N times." + (and (should-passT "aa" (&.regex "a{2}") "aa") + (should-passT "a" (&.regex "a{1}") "a") + (should-fail (&.regex "a{3}") "aa"))) + + (test "Can match a pattern at-least N times." + (and (should-passT "aa" (&.regex "a{1,}") "aa") + (should-passT "aa" (&.regex "a{2,}") "aa") + (should-fail (&.regex "a{3,}") "aa"))) + + (test "Can match a pattern at-most N times." + (and (should-passT "aa" (&.regex "a{,2}") "aa") + (should-passT "aa" (&.regex "a{,3}") "aa"))) + + (test "Can match a pattern between N and M times." + (and (should-passT "a" (&.regex "a{1,2}") "a") + (should-passT "aa" (&.regex "a{1,2}") "aa"))) + )) + +(context: "Regular Expressions [Groups]" + ($_ seq + (test "Can extract groups of sub-matches specified in a pattern." + (and (should-check ["abc" "b"] (&.regex "a(.)c") "abc") + (should-check ["abbbbbc" "bbbbb"] (&.regex "a(b+)c") "abbbbbc") + (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789") + (should-check ["809-345-6789" "809" "6789"] (&.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789") + (should-check ["809-809-6789" "809" "6789"] (&.regex "(\d{3})-\0-(\d{4})") "809-809-6789") + (should-check ["809-809-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789") + (should-check ["809-809-6789-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789"))) + + (test "Can specify groups within groups." + (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (&.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789")) + )) + +(context: "Regular Expressions [Alternation]" + ($_ seq + (test "Can specify alternative patterns." + (and (should-check ["a" (0 [])] (&.regex "a|b") "a") + (should-check ["b" (1 [])] (&.regex "a|b") "b") + (should-fail (&.regex "a|b") "c"))) + + (test "Can have groups within alternations." + (and (should-check ["abc" (0 ["b" "c"])] (&.regex "a(.)(.)|b(.)(.)") "abc") + (should-check ["bcd" (1 ["c" "d"])] (&.regex "a(.)(.)|b(.)(.)") "bcd") + (should-fail (&.regex "a(.)(.)|b(.)(.)") "cde") + + (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])] + (&.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d") + "809-345-6789"))) + )) + +(context: "Pattern-matching" + (<| (times 100) + (do @ + [sample1 (r.unicode 3) + sample2 (r.unicode 3) + sample3 (r.unicode 4)] + (case (format sample1 "-" sample2 "-" sample3) + (&.^regex "(.{3})-(.{3})-(.{4})" + [_ match1 match2 match3]) + (test "Can pattern-match using regular-expressions." + (and (text/= sample1 match1) + (text/= sample2 match2) + (text/= sample3 match3))) + + _ + (test "Cannot pattern-match using regular-expressions." + #0))))) |