(.module: [lux #* [control [monad (#+ do Monad)] pipe ["p" parser]] [data [number (#+ hex)] ["." text ("#;." equivalence) format ["." lexer (#+ Lexer)] ["&" regex]]] [math ["r" random]] [macro ["s" syntax (#+ syntax:)]]] lux/test) ## [Utils] (def: (should-pass regex input) (-> (Lexer Text) Text Bit) (|> (lexer.run input regex) (case> (#.Right parsed) (text;= parsed input) _ #0))) (def: (should-passT test regex input) (-> Text (Lexer Text) Text Bit) (|> (lexer.run input regex) (case> (#.Right parsed) (text;= test parsed) _ #0))) (def: (should-fail regex input) (All [a] (-> (Lexer a) Text Bit)) (|> (lexer.run input regex) (case> (#.Left _) #1 _ #0))) (syntax: (should-check pattern regex input) (wrap (list (` (|> (lexer.run (~ input) (~ regex)) (case> (^ (#.Right (~ pattern))) #1 (~' _) #0)))))) ## [Tests] (context: "Regular Expressions [Basics]" (test "Can parse character literals." (and (should-pass (&.regex "a") "a") (should-fail (&.regex "a") ".") (should-pass (&.regex "\.") ".") (should-fail (&.regex "\.") "a")))) (context: "Regular Expressions [System character classes]" ($_ seq (test "Can parse anything." (should-pass (&.regex ".") "a")) (test "Can parse digits." (and (should-pass (&.regex "\d") "0") (should-fail (&.regex "\d") "m"))) (test "Can parse non digits." (and (should-pass (&.regex "\D") "m") (should-fail (&.regex "\D") "0"))) (test "Can parse white-space." (and (should-pass (&.regex "\s") " ") (should-fail (&.regex "\s") "m"))) (test "Can parse non white-space." (and (should-pass (&.regex "\S") "m") (should-fail (&.regex "\S") " "))) (test "Can parse word characters." (and (should-pass (&.regex "\w") "_") (should-fail (&.regex "\w") "^"))) (test "Can parse non word characters." (and (should-pass (&.regex "\W") ".") (should-fail (&.regex "\W") "a"))) )) (context: "Regular Expressions [Special system character classes : Part 1]" ($_ seq (test "Can parse using special character classes." (and (and (should-pass (&.regex "\p{Lower}") "m") (should-fail (&.regex "\p{Lower}") "M")) (and (should-pass (&.regex "\p{Upper}") "M") (should-fail (&.regex "\p{Upper}") "m")) (and (should-pass (&.regex "\p{Alpha}") "M") (should-fail (&.regex "\p{Alpha}") "0")) (and (should-pass (&.regex "\p{Digit}") "1") (should-fail (&.regex "\p{Digit}") "n")) (and (should-pass (&.regex "\p{Alnum}") "1") (should-fail (&.regex "\p{Alnum}") ".")) (and (should-pass (&.regex "\p{Space}") " ") (should-fail (&.regex "\p{Space}") ".")) )) )) (context: "Regular Expressions [Special system character classes : Part 2]" ($_ seq (test "Can parse using special character classes." (and (and (should-pass (&.regex "\p{HexDigit}") "a") (should-fail (&.regex "\p{HexDigit}") ".")) (and (should-pass (&.regex "\p{OctDigit}") "6") (should-fail (&.regex "\p{OctDigit}") ".")) (and (should-pass (&.regex "\p{Blank}") text.tab) (should-fail (&.regex "\p{Blank}") ".")) (and (should-pass (&.regex "\p{ASCII}") text.tab) (should-fail (&.regex "\p{ASCII}") (text.from-code (hex "1234")))) (and (should-pass (&.regex "\p{Contrl}") (text.from-code (hex "12"))) (should-fail (&.regex "\p{Contrl}") "a")) (and (should-pass (&.regex "\p{Punct}") "@") (should-fail (&.regex "\p{Punct}") "a")) (and (should-pass (&.regex "\p{Graph}") "@") (should-fail (&.regex "\p{Graph}") " ")) (and (should-pass (&.regex "\p{Print}") (text.from-code (hex "20"))) (should-fail (&.regex "\p{Print}") (text.from-code (hex "1234")))) )) )) (context: "Regular Expressions [Custom character classes : Part 1]" ($_ seq (test "Can parse using custom character classes." (and (should-pass (&.regex "[abc]") "a") (should-fail (&.regex "[abc]") "m"))) (test "Can parse using character ranges." (and (should-pass (&.regex "[a-z]") "a") (should-pass (&.regex "[a-z]") "m") (should-pass (&.regex "[a-z]") "z"))) (test "Can combine character ranges." (and (should-pass (&.regex "[a-zA-Z]") "a") (should-pass (&.regex "[a-zA-Z]") "m") (should-pass (&.regex "[a-zA-Z]") "z") (should-pass (&.regex "[a-zA-Z]") "A") (should-pass (&.regex "[a-zA-Z]") "M") (should-pass (&.regex "[a-zA-Z]") "Z"))) )) (context: "Regular Expressions [Custom character classes : Part 2]" ($_ seq (test "Can negate custom character classes." (and (should-fail (&.regex "[^abc]") "a") (should-pass (&.regex "[^abc]") "m"))) (test "Can negate character ranges.." (and (should-fail (&.regex "[^a-z]") "a") (should-pass (&.regex "[^a-z]") "0"))) (test "Can parse negate combinations of character ranges." (and (should-fail (&.regex "[^a-zA-Z]") "a") (should-pass (&.regex "[^a-zA-Z]") "0"))) )) (context: "Regular Expressions [Custom character classes : Part 3]" ($_ seq (test "Can make custom character classes more specific." (and (let [RE (&.regex "[a-z&&[def]]")] (and (should-fail RE "a") (should-pass RE "d"))) (let [RE (&.regex "[a-z&&[^bc]]")] (and (should-pass RE "a") (should-fail RE "b"))) (let [RE (&.regex "[a-z&&[^m-p]]")] (and (should-pass RE "a") (should-fail RE "m") (should-fail RE "p"))))) )) (context: "Regular Expressions [Reference]" (let [number (&.regex "\d+")] (test "Can build complex regexs by combining simpler ones." (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\@)-(\@)-(\@)") "809-345-6789")))) (context: "Regular Expressions [Fuzzy Quantifiers]" ($_ seq (test "Can sequentially combine patterns." (should-passT "aa" (&.regex "aa") "aa")) (test "Can match patterns optionally." (and (should-passT "a" (&.regex "a?") "a") (should-passT "" (&.regex "a?") ""))) (test "Can match a pattern 0 or more times." (and (should-passT "aaa" (&.regex "a*") "aaa") (should-passT "" (&.regex "a*") ""))) (test "Can match a pattern 1 or more times." (and (should-passT "aaa" (&.regex "a+") "aaa") (should-passT "a" (&.regex "a+") "a") (should-fail (&.regex "a+") ""))) )) (context: "Regular Expressions [Crisp Quantifiers]" ($_ seq (test "Can match a pattern N times." (and (should-passT "aa" (&.regex "a{2}") "aa") (should-passT "a" (&.regex "a{1}") "a") (should-fail (&.regex "a{3}") "aa"))) (test "Can match a pattern at-least N times." (and (should-passT "aa" (&.regex "a{1,}") "aa") (should-passT "aa" (&.regex "a{2,}") "aa") (should-fail (&.regex "a{3,}") "aa"))) (test "Can match a pattern at-most N times." (and (should-passT "aa" (&.regex "a{,2}") "aa") (should-passT "aa" (&.regex "a{,3}") "aa"))) (test "Can match a pattern between N and M times." (and (should-passT "a" (&.regex "a{1,2}") "a") (should-passT "aa" (&.regex "a{1,2}") "aa"))) )) (context: "Regular Expressions [Groups]" ($_ seq (test "Can extract groups of sub-matches specified in a pattern." (and (should-check ["abc" "b"] (&.regex "a(.)c") "abc") (should-check ["abbbbbc" "bbbbb"] (&.regex "a(b+)c") "abbbbbc") (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789") (should-check ["809-345-6789" "809" "6789"] (&.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789") (should-check ["809-809-6789" "809" "6789"] (&.regex "(\d{3})-\0-(\d{4})") "809-809-6789") (should-check ["809-809-6789" "809" "6789"] (&.regex "(?\d{3})-\k-(\d{4})") "809-809-6789") (should-check ["809-809-6789-6789" "809" "6789"] (&.regex "(?\d{3})-\k-(\d{4})-\0") "809-809-6789-6789"))) (test "Can specify groups within groups." (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (&.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789")) )) (context: "Regular Expressions [Alternation]" ($_ seq (test "Can specify alternative patterns." (and (should-check ["a" (0 [])] (&.regex "a|b") "a") (should-check ["b" (1 [])] (&.regex "a|b") "b") (should-fail (&.regex "a|b") "c"))) (test "Can have groups within alternations." (and (should-check ["abc" (0 ["b" "c"])] (&.regex "a(.)(.)|b(.)(.)") "abc") (should-check ["bcd" (1 ["c" "d"])] (&.regex "a(.)(.)|b(.)(.)") "bcd") (should-fail (&.regex "a(.)(.)|b(.)(.)") "cde") (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])] (&.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d") "809-345-6789"))) )) (context: "Pattern-matching" (<| (times 100) (do @ [sample1 (r.unicode 3) sample2 (r.unicode 3) sample3 (r.unicode 4)] (case (format sample1 "-" sample2 "-" sample3) (&.^regex "(.{3})-(.{3})-(.{4})" [_ match1 match2 match3]) (test "Can pattern-match using regular-expressions." (and (text;= sample1 match1) (text;= sample2 match2) (text;= sample3 match3))) _ (test "Cannot pattern-match using regular-expressions." #0)))))