diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/test/lux/data/text/regex.lux | 466 |
1 files changed, 235 insertions, 231 deletions
diff --git a/stdlib/source/test/lux/data/text/regex.lux b/stdlib/source/test/lux/data/text/regex.lux index ffa5612da..059adff84 100644 --- a/stdlib/source/test/lux/data/text/regex.lux +++ b/stdlib/source/test/lux/data/text/regex.lux @@ -1,36 +1,36 @@ (.module: [lux #* + data/text/format + ["_" test (#+ Test)] [control [monad (#+ do Monad)] pipe ["p" parser]] [data [number (#+ hex)] - ["." text ("#;." equivalence) - format - ["." lexer (#+ Lexer)] - ["&" regex]]] + ["." text ("#@." equivalence) + ["." lexer (#+ Lexer)]]] [math ["r" random]] [macro ["s" syntax (#+ syntax:)]]] - lux/test) + {1 + ["." /]}) -## [Utils] (def: (should-pass regex input) (-> (Lexer Text) Text Bit) (|> (lexer.run input regex) (case> (#.Right parsed) - (text;= parsed input) + (text@= parsed input) _ #0))) -(def: (should-passT test regex input) +(def: (text-should-pass test regex input) (-> Text (Lexer Text) Text Bit) (|> (lexer.run input regex) (case> (#.Right parsed) - (text;= test parsed) + (text@= test parsed) _ #0))) @@ -48,239 +48,243 @@ (~' _) #0)))))) -## [Tests] -(context: "Regular Expressions [Basics]" - (test "Can parse character literals." - (and (should-pass (&.regex "a") "a") - (should-fail (&.regex "a") ".") - (should-pass (&.regex "\.") ".") - (should-fail (&.regex "\.") "a")))) - -(context: "Regular Expressions [System character classes]" - ($_ seq - (test "Can parse anything." - (should-pass (&.regex ".") "a")) - - (test "Can parse digits." - (and (should-pass (&.regex "\d") "0") - (should-fail (&.regex "\d") "m"))) - - (test "Can parse non digits." - (and (should-pass (&.regex "\D") "m") - (should-fail (&.regex "\D") "0"))) - - (test "Can parse white-space." - (and (should-pass (&.regex "\s") " ") - (should-fail (&.regex "\s") "m"))) - - (test "Can parse non white-space." - (and (should-pass (&.regex "\S") "m") - (should-fail (&.regex "\S") " "))) - - (test "Can parse word characters." - (and (should-pass (&.regex "\w") "_") - (should-fail (&.regex "\w") "^"))) - - (test "Can parse non word characters." - (and (should-pass (&.regex "\W") ".") - (should-fail (&.regex "\W") "a"))) +(def: basics + Test + (_.test "Can parse character literals." + (and (should-pass (/.regex "a") "a") + (should-fail (/.regex "a") ".") + (should-pass (/.regex "\.") ".") + (should-fail (/.regex "\.") "a")))) + +(def: system-character-classes + Test + ($_ _.and + (_.test "Can parse anything." + (should-pass (/.regex ".") "a")) + + (_.test "Can parse digits." + (and (should-pass (/.regex "\d") "0") + (should-fail (/.regex "\d") "m"))) + + (_.test "Can parse non digits." + (and (should-pass (/.regex "\D") "m") + (should-fail (/.regex "\D") "0"))) + + (_.test "Can parse white-space." + (and (should-pass (/.regex "\s") " ") + (should-fail (/.regex "\s") "m"))) + + (_.test "Can parse non white-space." + (and (should-pass (/.regex "\S") "m") + (should-fail (/.regex "\S") " "))) + + (_.test "Can parse word characters." + (and (should-pass (/.regex "\w") "_") + (should-fail (/.regex "\w") "^"))) + + (_.test "Can parse non word characters." + (and (should-pass (/.regex "\W") ".") + (should-fail (/.regex "\W") "a"))) )) -(context: "Regular Expressions [Special system character classes : Part 1]" - ($_ seq - (test "Can parse using special character classes." - (and (and (should-pass (&.regex "\p{Lower}") "m") - (should-fail (&.regex "\p{Lower}") "M")) - - (and (should-pass (&.regex "\p{Upper}") "M") - (should-fail (&.regex "\p{Upper}") "m")) - - (and (should-pass (&.regex "\p{Alpha}") "M") - (should-fail (&.regex "\p{Alpha}") "0")) - - (and (should-pass (&.regex "\p{Digit}") "1") - (should-fail (&.regex "\p{Digit}") "n")) - - (and (should-pass (&.regex "\p{Alnum}") "1") - (should-fail (&.regex "\p{Alnum}") ".")) - - (and (should-pass (&.regex "\p{Space}") " ") - (should-fail (&.regex "\p{Space}") ".")) - )) - )) - -(context: "Regular Expressions [Special system character classes : Part 2]" - ($_ seq - (test "Can parse using special character classes." - (and (and (should-pass (&.regex "\p{HexDigit}") "a") - (should-fail (&.regex "\p{HexDigit}") ".")) - - (and (should-pass (&.regex "\p{OctDigit}") "6") - (should-fail (&.regex "\p{OctDigit}") ".")) - - (and (should-pass (&.regex "\p{Blank}") text.tab) - (should-fail (&.regex "\p{Blank}") ".")) - - (and (should-pass (&.regex "\p{ASCII}") text.tab) - (should-fail (&.regex "\p{ASCII}") (text.from-code (hex "1234")))) - - (and (should-pass (&.regex "\p{Contrl}") (text.from-code (hex "12"))) - (should-fail (&.regex "\p{Contrl}") "a")) - - (and (should-pass (&.regex "\p{Punct}") "@") - (should-fail (&.regex "\p{Punct}") "a")) - - (and (should-pass (&.regex "\p{Graph}") "@") - (should-fail (&.regex "\p{Graph}") " ")) - - (and (should-pass (&.regex "\p{Print}") (text.from-code (hex "20"))) - (should-fail (&.regex "\p{Print}") (text.from-code (hex "1234")))) - )) - )) - -(context: "Regular Expressions [Custom character classes : Part 1]" - ($_ seq - (test "Can parse using custom character classes." - (and (should-pass (&.regex "[abc]") "a") - (should-fail (&.regex "[abc]") "m"))) - - (test "Can parse using character ranges." - (and (should-pass (&.regex "[a-z]") "a") - (should-pass (&.regex "[a-z]") "m") - (should-pass (&.regex "[a-z]") "z"))) - - (test "Can combine character ranges." - (and (should-pass (&.regex "[a-zA-Z]") "a") - (should-pass (&.regex "[a-zA-Z]") "m") - (should-pass (&.regex "[a-zA-Z]") "z") - (should-pass (&.regex "[a-zA-Z]") "A") - (should-pass (&.regex "[a-zA-Z]") "M") - (should-pass (&.regex "[a-zA-Z]") "Z"))) - )) - -(context: "Regular Expressions [Custom character classes : Part 2]" - ($_ seq - (test "Can negate custom character classes." - (and (should-fail (&.regex "[^abc]") "a") - (should-pass (&.regex "[^abc]") "m"))) - - (test "Can negate character ranges.." - (and (should-fail (&.regex "[^a-z]") "a") - (should-pass (&.regex "[^a-z]") "0"))) - - (test "Can parse negate combinations of character ranges." - (and (should-fail (&.regex "[^a-zA-Z]") "a") - (should-pass (&.regex "[^a-zA-Z]") "0"))) +(def: special-system-character-classes + Test + ($_ _.and + (_.test "Lower-case." + (and (should-pass (/.regex "\p{Lower}") "m") + (should-fail (/.regex "\p{Lower}") "M"))) + (_.test "Upper-case." + (and (should-pass (/.regex "\p{Upper}") "M") + (should-fail (/.regex "\p{Upper}") "m"))) + (_.test "Alphabetic." + (and (should-pass (/.regex "\p{Alpha}") "M") + (should-fail (/.regex "\p{Alpha}") "0"))) + (_.test "Numeric digits." + (and (should-pass (/.regex "\p{Digit}") "1") + (should-fail (/.regex "\p{Digit}") "n"))) + (_.test "Alphanumeric." + (and (should-pass (/.regex "\p{Alnum}") "1") + (should-fail (/.regex "\p{Alnum}") "."))) + (_.test "Whitespace." + (and (should-pass (/.regex "\p{Space}") " ") + (should-fail (/.regex "\p{Space}") "."))) + (_.test "Hexadecimal." + (and (should-pass (/.regex "\p{HexDigit}") "a") + (should-fail (/.regex "\p{HexDigit}") "."))) + (_.test "Octal." + (and (should-pass (/.regex "\p{OctDigit}") "6") + (should-fail (/.regex "\p{OctDigit}") "."))) + (_.test "Blank." + (and (should-pass (/.regex "\p{Blank}") text.tab) + (should-fail (/.regex "\p{Blank}") "."))) + (_.test "ASCII." + (and (should-pass (/.regex "\p{ASCII}") text.tab) + (should-fail (/.regex "\p{ASCII}") (text.from-code (hex "1234"))))) + (_.test "Control characters." + (and (should-pass (/.regex "\p{Contrl}") (text.from-code (hex "12"))) + (should-fail (/.regex "\p{Contrl}") "a"))) + (_.test "Punctuation." + (and (should-pass (/.regex "\p{Punct}") "@") + (should-fail (/.regex "\p{Punct}") "a"))) + (_.test "Graph." + (and (should-pass (/.regex "\p{Graph}") "@") + (should-fail (/.regex "\p{Graph}") " "))) + (_.test "Print." + (and (should-pass (/.regex "\p{Print}") (text.from-code (hex "20"))) + (should-fail (/.regex "\p{Print}") (text.from-code (hex "1234"))))) )) -(context: "Regular Expressions [Custom character classes : Part 3]" - ($_ seq - (test "Can make custom character classes more specific." - (and (let [RE (&.regex "[a-z&&[def]]")] - (and (should-fail RE "a") - (should-pass RE "d"))) - - (let [RE (&.regex "[a-z&&[^bc]]")] - (and (should-pass RE "a") - (should-fail RE "b"))) - - (let [RE (&.regex "[a-z&&[^m-p]]")] - (and (should-pass RE "a") - (should-fail RE "m") - (should-fail RE "p"))))) +(def: custom-character-classes + Test + ($_ _.and + (_.test "Can parse using custom character classes." + (and (should-pass (/.regex "[abc]") "a") + (should-fail (/.regex "[abc]") "m"))) + (_.test "Can parse using character ranges." + (and (should-pass (/.regex "[a-z]") "a") + (should-pass (/.regex "[a-z]") "m") + (should-pass (/.regex "[a-z]") "z"))) + (_.test "Can combine character ranges." + (and (should-pass (/.regex "[a-zA-Z]") "a") + (should-pass (/.regex "[a-zA-Z]") "m") + (should-pass (/.regex "[a-zA-Z]") "z") + (should-pass (/.regex "[a-zA-Z]") "A") + (should-pass (/.regex "[a-zA-Z]") "M") + (should-pass (/.regex "[a-zA-Z]") "Z"))) + (_.test "Can negate custom character classes." + (and (should-fail (/.regex "[^abc]") "a") + (should-pass (/.regex "[^abc]") "m"))) + (_.test "Can negate character ranges.." + (and (should-fail (/.regex "[^a-z]") "a") + (should-pass (/.regex "[^a-z]") "0"))) + (_.test "Can parse negate combinations of character ranges." + (and (should-fail (/.regex "[^a-zA-Z]") "a") + (should-pass (/.regex "[^a-zA-Z]") "0"))) + (_.test "Can make custom character classes more specific." + (and (let [RE (/.regex "[a-z&&[def]]")] + (and (should-fail RE "a") + (should-pass RE "d"))) + (let [RE (/.regex "[a-z&&[^bc]]")] + (and (should-pass RE "a") + (should-fail RE "b"))) + (let [RE (/.regex "[a-z&&[^m-p]]")] + (and (should-pass RE "a") + (should-fail RE "m") + (should-fail RE "p"))))) )) -(context: "Regular Expressions [Reference]" - (let [number (&.regex "\d+")] - (test "Can build complex regexs by combining simpler ones." - (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789")))) - -(context: "Regular Expressions [Fuzzy Quantifiers]" - ($_ seq - (test "Can sequentially combine patterns." - (should-passT "aa" (&.regex "aa") "aa")) - - (test "Can match patterns optionally." - (and (should-passT "a" (&.regex "a?") "a") - (should-passT "" (&.regex "a?") ""))) - - (test "Can match a pattern 0 or more times." - (and (should-passT "aaa" (&.regex "a*") "aaa") - (should-passT "" (&.regex "a*") ""))) - - (test "Can match a pattern 1 or more times." - (and (should-passT "aaa" (&.regex "a+") "aaa") - (should-passT "a" (&.regex "a+") "a") - (should-fail (&.regex "a+") ""))) +(def: references + Test + (let [number (/.regex "\d+")] + (_.test "Can build complex regexs by combining simpler ones." + (should-check ["809-345-6789" "809" "345" "6789"] (/.regex "(\@<number>)-(\@<number>)-(\@<number>)") "809-345-6789")))) + +(def: fuzzy-quantifiers + Test + ($_ _.and + (_.test "Can sequentially combine patterns." + (text-should-pass "aa" (/.regex "aa") "aa")) + + (_.test "Can match patterns optionally." + (and (text-should-pass "a" (/.regex "a?") "a") + (text-should-pass "" (/.regex "a?") ""))) + + (_.test "Can match a pattern 0 or more times." + (and (text-should-pass "aaa" (/.regex "a*") "aaa") + (text-should-pass "" (/.regex "a*") ""))) + + (_.test "Can match a pattern 1 or more times." + (and (text-should-pass "aaa" (/.regex "a+") "aaa") + (text-should-pass "a" (/.regex "a+") "a") + (should-fail (/.regex "a+") ""))) )) -(context: "Regular Expressions [Crisp Quantifiers]" - ($_ seq - (test "Can match a pattern N times." - (and (should-passT "aa" (&.regex "a{2}") "aa") - (should-passT "a" (&.regex "a{1}") "a") - (should-fail (&.regex "a{3}") "aa"))) - - (test "Can match a pattern at-least N times." - (and (should-passT "aa" (&.regex "a{1,}") "aa") - (should-passT "aa" (&.regex "a{2,}") "aa") - (should-fail (&.regex "a{3,}") "aa"))) - - (test "Can match a pattern at-most N times." - (and (should-passT "aa" (&.regex "a{,2}") "aa") - (should-passT "aa" (&.regex "a{,3}") "aa"))) - - (test "Can match a pattern between N and M times." - (and (should-passT "a" (&.regex "a{1,2}") "a") - (should-passT "aa" (&.regex "a{1,2}") "aa"))) +(def: crisp-quantifiers + Test + ($_ _.and + (_.test "Can match a pattern N times." + (and (text-should-pass "aa" (/.regex "a{2}") "aa") + (text-should-pass "a" (/.regex "a{1}") "a") + (should-fail (/.regex "a{3}") "aa"))) + + (_.test "Can match a pattern at-least N times." + (and (text-should-pass "aa" (/.regex "a{1,}") "aa") + (text-should-pass "aa" (/.regex "a{2,}") "aa") + (should-fail (/.regex "a{3,}") "aa"))) + + (_.test "Can match a pattern at-most N times." + (and (text-should-pass "aa" (/.regex "a{,2}") "aa") + (text-should-pass "aa" (/.regex "a{,3}") "aa"))) + + (_.test "Can match a pattern between N and M times." + (and (text-should-pass "a" (/.regex "a{1,2}") "a") + (text-should-pass "aa" (/.regex "a{1,2}") "aa"))) )) -(context: "Regular Expressions [Groups]" - ($_ seq - (test "Can extract groups of sub-matches specified in a pattern." - (and (should-check ["abc" "b"] (&.regex "a(.)c") "abc") - (should-check ["abbbbbc" "bbbbb"] (&.regex "a(b+)c") "abbbbbc") - (should-check ["809-345-6789" "809" "345" "6789"] (&.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789") - (should-check ["809-345-6789" "809" "6789"] (&.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789") - (should-check ["809-809-6789" "809" "6789"] (&.regex "(\d{3})-\0-(\d{4})") "809-809-6789") - (should-check ["809-809-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789") - (should-check ["809-809-6789-6789" "809" "6789"] (&.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789"))) - - (test "Can specify groups within groups." - (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (&.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789")) +(def: groups + Test + ($_ _.and + (_.test "Can extract groups of sub-matches specified in a pattern." + (and (should-check ["abc" "b"] (/.regex "a(.)c") "abc") + (should-check ["abbbbbc" "bbbbb"] (/.regex "a(b+)c") "abbbbbc") + (should-check ["809-345-6789" "809" "345" "6789"] (/.regex "(\d{3})-(\d{3})-(\d{4})") "809-345-6789") + (should-check ["809-345-6789" "809" "6789"] (/.regex "(\d{3})-(?:\d{3})-(\d{4})") "809-345-6789") + (should-check ["809-809-6789" "809" "6789"] (/.regex "(\d{3})-\0-(\d{4})") "809-809-6789") + (should-check ["809-809-6789" "809" "6789"] (/.regex "(?<code>\d{3})-\k<code>-(\d{4})") "809-809-6789") + (should-check ["809-809-6789-6789" "809" "6789"] (/.regex "(?<code>\d{3})-\k<code>-(\d{4})-\0") "809-809-6789-6789"))) + + (_.test "Can specify groups within groups." + (should-check ["809-345-6789" "809" ["345-6789" "345" "6789"]] (/.regex "(\d{3})-((\d{3})-(\d{4}))") "809-345-6789")) )) -(context: "Regular Expressions [Alternation]" - ($_ seq - (test "Can specify alternative patterns." - (and (should-check ["a" (0 [])] (&.regex "a|b") "a") - (should-check ["b" (1 [])] (&.regex "a|b") "b") - (should-fail (&.regex "a|b") "c"))) - - (test "Can have groups within alternations." - (and (should-check ["abc" (0 ["b" "c"])] (&.regex "a(.)(.)|b(.)(.)") "abc") - (should-check ["bcd" (1 ["c" "d"])] (&.regex "a(.)(.)|b(.)(.)") "bcd") - (should-fail (&.regex "a(.)(.)|b(.)(.)") "cde") - - (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])] - (&.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d") - "809-345-6789"))) +(def: alternation + Test + ($_ _.and + (_.test "Can specify alternative patterns." + (and (should-check ["a" (0 [])] (/.regex "a|b") "a") + (should-check ["b" (1 [])] (/.regex "a|b") "b") + (should-fail (/.regex "a|b") "c"))) + + (_.test "Can have groups within alternations." + (and (should-check ["abc" (0 ["b" "c"])] (/.regex "a(.)(.)|b(.)(.)") "abc") + (should-check ["bcd" (1 ["c" "d"])] (/.regex "a(.)(.)|b(.)(.)") "bcd") + (should-fail (/.regex "a(.)(.)|b(.)(.)") "cde") + + (should-check ["809-345-6789" (0 ["809" "345-6789" "345" "6789"])] + (/.regex "(\d{3})-((\d{3})-(\d{4}))|b(.)d") + "809-345-6789"))) )) -(context: "Pattern-matching" - (<| (times 100) - (do @ - [sample1 (r.unicode 3) - sample2 (r.unicode 3) - sample3 (r.unicode 4)] - (case (format sample1 "-" sample2 "-" sample3) - (&.^regex "(.{3})-(.{3})-(.{4})" - [_ match1 match2 match3]) - (test "Can pattern-match using regular-expressions." - (and (text;= sample1 match1) - (text;= sample2 match2) - (text;= sample3 match3))) - - _ - (test "Cannot pattern-match using regular-expressions." - #0))))) +(def: pattern-matching + Test + (do r.monad + [sample1 (r.unicode 3) + sample2 (r.unicode 3) + sample3 (r.unicode 4)] + (case (format sample1 "-" sample2 "-" sample3) + (/.^regex "(.{3})-(.{3})-(.{4})" + [_ match1 match2 match3]) + (_.test "Can pattern-match using regular-expressions." + (and (text@= sample1 match1) + (text@= sample2 match2) + (text@= sample3 match3))) + + _ + (_.test "Cannot pattern-match using regular-expressions." + #0)))) + +(def: #export test + Test + (<| (_.context (%name (name-of /.regex))) + ($_ _.and + ..basics + ..system-character-classes + ..special-system-character-classes + ..custom-character-classes + ..references + ..fuzzy-quantifiers + ..crisp-quantifiers + ..groups + ..alternation + ..pattern-matching + ))) |