(.using [library [lux (.except pattern) ["[0]" meta] [abstract [monad (.only do)]] [control ["[0]" maybe] ["[0]" try] ["[0]" exception (.only exception:)] ["<>" parser (.open: "[1]#[0]" monad) ["<[0]>" code]]] [data ["[0]" product] ["[0]" text ["<[1]>" \\parser (.only Parser)]] [collection ["[0]" list (.open: "[1]#[0]" mix monad)]]] [macro (.only with_symbols) [syntax (.only syntax)] ["^" pattern] ["[0]" code]] [math [number (.only hex) ["n" nat (.open: "[1]#[0]" decimal)]]]]] ["[0]" // (.only) ["%" format (.only format)]]) (def: regex_char^ (Parser Text) (.none_of "\.|&()[]{}")) (def: escaped_char^ (Parser Text) (do <>.monad [? (<>.parses? (.this "\"))] (if ? .any regex_char^))) (def: (refine^ refinement^ base^) (All (_ a) (-> (Parser a) (Parser Text) (Parser Text))) (do <>.monad [output base^ _ (.local output refinement^)] (in output))) (def: word^ (Parser Text) (<>.either .alpha_num (.one_of "_"))) (def: (copy reference) (-> Text (Parser Text)) (<>.after (.this reference) (<>#in reference))) (def: together^ (-> (Parser (List Text)) (Parser Text)) (at <>.monad each //.together)) (def: symbol_char^ (Parser Text) (.none_of (format "[]{}()s.<>" //.double_quote))) (def: symbol_part^ (Parser Text) (do <>.monad [head (refine^ (.not .decimal) symbol_char^) tail (.some symbol_char^)] (in (format head tail)))) (def: (symbol^ current_module) (-> Text (Parser Symbol)) (all <>.either (<>.and (<>#in current_module) (<>.after (.this "..") symbol_part^)) (<>.and symbol_part^ (<>.after (.this ".") symbol_part^)) (<>.and (<>#in .prelude_module) (<>.after (.this ".") symbol_part^)) (<>.and (<>#in "") symbol_part^))) (def: (re_var^ current_module) (-> Text (Parser Code)) (do <>.monad [symbol (.enclosed ["\@<" ">"] (symbol^ current_module))] (in (` (is ((~! .Parser) Text) (~ (code.symbol symbol))))))) (def: re_range^ (Parser Code) (do [! <>.monad] [from (|> regex_char^ (at ! each (|>> (//.char 0) maybe.trusted))) _ (.this "-") to (|> regex_char^ (at ! each (|>> (//.char 0) maybe.trusted)))] (in (` ((~! .range) (~ (code.nat from)) (~ (code.nat to))))))) (def: re_char^ (Parser Code) (do <>.monad [char escaped_char^] (in (` ((~! ..copy) (~ (code.text char))))))) (def: re_options^ (Parser Code) (do <>.monad [options (.many escaped_char^)] (in (` ((~! .one_of) (~ (code.text options))))))) (def: re_user_class^' (Parser Code) (do <>.monad [negate? (<>.maybe (.this "^")) parts (<>.many (all <>.either re_range^ re_options^))] (in (case negate? {.#Some _} (` ((~! .not) (all ((~! <>.either)) (~+ parts)))) {.#None} (` (all ((~! <>.either)) (~+ parts))))))) (def: re_user_class^ (Parser Code) (do <>.monad [init ..re_user_class^' rest (<>.some (<>.after (.this "&&") (.enclosed ["[" "]"] ..re_user_class^')))] (in (list#mix (function (_ refinement base) (` ((~! refine^) (~ refinement) (~ base)))) init rest)))) (def: blank^ (Parser Text) (.one_of (format " " //.tab))) (def: ascii^ (Parser Text) (.range (hex "0") (hex "7F"))) (def: control^ (Parser Text) (<>.either (.range (hex "0") (hex "1F")) (.one_of (//.of_char (hex "7F"))))) (def: punct^ (Parser Text) (.one_of (format "!#$%&'()*+,-./:;<=>?@[\]^_`{|}~" //.double_quote))) (def: graph^ (Parser Text) (<>.either punct^ .alpha_num)) (def: print^ (Parser Text) (<>.either graph^ (.one_of (//.of_char (hex "20"))))) (def: re_system_class^ (Parser Code) (do <>.monad [] (all <>.either (<>.after (.this ".") (in (` (~! .any)))) (<>.after (.this "\d") (in (` (~! .decimal)))) (<>.after (.this "\D") (in (` ((~! .not) (~! .decimal))))) (<>.after (.this "\s") (in (` (~! .space)))) (<>.after (.this "\S") (in (` ((~! .not) (~! .space))))) (<>.after (.this "\w") (in (` (~! word^)))) (<>.after (.this "\W") (in (` ((~! .not) (~! word^))))) (<>.after (.this "\p{Lower}") (in (` (~! .lower)))) (<>.after (.this "\p{Upper}") (in (` (~! .upper)))) (<>.after (.this "\p{Alpha}") (in (` (~! .alpha)))) (<>.after (.this "\p{Digit}") (in (` (~! .decimal)))) (<>.after (.this "\p{Alnum}") (in (` (~! .alpha_num)))) (<>.after (.this "\p{Space}") (in (` (~! .space)))) (<>.after (.this "\p{HexDigit}") (in (` (~! .hexadecimal)))) (<>.after (.this "\p{OctDigit}") (in (` (~! .octal)))) (<>.after (.this "\p{Blank}") (in (` (~! blank^)))) (<>.after (.this "\p{ASCII}") (in (` (~! ascii^)))) (<>.after (.this "\p{Contrl}") (in (` (~! control^)))) (<>.after (.this "\p{Punct}") (in (` (~! punct^)))) (<>.after (.this "\p{Graph}") (in (` (~! graph^)))) (<>.after (.this "\p{Print}") (in (` (~! print^)))) ))) (def: re_class^ (Parser Code) (<>.either re_system_class^ (.enclosed ["[" "]"] re_user_class^))) (def: number^ (Parser Nat) (|> (.many .decimal) (<>.codec n.decimal))) (def: re_back_reference^ (Parser Code) (<>.either (do <>.monad [_ (.this "\") id number^] (in (` ((~! ..copy) (~ (code.symbol ["" (n#encoded id)])))))) (do <>.monad [_ (.this "\k<") captured_symbol symbol_part^ _ (.this ">")] (in (` ((~! ..copy) (~ (code.symbol ["" captured_symbol])))))))) (def: (re_simple^ current_module) (-> Text (Parser Code)) (all <>.either re_class^ (re_var^ current_module) re_back_reference^ re_char^ )) (def: (re_simple_quantified^ current_module) (-> Text (Parser Code)) (do <>.monad [base (re_simple^ current_module) quantifier (.one_of "?*+")] (case quantifier "?" (in (` ((~! <>.else) "" (~ base)))) "*" (in (` ((~! together^) ((~! <>.some) (~ base))))) ... "+" _ (in (` ((~! together^) ((~! <>.many) (~ base))))) ))) (exception: .public (incorrect_quantification [from Nat to Nat]) (exception.report "Input" (format (%.nat from) "," (%.nat to)) "Should be" (format (%.nat to) "," (%.nat from)))) (def: (re_counted_quantified^ current_module) (-> Text (Parser Code)) (do [! <>.monad] [base (re_simple^ current_module)] (<| (.enclosed ["{" "}"]) (all <>.either (do ! [[from to] (<>.and number^ (<>.after (.this ",") number^)) _ (<>.assertion (exception.error ..incorrect_quantification [from to]) (n.<= to from))] (in (` ((~! together^) ((~! <>.between) (~ (code.nat from)) (~ (code.nat (n.- from to))) (~ base)))))) (do ! [limit (<>.after (.this ",") number^)] (in (` ((~! together^) ((~! <>.at_most) (~ (code.nat limit)) (~ base)))))) (do ! [limit (<>.before (.this ",") number^)] (in (` ((~! together^) ((~! <>.at_least) (~ (code.nat limit)) (~ base)))))) (do ! [limit number^] (in (` ((~! together^) ((~! <>.exactly) (~ (code.nat limit)) (~ base)))))))))) (def: (re_quantified^ current_module) (-> Text (Parser Code)) (<>.either (re_simple_quantified^ current_module) (re_counted_quantified^ current_module))) (def: (re_complex^ current_module) (-> Text (Parser Code)) (all <>.either (re_quantified^ current_module) (re_simple^ current_module))) (type: Re_Group (Variant {#Non_Capturing} {#Capturing [(Maybe Text) Nat]})) (def: (re_sequential^ capturing? re_scoped^ current_module) (-> Bit (-> Text (Parser [Re_Group Code])) Text (Parser [Nat Code])) (do <>.monad [parts (<>.many (<>.or (re_complex^ current_module) (re_scoped^ current_module))) .let [g!total (code.symbol ["" "0total"]) g!temp (code.symbol ["" "0temp"]) [_ names steps] (list#mix (is (-> (Either Code [Re_Group Code]) [Nat (List Code) (List (List Code))] [Nat (List Code) (List (List Code))]) (function (_ part [idx names steps]) (case part (^.or {.#Left complex} {.#Right [{#Non_Capturing} complex]}) [idx names (partial_list (list g!temp complex (` .let) (` [(~ g!total) (at (~! //.monoid) (~' composite) (~ g!total) (~ g!temp))])) steps)] {.#Right [{#Capturing [?name num_captures]} scoped]} (let [[idx! name!] (case ?name {.#Some _name} [idx (code.symbol ["" _name])] {.#None} [(++ idx) (code.symbol ["" (n#encoded idx)])]) access (if (n.> 0 num_captures) (` ((~! product.left) (~ name!))) name!)] [idx! (partial_list name! names) (partial_list (list name! scoped (` .let) (` [(~ g!total) (at (~! //.monoid) (~' composite) (~ g!total) (~ access))])) steps)]) ))) [0 (is (List Code) (list)) (is (List (List Code)) (list))] parts)]] (in [(if capturing? (list.size names) 0) (` ((~! do) (~! <>.monad) [.let [(~ g!total) ""] (~+ (|> steps list.reversed list#conjoint))] ((~ (' in)) [(~ g!total) (~+ (list.reversed names))])))]) )) (def: (unflatten^ lexer) (-> (Parser Text) (Parser [Text Any])) (<>.and lexer (at <>.monad in []))) (def: (|||^ left right) (All (_ l r) (-> (Parser [Text l]) (Parser [Text r]) (Parser [Text (Or l r)]))) (function (_ input) (case (left input) {try.#Success [input' [lt lv]]} {try.#Success [input' [lt {0 #0 lv}]]} {try.#Failure _} (case (right input) {try.#Success [input' [rt rv]]} {try.#Success [input' [rt {0 #1 rv}]]} {try.#Failure error} {try.#Failure error})))) (def: (|||_^ left right) (All (_ l r) (-> (Parser [Text l]) (Parser [Text r]) (Parser Text))) (function (_ input) (case (left input) {try.#Success [input' [lt lv]]} {try.#Success [input' lt]} {try.#Failure _} (case (right input) {try.#Success [input' [rt rv]]} {try.#Success [input' rt]} {try.#Failure error} {try.#Failure error})))) (def: (prep_alternative [num_captures alt]) (-> [Nat Code] Code) (if (n.> 0 num_captures) alt (` ((~! unflatten^) (~ alt))))) (def: (re_alternative^ capturing? re_scoped^ current_module) (-> Bit (-> Text (Parser [Re_Group Code])) Text (Parser [Nat Code])) (do <>.monad [.let [sub^ (re_sequential^ capturing? re_scoped^ current_module)] head sub^ tail (<>.some (<>.after (.this "|") sub^))] (if (list.empty? tail) (in head) (in [(list#mix n.max (product.left head) (list#each product.left tail)) (` (all ((~ (if capturing? (` (~! |||^)) (` (~! |||_^))))) (~ (prep_alternative head)) (~+ (list#each prep_alternative tail))))])))) (def: (re_scoped^ current_module) (-> Text (Parser [Re_Group Code])) (all <>.either (do <>.monad [_ (.this "(?:") [_ scoped] (re_alternative^ #0 re_scoped^ current_module) _ (.this ")")] (in [{#Non_Capturing} scoped])) (do <>.monad [complex (re_complex^ current_module)] (in [{#Non_Capturing} complex])) (do <>.monad [_ (.this "(?<") captured_symbol symbol_part^ _ (.this ">") [num_captures pattern] (re_alternative^ #1 re_scoped^ current_module) _ (.this ")")] (in [{#Capturing [{.#Some captured_symbol} num_captures]} pattern])) (do <>.monad [_ (.this "(") [num_captures pattern] (re_alternative^ #1 re_scoped^ current_module) _ (.this ")")] (in [{#Capturing [{.#None} num_captures]} pattern])))) (def: (regex^ current_module) (-> Text (Parser Code)) (at <>.monad each product.right (re_alternative^ #1 re_scoped^ current_module))) (def: .public regex (syntax (_ [pattern .text]) (do meta.monad [current_module meta.current_module_name] (case (.result (regex^ current_module) pattern) {try.#Failure error} (meta.failure (format "Error while parsing regular-expression:" //.new_line error)) {try.#Success regex} (in (list regex)))))) (def: .public pattern (syntax (_ [[pattern bindings] (.form (<>.and .text (<>.maybe .any))) body .any branches (<>.many .any)]) (with_symbols [g!temp] (in (partial_list (` (^.multi (~ g!temp) [((~! .result) (..regex (~ (code.text pattern))) (~ g!temp)) {try.#Success (~ (maybe.else g!temp bindings))}])) body branches)))))