diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/lux/data/format/json.lux | 260 | ||||
-rw-r--r-- | stdlib/source/lux/data/format/xml.lux | 56 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/lexer.lux | 110 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/regex.lux | 185 |
4 files changed, 277 insertions, 334 deletions
diff --git a/stdlib/source/lux/data/format/json.lux b/stdlib/source/lux/data/format/json.lux index 1cc3000c3..6cf45dfc9 100644 --- a/stdlib/source/lux/data/format/json.lux +++ b/stdlib/source/lux/data/format/json.lux @@ -10,16 +10,16 @@ (data [bool] [text "Text/" Eq<Text> Monoid<Text>] text/format - (text [lexer #+ Lexer Monad<Lexer>]) + (text ["l" lexer #+ Lexer Monad<Lexer> "Lexer/" Monad<Lexer>]) [number "Real/" Codec<Text,Real>] maybe - [char "Char/" Eq<Char> Codec<Text,Char>] - ["R" result #- fail] + [char "Char/" Codec<Text,Char>] + ["R" result] [sum] [product] (coll [list "" Fold<List> "List/" Monad<List>] [vector #+ Vector vector "Vector/" Monad<Vector>] - [dict #+ Dict])) + ["d" dict])) [macro #+ Monad<Lux> with-gensyms] (macro [syntax #+ syntax:] [code] @@ -43,18 +43,18 @@ (#Number Number) (#String String) (#Array (Vector JSON)) - (#Object (Dict String JSON))) + (#Object (d;Dict String JSON))) (do-template [<name> <type>] [(type: #export <name> <type>)] [Array (Vector JSON)] - [Object (Dict String JSON)] + [Object (d;Dict String JSON)] ) (type: #export (Parser a) {#;doc "JSON parsers."} - (-> JSON (Result a))) + (-> JSON (R;Result a))) (type: #export (Gen a) {#;doc "JSON generators."} @@ -99,7 +99,7 @@ _ (macro;fail "Wrong syntax for JSON object."))) pairs)] - (wrap (list (` (: JSON (#Object (dict;from-list text;Hash<Text> (list (~@ pairs'))))))))) + (wrap (list (` (: JSON (#Object (d;from-list text;Hash<Text> (list (~@ pairs'))))))))) _ (wrap (list token)) @@ -124,7 +124,7 @@ (-> (-> JSON Text) (-> Object Text)) (format "{" (|> object - dict;entries + d;entries (List/map (function [[key value]] (format (:: text;Codec<Text,Text> encode key) ":" (show-json value)))) (text;join-with ",")) "}")) @@ -150,20 +150,20 @@ (def: #export (fields json) {#;doc "Get all the fields in a JSON object."} - (-> JSON (Result (List String))) + (-> JSON (R;Result (List String))) (case json (#Object obj) - (#R;Success (dict;keys obj)) + (#R;Success (d;keys obj)) _ (#R;Error (format "Cannot get the fields of a non-object.")))) (def: #export (get key json) {#;doc "A JSON object field getter."} - (-> String JSON (Result JSON)) + (-> String JSON (R;Result JSON)) (case json (#Object obj) - (case (dict;get key obj) + (case (d;get key obj) (#;Some value) (#R;Success value) @@ -175,10 +175,10 @@ (def: #export (set key value json) {#;doc "A JSON object field setter."} - (-> String JSON JSON (Result JSON)) + (-> String JSON JSON (R;Result JSON)) (case json (#Object obj) - (#R;Success (#Object (dict;put key value obj))) + (#R;Success (#Object (d;put key value obj))) _ (#R;Error (format "Cannot set field " (show-string key) " of a non-object.")))) @@ -186,7 +186,7 @@ (do-template [<name> <tag> <type> <desc>] [(def: #export (<name> key json) {#;doc (#;TextA (format "A JSON object field getter for " <desc> "."))} - (-> Text JSON (Result <type>)) + (-> Text JSON (R;Result <type>)) (case (get key json) (#R;Success (<tag> value)) (#R;Success value) @@ -228,23 +228,23 @@ ## Lexers (def: space~ (Lexer Text) - (lexer;some' lexer;space)) + (l;some' l;space)) (def: data-sep - (Lexer [Text Char Text]) - ($_ lexer;seq space~ (lexer;char #",") space~)) + (Lexer [Text Unit Text]) + ($_ l;seq space~ (l;this ",") space~)) (def: null~ (Lexer Null) (do Monad<Lexer> - [_ (lexer;text "null")] + [_ (l;this "null")] (wrap []))) (do-template [<name> <token> <value>] [(def: <name> (Lexer Boolean) (do Monad<Lexer> - [_ (lexer;text <token>)] + [_ (l;this <token>)] (wrap <value>)))] [t~ "true" true] @@ -253,76 +253,65 @@ (def: boolean~ (Lexer Boolean) - (lexer;either t~ f~)) + (l;either t~ f~)) (def: number~ (Lexer Number) (do Monad<Lexer> - [?sign (: (Lexer Text) - (lexer;default "" - (lexer;text "-"))) + [signed? (l;this? "-") digits (: (Lexer Text) - (lexer;many' lexer;digit)) + (l;many' l;digit)) decimals (: (Lexer Text) - (lexer;default "0" + (l;default "0" (do @ - [_ (lexer;text ".")] - (lexer;many' lexer;digit)))) + [_ (l;this ".")] + (l;many' l;digit)))) exp (: (Lexer Text) - (lexer;default "" + (l;default "" (do @ - [mark (lexer;either (lexer;text "e") (lexer;text "E")) - sign (lexer;default "" (lexer;text "-")) - offset (lexer;many' lexer;digit)] - (wrap (format mark sign offset)))))] - (case (: (Result Real) - (Real/decode (format ?sign digits "." decimals exp))) + [mark (l;one-of "eE") + signed?' (l;this? "-") + offset (l;many' l;digit)] + (wrap (format mark (if signed?' "-" "") offset)))))] + (case (: (R;Result Real) + (Real/decode (format (if signed? "-" "") digits "." decimals exp))) (#R;Error message) - (lexer;fail message) + (l;fail message) (#R;Success value) (wrap value)))) -(def: (un-escape escaped) - (-> Char Text) - (case escaped - #"t" "\t" - #"b" "\b" - #"n" "\n" - #"r" "\r" - #"f" "\f" - #"\"" "\"" - #"\\" "\\" - _ "")) - -(def: string-body~ +(def: escaped~ (Lexer Text) - (loop [_ []] - (do Monad<Lexer> - [chars (lexer;some' (lexer;none-of "\\\"")) - stop-char lexer;peek] - (if (Char/= #"\\" stop-char) - (do @ - [_ lexer;any - escaped lexer;any - next-chars (recur [])] - (wrap (format chars (un-escape escaped) next-chars))) - (wrap chars))))) + ($_ l;either + (l;after (l;this "\\t") (Lexer/wrap "\t")) + (l;after (l;this "\\b") (Lexer/wrap "\b")) + (l;after (l;this "\\n") (Lexer/wrap "\n")) + (l;after (l;this "\\r") (Lexer/wrap "\r")) + (l;after (l;this "\\f") (Lexer/wrap "\f")) + (l;after (l;this "\\\"") (Lexer/wrap "\"")) + (l;after (l;this "\\\\") (Lexer/wrap "\\")))) (def: string~ (Lexer String) - (do Monad<Lexer> - [_ (lexer;text "\"") - string-body string-body~ - _ (lexer;text "\"")] - (wrap string-body))) + (<| (l;enclosed ["\"" "\""]) + (loop [_ []] + (do Monad<Lexer> + [chars (l;some' (l;none-of "\\\"")) + stop l;peek] + (if (Text/= "\\" stop) + (do @ + [escaped escaped~ + next-chars (recur [])] + (wrap (format chars escaped next-chars))) + (wrap chars)))))) (def: (kv~ json~) (-> (-> Unit (Lexer JSON)) (Lexer [String JSON])) (do Monad<Lexer> [key string~ _ space~ - _ (lexer;char #":") + _ (l;this ":") _ space~ value (json~ [])] (wrap [key value]))) @@ -331,20 +320,20 @@ [(def: (<name> json~) (-> (-> Unit (Lexer JSON)) (Lexer <type>)) (do Monad<Lexer> - [_ (lexer;char <open>) + [_ (l;this <open>) _ space~ - elems (lexer;sep-by data-sep <elem-parser>) + elems (l;sep-by data-sep <elem-parser>) _ space~ - _ (lexer;char <close>)] + _ (l;this <close>)] (wrap (<prep> elems))))] - [array~ Array #"[" #"]" (json~ []) vector;from-list] - [object~ Object #"{" #"}" (kv~ json~) (dict;from-list text;Hash<Text>)] + [array~ Array "[" "]" (json~ []) vector;from-list] + [object~ Object "{" "}" (kv~ json~) (d;from-list text;Hash<Text>)] ) (def: (json~' _) (-> Unit (Lexer JSON)) - ($_ lexer;alt null~ boolean~ number~ string~ (array~ json~') (object~ json~'))) + ($_ l;alt null~ boolean~ number~ string~ (array~ json~') (object~ json~'))) ## [Structures] (struct: #export _ (Functor Parser) @@ -514,7 +503,7 @@ (function [json] (case json (#Array values) - (do Monad<Result> + (do R;Monad<Result> [elems (mapM @ parser (vector;to-list values))] (wrap elems)) @@ -523,18 +512,18 @@ (def: #export (object parser) {#;doc "Parses a JSON object, assuming that every field's value can be parsed the same way."} - (All [a] (-> (Parser a) (Parser (Dict String a)))) + (All [a] (-> (Parser a) (Parser (d;Dict String a)))) (function [json] (case json (#Object fields) - (do Monad<Result> + (do R;Monad<Result> [kvs (mapM @ (function [[key val']] (do @ [val (parser val')] (wrap [key val]))) - (dict;entries fields))] - (wrap (dict;from-list text;Hash<Text> kvs))) + (d;entries fields))] + (wrap (d;from-list text;Hash<Text> kvs))) _ (#R;Error (format "JSON value is not an object: " (show-json json)))))) @@ -624,7 +613,7 @@ (#R;Success x) (#R;Success (#;Some x)))) (def: #export (run json parser) - (All [a] (-> JSON (Parser a) (Result a))) + (All [a] (-> JSON (Parser a) (R;Result a))) (parser json)) (def: #export (ensure test parser json) @@ -654,7 +643,7 @@ (-> (List String) (Parser Unit)) (case json (#Object kvs) - (let [actual-fields (dict;keys kvs)] + (let [actual-fields (d;keys kvs)] (if (and (n.= (list;size wanted-fields) (list;size actual-fields)) (list;every? (list;member? text;Eq<Text> wanted-fields) actual-fields)) @@ -691,21 +680,21 @@ (list;indices (vector;size xs)))) [(#Object xs) (#Object ys)] - (and (n.= (dict;size xs) (dict;size ys)) + (and (n.= (d;size xs) (d;size ys)) (fold (function [[xk xv] prev] (and prev - (case (dict;get xk ys) + (case (d;get xk ys) #;None false (#;Some yv) (= xv yv)))) true - (dict;entries xs))) + (d;entries xs))) _ false))) (struct: #export _ (Codec Text JSON) (def: encode show-json) - (def: decode (function [input] (lexer;run input (json~' []))))) + (def: decode (function [input] (l;run input (json~' []))))) ## [Syntax] (type: Shape @@ -785,39 +774,22 @@ [Text poly;text ;;gen-string])] ($_ macro;either <basic> - (with-gensyms [g!type-fun g!case g!input g!key g!val] + (with-gensyms [g!input g!key g!val] (do @ - [:sub: (poly;apply-1 (ident-for ;List) :x:) - [g!vars members] (poly;tuple :sub:) - :val: (case members - (^ (list :key: :val:)) - (do @ [_ (poly;text :key:)] - (wrap :val:)) - - _ - (macro;fail "")) - #let [new-*env* (poly;extend-env [:x: g!type-fun] - (list;zip2 (|> g!vars list;size poly;type-var-indices) - g!vars) - *env*)] - .val. (Codec<JSON,?>//encode new-*env* :val:) - #let [:x:+ (case g!vars - #;Nil - (->Codec//encode (type;to-ast :x:)) - - _ - (` (All (~ g!type-fun) [(~@ g!vars)] - (-> (~@ (List/map ->Codec//encode g!vars)) - (~ (->Codec//encode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]] - (wrap (` (: (~ :x:+) - (function [(~@ g!vars) (~ g!input)] + [[:key: :val:] (poly;apply-2 (ident-for d;Dict) :x:) + _ (poly;text :key:) + .val. (Codec<JSON,?>//encode *env* :val:)] + (wrap (` (: (~ (->Codec//encode (type;to-ast :x:))) + (function [(~ g!input)] (|> (~ g!input) - (_map_ (: (-> [Text (~ (type;to-ast :val:))] - [Text JSON]) - (function [[(~ g!key) (~ g!val)]] - [(~ g!key) - ((~ .val.) (~ g!val))]))) - ;;object)) + d;entries + (;;_map_ (: (-> [Text (~ (type;to-ast :val:))] + [Text JSON]) + (function [[(~ g!key) (~ g!val)]] + [(~ g!key) + ((~ .val.) (~ g!val))]))) + (d;from-list text;Hash<Text>) + #;;Object)) ))) )) (do @ @@ -829,7 +801,7 @@ [:sub: (poly;apply-1 (ident-for ;List) :x:) .sub. (Codec<JSON,?>//encode *env* :sub:)] (wrap (` (: (~ (->Codec//encode (type;to-ast :x:))) - (|>. (_map_ (~ .sub.)) vector;from-list ;;gen-array))))) + (|>. (;;_map_ (~ .sub.)) vector;from-list ;;gen-array))))) (with-gensyms [g!type-fun g!case g!input] (do @ [[g!vars members] (poly;variant :x:) @@ -924,7 +896,7 @@ (poly: #hidden (Codec<JSON,?>//decode *env* :x:) (let [->Codec//decode (: (-> Code Code) - (function [.type.] (` (-> JSON (Result (~ .type.))))))] + (function [.type.] (` (-> JSON (R;Result (~ .type.))))))] (with-expansions [<basic> (do-template [<type> <matcher> <decoder>] [(do @ [_ (<matcher> :x:)] (wrap (` (: (~ (->Codec//decode (` <type>))) <decoder>))))] @@ -946,40 +918,23 @@ [List (poly;apply-1 (ident-for ;List)) ;;array])] ($_ macro;either <basic> - (with-gensyms [g!type-fun g!case g!input g!key g!val] + (with-gensyms [g!input g!output g!key g!val] (do @ - [:sub: (poly;apply-1 (ident-for ;List) :x:) - [g!vars members] (poly;tuple :sub:) - :val: (case members - (^ (list :key: :val:)) - (do @ [_ (poly;text :key:)] - (wrap :val:)) - - _ - (macro;fail "")) - #let [new-*env* (poly;extend-env [:x: g!type-fun] - (list;zip2 (|> g!vars list;size poly;type-var-indices) g!vars) - *env*)] - .val. (Codec<JSON,?>//decode new-*env* :val:) - #let [:x:+ (case g!vars - #;Nil - (->Codec//decode (type;to-ast :x:)) - - _ - (` (All (~ g!type-fun) [(~@ g!vars)] - (-> (~@ (List/map ->Codec//decode g!vars)) - (~ (->Codec//decode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]] - (wrap (` (: (~ :x:+) - (function [(~@ g!vars) (~ g!input)] - (do Monad<Result> - [(~ g!key) (;;fields (~ g!input))] - (mapM (~ (' %)) - (function [(~ g!key)] - (do Monad<Result> - [(~ g!val) (;;get (~ g!key) (~ g!input)) - (~ g!val) (;;run (~ g!val) (~ .val.))] - ((~ (' wrap)) [(~ g!key) (~ g!val)]))) - (~ g!key)))) + [[:key: :val:] (poly;apply-2 (ident-for d;Dict) :x:) + _ (poly;text :key:) + .val. (Codec<JSON,?>//decode *env* :val:)] + (wrap (` (: (~ (->Codec//decode (type;to-ast :x:))) + (function [(~ g!input)] + (do R;Monad<Result> + [(~ g!key) (;;fields (~ g!input)) + (~ g!output) (mapM R;Monad<Result> + (function [(~ g!key)] + (do R;Monad<Result> + [(~ g!val) (;;get (~ g!key) (~ g!input)) + (~ g!val) (;;run (~ g!val) (~ .val.))] + ((~ (' wrap)) [(~ g!key) (~ g!val)]))) + (~ g!key))] + ((~' wrap) (d;from-list text;Hash<Text> (~ g!output))))) ))) )) <complex> @@ -1043,7 +998,7 @@ (~ (->Codec//decode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]] (wrap (` (: (~ :x:+) (function [(~@ g!vars) (~ g!input)] - (do Monad<Result> + (do R;Monad<Result> [(~@ (List/join extraction))] ((~ (' wrap)) (~ (code;record (List/map (function [[name :slot:]] [(code;tag name) (code;symbol ["" (product;right name)])]) @@ -1108,7 +1063,8 @@ #maybe (Maybe Int) #list (List Int) #variant Variant - #tuple [Int Real Char]}) + #tuple [Int Real Char] + #dict (Dict Text Int)}) (derived: (Codec<JSON,?> Record)))} (wrap (list (` (: (Codec JSON (~ :x:)) diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux index db68fbf29..aaafcd3d0 100644 --- a/stdlib/source/lux/data/format/xml.lux +++ b/stdlib/source/lux/data/format/xml.lux @@ -36,52 +36,52 @@ ## [Parsing] (def: xml-standard-escape-char^ - (l;Lexer Char) + (l;Lexer Text) ($_ l;either - (l;after (l;text "<") (lex/wrap #"<")) - (l;after (l;text ">") (lex/wrap #">")) - (l;after (l;text "&") (lex/wrap #"&")) - (l;after (l;text "'") (lex/wrap #"'")) - (l;after (l;text """) (lex/wrap #"\"")))) + (l;after (l;this "<") (lex/wrap "<")) + (l;after (l;this ">") (lex/wrap ">")) + (l;after (l;this "&") (lex/wrap "&")) + (l;after (l;this "'") (lex/wrap "'")) + (l;after (l;this """) (lex/wrap "\"")))) (def: xml-unicode-escape-char^ - (l;Lexer Char) + (l;Lexer Text) (|> (do l;Monad<Lexer> - [hex? (l;opt (l;text "x")) + [hex? (l;opt (l;this "x")) code (case hex? #;None (l;codec number;Codec<Text,Int> (l;many' l;digit)) (#;Some _) (l;codec number;Hex@Codec<Text,Int> (l;many' l;hex-digit)))] - (wrap (|> code int-to-nat char;char))) - (l;before (l;text ";")) - (l;after (l;text "&#")))) + (wrap (|> code int-to-nat char;char char;as-text))) + (l;before (l;this ";")) + (l;after (l;this "&#")))) (def: xml-escape-char^ - (l;Lexer Char) + (l;Lexer Text) (l;either xml-standard-escape-char^ xml-unicode-escape-char^)) (def: xml-char^ - (l;Lexer Char) + (l;Lexer Text) (l;either (l;none-of "<>&'\"") xml-escape-char^)) (def: xml-identifier (l;Lexer Text) (do l;Monad<Lexer> - [head (l;either (l;char #"_") + [head (l;either (l;one-of "_") l;alpha) tail (l;some' (l;either (l;one-of "_.-") l;alpha-num))] - (wrap (format (char;as-text head) tail)))) + (wrap (format head tail)))) (def: namespaced-symbol^ (l;Lexer Ident) (do l;Monad<Lexer> [first-part xml-identifier - ?second-part (<| l;opt (l;after (l;char #":")) xml-identifier)] + ?second-part (<| l;opt (l;after (l;this ":")) xml-identifier)] (case ?second-part #;None (wrap ["" first-part]) @@ -109,7 +109,7 @@ (<| (:: l;Monad<Lexer> map (D;from-list ident;Hash<Ident>)) l;some (l;seq (spaced^ attr-name^)) - (l;after (l;char #"=")) + (l;after (l;this "=")) (spaced^ attr-value^))) (def: (close-tag^ expected) @@ -117,7 +117,7 @@ (do l;Monad<Lexer> [actual (|> tag^ spaced^ - (l;after (l;char #"/")) + (l;after (l;this "/")) (l;enclosed ["<" ">"]))] (l;assert (format "Close tag does not match open tag.\n" "Expected: " (%ident expected) "\n" @@ -126,24 +126,24 @@ (def: comment^ (l;Lexer Text) - (|> (l;some' (l;not (l;text "--"))) - (l;after (l;text "-->")) - (l;after (l;text "<--")) + (|> (l;some' (l;not (l;this "--"))) + (l;after (l;this "-->")) + (l;after (l;this "<--")) spaced^)) (def: xml-header^ (l;Lexer Attrs) (|> (spaced^ attrs^) - (l;before (l;text "?>")) - (l;after (l;text "<?xml")) + (l;before (l;this "?>")) + (l;after (l;this "<?xml")) spaced^)) (def: cdata^ (l;Lexer Text) - (let [end (l;text "]]>")] + (let [end (l;this "]]>")] (|> (l;some' (l;not end)) (l;after end) - (l;after (l;text "<![CDATA[")) + (l;after (l;this "<![CDATA[")) spaced^))) (def: text^ @@ -159,14 +159,14 @@ (l;either text^ (spaced^ (do l;Monad<Lexer> - [_ (l;char #"<") + [_ (l;this "<") tag (spaced^ tag^) attrs (spaced^ attrs^) #let [no-children^ (do l;Monad<Lexer> - [_ (l;text "/>")] + [_ (l;this "/>")] (wrap (node tag attrs (list)))) with-children^ (do l;Monad<Lexer> - [_ (l;char #">") + [_ (l;this ">") children (l;some node^) _ (close-tag^ tag)] (wrap (node tag attrs children)))]] diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux index 58e636b53..8475d91e2 100644 --- a/stdlib/source/lux/data/text/lexer.lux +++ b/stdlib/source/lux/data/text/lexer.lux @@ -7,7 +7,7 @@ (data [text "Text/" Eq<Text> Monoid<Text>] [number "Int/" Codec<Text,Int>] [product] - [char "Char/" Order<Char>] + [char "Char/" Order<Char> Codec<Text,Char>] maybe ["R" result] (coll [list "" Functor<List>])))) @@ -79,11 +79,11 @@ (def: #export any {#;doc "Just returns the next character without applying any logic."} - (Lexer Char) + (Lexer Text) (function [input] (case [(text;nth +0 input) (text;split +1 input)] [(#;Some output) (#;Some [_ input'])] - (#R;Success [input' output]) + (#R;Success [input' (char;as-text output)]) _ (#R;Error "Cannot parse character from empty text.")) @@ -126,7 +126,7 @@ (def: #export (not p) {#;doc "Produce a character if the lexer fails."} - (All [a] (-> (Lexer a) (Lexer Char))) + (All [a] (-> (Lexer a) (Lexer Text))) (function [input] (case (p input) (#R;Error msg) @@ -232,16 +232,26 @@ (#R;Success [input (#;Some value)]) ))) -(def: #export (text test) +(def: #export (this reference) {#;doc "Lex a text if it matches the given sample."} - (-> Text (Lexer Text)) + (-> Text (Lexer Unit)) (function [input] - (if (text;starts-with? test input) - (case (text;split (text;size test) input) + (if (text;starts-with? reference input) + (case (text;split (text;size reference) input) #;None (#R;Error "") - (#;Some [_ input']) (#R;Success [input' test])) + (#;Some [_ input']) (#R;Success [input' []])) (let [(^open "T/") text;Codec<Text,Text>] - (#R;Error ($_ Text/append "Invalid match: " (T/encode test) " @ " (T/encode input))))) + (#R;Error ($_ Text/append "Invalid match: " (T/encode reference) " @ " (T/encode input))))))) + +(def: #export (this? reference) + {#;doc "Lex a text if it matches the given sample."} + (-> Text (Lexer Bool)) + (function [input] + (if (text;starts-with? reference input) + (case (text;split (text;size reference) input) + #;None (#R;Success [input false]) + (#;Some [_ input']) (#R;Success [input' true])) + (#R;Success [input false])) )) (def: #export (sep-by sep lexer) @@ -270,30 +280,16 @@ (def: #export peek {#;doc "Lex the next character (without consuming it from the input)."} - (Lexer Char) + (Lexer Text) (function [input] (case (text;nth +0 input) (#;Some output) - (#R;Success [input output]) + (#R;Success [input (char;as-text output)]) _ (#R;Error "Cannot peek character from empty text.")) )) -(def: #export (char test) - {#;doc "Lex a character if it matches the given sample."} - (-> Char (Lexer Char)) - (function [input] - (case [(text;nth +0 input) (text;split +1 input)] - [(#;Some char') (#;Some [_ input'])] - (if (Char/= test char') - (#R;Success [input' test]) - (#R;Error ($_ Text/append "Expected " (:: char;Codec<Text,Char> encode test) " @ " (:: text;Codec<Text,Text> encode input)))) - - _ - (#R;Error "Cannot parse character from empty text.")) - )) - (def: #export get-input {#;doc "Get all of the remaining input (without consuming it)."} (Lexer Text) @@ -302,19 +298,20 @@ (def: #export (char-range bottom top) {#;doc "Only lex characters within a range."} - (-> Char Char (Lexer Char)) + (-> Char Char (Lexer Text)) (do Monad<Lexer> [input get-input char any - _ (assert ($_ Text/append "Character is not within range: " (:: char;Codec<Text,Char> encode bottom) "-" (:: char;Codec<Text,Char> encode top) " @ " (:: text;Codec<Text,Text> encode input)) - (and (Char/>= bottom char) - (Char/<= top char)))] + #let [char' (|> char (text;nth +0) assume)] + _ (assert ($_ Text/append "Character is not within range: " (Char/encode bottom) "-" (Char/encode top) " @ " (:: text;Codec<Text,Text> encode input)) + (and (Char/>= bottom char') + (Char/<= top char')))] (wrap char))) (do-template [<name> <bottom> <top> <desc>] [(def: #export <name> {#;doc (#;TextA ($_ Text/append "Only lex " <desc> " characters."))} - (Lexer Char) + (Lexer Text) (char-range <bottom> <top>))] [upper #"A" #"Z" "uppercase"] @@ -325,17 +322,17 @@ (def: #export alpha {#;doc "Only lex alphabetic characters."} - (Lexer Char) + (Lexer Text) (either lower upper)) (def: #export alpha-num {#;doc "Only lex alphanumeric characters."} - (Lexer Char) + (Lexer Text) (either alpha digit)) (def: #export hex-digit {#;doc "Only lex hexadecimal digits."} - (Lexer Char) + (Lexer Text) ($_ either digit (char-range #"a" #"f") @@ -343,14 +340,14 @@ (def: #export (one-of options) {#;doc "Only lex characters that are part of a piece of text."} - (-> Text (Lexer Char)) + (-> Text (Lexer Text)) (function [input] (case (text;split +1 input) (#;Some [init input']) (if (text;contains? init options) (case (text;nth +0 init) (#;Some output) - (#R;Success [input' output]) + (#R;Success [input' (char;as-text output)]) _ (#R;Error "")) @@ -361,14 +358,14 @@ (def: #export (none-of options) {#;doc "Only lex characters that are not part of a piece of text."} - (-> Text (Lexer Char)) + (-> Text (Lexer Text)) (function [input] (case (text;split +1 input) (#;Some [init input']) (if (;not (text;contains? init options)) (case (text;nth +0 init) (#;Some output) - (#R;Success [input' output]) + (#R;Success [input' (char;as-text output)]) _ (#R;Error "")) @@ -379,7 +376,7 @@ (def: #export (satisfies p) {#;doc "Only lex characters that satisfy a predicate."} - (-> (-> Char Bool) (Lexer Char)) + (-> (-> Char Bool) (Lexer Text)) (function [input] (case (: (Maybe [Text Char]) (do Monad<Maybe> @@ -388,7 +385,7 @@ (wrap [input' output]))) (#;Some [input' output]) (if (p output) - (#R;Success [input' output]) + (#R;Success [input' (char;as-text output)]) (#R;Error ($_ Text/append "Character does not satisfy predicate: " (:: text;Codec<Text,Text> encode input)))) _ @@ -396,7 +393,7 @@ (def: #export space {#;doc "Only lex white-space."} - (Lexer Char) + (Lexer Text) (satisfies char;space?)) (def: #export (constrain test lexer) @@ -412,10 +409,10 @@ (do-template [<name> <base> <doc>] [(def: #export (<name> p) {#;doc <doc>} - (-> (Lexer Char) (Lexer Text)) + (-> (Lexer Text) (Lexer Text)) (do Monad<Lexer> - [cs (<base> p)] - (wrap (text;concat (map char;as-text cs)))))] + [] + (|> p <base> (:: @ map text;concat))))] [some' some "Lex some characters as a single continuous text."] [many' many "Lex many characters as a single continuous text."] @@ -424,10 +421,10 @@ (do-template [<name> <base> <doc>] [(def: #export (<name> n p) {#;doc <doc>} - (-> Nat (Lexer Char) (Lexer Text)) + (-> Nat (Lexer Text) (Lexer Text)) (do Monad<Lexer> - [cs (<base> n p)] - (wrap (text;concat (map char;as-text cs)))))] + [] + (|> p (<base> n) (:: @ map text;concat))))] [exactly' exactly "Lex exactly N characters."] [at-most' at-most "Lex at most N characters."] @@ -436,10 +433,10 @@ (def: #export (between' from to p) {#;doc "Lex between N and M characters."} - (-> Nat Nat (Lexer Char) (Lexer Text)) + (-> Nat Nat (Lexer Text) (Lexer Text)) (do Monad<Lexer> - [cs (between from to p)] - (wrap (text;concat (map char;as-text cs))))) + [] + (|> p (between from to) (:: @ map text;concat)))) (def: #export end? {#;doc "Ask if the lexer's input is empty."} @@ -463,13 +460,8 @@ (def: #export (default value lexer) {#;doc "If the given lexer fails, this lexer will succeed with the provided value."} (All [a] (-> a (Lexer a) (Lexer a))) - (function [input] - (case (lexer input) - (#R;Error error) - (#R;Success [input value]) - - (#R;Success input'+value) - (#R;Success input'+value)))) + (|> (opt lexer) + (:: Monad<Lexer> map (|>. (;default value))))) (def: #export (codec codec lexer) {#;doc "Lex a token by means of a codec."} @@ -490,8 +482,8 @@ (def: #export (enclosed [start end] lexer) (All [a] (-> [Text Text] (Lexer a) (Lexer a))) (|> lexer - (before (text end)) - (after (text start)))) + (before (this end)) + (after (this start)))) (def: #export (rec lexer) (All [a] (-> (-> (Lexer a) (Lexer a)) diff --git a/stdlib/source/lux/data/text/regex.lux b/stdlib/source/lux/data/text/regex.lux index 95a932905..405eca618 100644 --- a/stdlib/source/lux/data/text/regex.lux +++ b/stdlib/source/lux/data/text/regex.lux @@ -3,7 +3,7 @@ (lux (control monad) (data [char] [text] - ["&" text/lexer #+ Lexer Monad<Lexer>] + ["&" text/lexer #+ Lexer Monad<Lexer> "&/" Monad<Lexer>] text/format [number "Int/" Codec<Text,Int>] [product] @@ -13,24 +13,17 @@ ["s" syntax #+ syntax:]))) ## [Utils] -(def: #hidden (->Text lexer^) - (-> (Lexer Char) (Lexer Text)) - (do Monad<Lexer> - [output lexer^] - (wrap (char;as-text output)))) - (def: regex-char^ - (Lexer Char) + (Lexer Text) (&;none-of "\\.|&()[]{}")) (def: escaped-char^ - (Lexer Char) + (Lexer Text) (do Monad<Lexer> - [? (&;opt (&;char #"\\")) - char (case ? - (#;Some _) &;any - #;None regex-char^)] - (wrap char))) + [? (&;this? "\\")] + (if ? + &;any + regex-char^))) (def: (local^ state lexer) (All [a] (-> Text (Lexer a) (Lexer a))) @@ -50,9 +43,13 @@ (wrap output))) (def: #hidden word^ - (Lexer Char) + (Lexer Text) (&;either &;alpha-num - (&;char #"_"))) + (&;one-of "_"))) + +(def: #hidden (copy reference) + (-> Text (Lexer Text)) + (&;after (&;this reference) (&/wrap reference))) (def: #hidden (join-text^ part^) (-> (Lexer (List Text)) (Lexer Text)) @@ -61,14 +58,14 @@ (wrap (text;join-with "" parts)))) (def: identifier-char^ - (Lexer Char) + (Lexer Text) (&;none-of "[]{}()s\"#;<>")) (def: identifier-part^ (Lexer Text) (do Monad<Lexer> [head (refine^ (&;not &;digit) - (->Text identifier-char^)) + identifier-char^) tail (&;some' identifier-char^)] (wrap (format head tail)))) @@ -77,9 +74,9 @@ (do Monad<Lexer> [] ($_ &;either - (&;seq (wrap current-module) (&;after (&;text ";;") identifier-part^)) - (&;seq identifier-part^ (&;after (&;text ";") identifier-part^)) - (&;seq (wrap "lux") (&;after (&;text ";") identifier-part^)) + (&;seq (wrap current-module) (&;after (&;this ";;") identifier-part^)) + (&;seq identifier-part^ (&;after (&;this ";") identifier-part^)) + (&;seq (wrap "lux") (&;after (&;this ";") identifier-part^)) (&;seq (wrap "") identifier-part^)))) (def: (re-var^ current-module) @@ -91,22 +88,16 @@ (def: re-char-range^ (Lexer Code) (do Monad<Lexer> - [from regex-char^ - _ (&;char #"-") - to regex-char^] + [from (|> regex-char^ (:: @ map (|>. (text;nth +0) assume))) + _ (&;this "-") + to (|> regex-char^ (:: @ map (|>. (text;nth +0) assume)))] (wrap (` (&;char-range (~ (code;char from)) (~ (code;char to))))))) (def: re-char^ (Lexer Code) (do Monad<Lexer> [char escaped-char^] - (wrap (` (&;char (~ (code;char char))))))) - -(def: re-char+^ - (Lexer Code) - (do Monad<Lexer> - [base re-char^] - (wrap (` (->Text (~ base)))))) + (wrap (` (;;copy (~ (code;text char))))))) (def: re-char-options^ (Lexer Code) @@ -117,78 +108,78 @@ (def: re-user-class^' (Lexer Code) (do Monad<Lexer> - [negate? (&;opt (&;char #"^")) + [negate? (&;opt (&;this "^")) parts (&;many ($_ &;either re-char-range^ re-char-options^))] (wrap (case negate? - (#;Some _) (` (->Text (&;not ($_ &;either (~@ parts))))) - #;None (` (->Text ($_ &;either (~@ parts)))))))) + (#;Some _) (` (&;not ($_ &;either (~@ parts)))) + #;None (` ($_ &;either (~@ parts))))))) (def: re-user-class^ (Lexer Code) (do Monad<Lexer> [_ (wrap []) init re-user-class^' - rest (&;some (&;after (&;text "&&") (&;enclosed ["[" "]"] re-user-class^')))] + rest (&;some (&;after (&;this "&&") (&;enclosed ["[" "]"] re-user-class^')))] (wrap (fold (function [refinement base] (` (refine^ (~ refinement) (~ base)))) init rest)))) (def: #hidden blank^ - (Lexer Char) + (Lexer Text) (&;one-of " \t")) (def: #hidden ascii^ - (Lexer Char) + (Lexer Text) (&;char-range #"\u0000" #"\u007F")) (def: #hidden control^ - (Lexer Char) + (Lexer Text) (&;either (&;char-range #"\u0000" #"\u001F") - (&;char #"\u007F"))) + (&;one-of "\u007F"))) (def: #hidden punct^ - (Lexer Char) + (Lexer Text) (&;one-of "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")) (def: #hidden graph^ - (Lexer Char) + (Lexer Text) (&;either punct^ &;alpha-num)) (def: #hidden print^ - (Lexer Char) + (Lexer Text) (&;either graph^ - (&;char #"\u0020"))) + (&;one-of "\u0020"))) (def: re-system-class^ (Lexer Code) (do Monad<Lexer> [] ($_ &;either - (&;after (&;char #".") (wrap (` (->Text &;any)))) - (&;after (&;text "\\d") (wrap (` (->Text &;digit)))) - (&;after (&;text "\\D") (wrap (` (->Text (&;not &;digit))))) - (&;after (&;text "\\s") (wrap (` (->Text &;space)))) - (&;after (&;text "\\S") (wrap (` (->Text (&;not &;space))))) - (&;after (&;text "\\w") (wrap (` (->Text word^)))) - (&;after (&;text "\\W") (wrap (` (->Text (&;not word^))))) - - (&;after (&;text "\\p{Lower}") (wrap (` (->Text &;lower)))) - (&;after (&;text "\\p{Upper}") (wrap (` (->Text &;upper)))) - (&;after (&;text "\\p{Alpha}") (wrap (` (->Text &;alpha)))) - (&;after (&;text "\\p{Digit}") (wrap (` (->Text &;digit)))) - (&;after (&;text "\\p{Alnum}") (wrap (` (->Text &;alpha-num)))) - (&;after (&;text "\\p{Space}") (wrap (` (->Text &;space)))) - (&;after (&;text "\\p{HexDigit}") (wrap (` (->Text &;hex-digit)))) - (&;after (&;text "\\p{OctDigit}") (wrap (` (->Text &;oct-digit)))) - (&;after (&;text "\\p{Blank}") (wrap (` (->Text blank^)))) - (&;after (&;text "\\p{ASCII}") (wrap (` (->Text ascii^)))) - (&;after (&;text "\\p{Contrl}") (wrap (` (->Text control^)))) - (&;after (&;text "\\p{Punct}") (wrap (` (->Text punct^)))) - (&;after (&;text "\\p{Graph}") (wrap (` (->Text graph^)))) - (&;after (&;text "\\p{Print}") (wrap (` (->Text print^)))) + (&;after (&;this ".") (wrap (` &;any))) + (&;after (&;this "\\d") (wrap (` &;digit))) + (&;after (&;this "\\D") (wrap (` (&;not &;digit)))) + (&;after (&;this "\\s") (wrap (` &;space))) + (&;after (&;this "\\S") (wrap (` (&;not &;space)))) + (&;after (&;this "\\w") (wrap (` word^))) + (&;after (&;this "\\W") (wrap (` (&;not word^)))) + + (&;after (&;this "\\p{Lower}") (wrap (` &;lower))) + (&;after (&;this "\\p{Upper}") (wrap (` &;upper))) + (&;after (&;this "\\p{Alpha}") (wrap (` &;alpha))) + (&;after (&;this "\\p{Digit}") (wrap (` &;digit))) + (&;after (&;this "\\p{Alnum}") (wrap (` &;alpha-num))) + (&;after (&;this "\\p{Space}") (wrap (` &;space))) + (&;after (&;this "\\p{HexDigit}") (wrap (` &;hex-digit))) + (&;after (&;this "\\p{OctDigit}") (wrap (` &;oct-digit))) + (&;after (&;this "\\p{Blank}") (wrap (` blank^))) + (&;after (&;this "\\p{ASCII}") (wrap (` ascii^))) + (&;after (&;this "\\p{Contrl}") (wrap (` control^))) + (&;after (&;this "\\p{Punct}") (wrap (` punct^))) + (&;after (&;this "\\p{Graph}") (wrap (` graph^))) + (&;after (&;this "\\p{Print}") (wrap (` print^))) ))) (def: re-class^ @@ -196,21 +187,23 @@ (&;either re-system-class^ (&;enclosed ["[" "]"] re-user-class^))) -(def: int^ - (Lexer Int) - (&;codec number;Codec<Text,Int> (&;many' &;digit))) +(def: number^ + (Lexer Nat) + (|> (&;many' &;digit) + (&;codec number;Codec<Text,Int>) + (&/map int-to-nat))) (def: re-back-reference^ (Lexer Code) (&;either (do Monad<Lexer> - [_ (&;char #"\\") - id int^] - (wrap (` (&;text (~ (code;symbol ["" (Int/encode id)])))))) + [_ (&;this "\\") + id number^] + (wrap (` (;;copy (~ (code;symbol ["" (Int/encode (nat-to-int id))])))))) (do Monad<Lexer> - [_ (&;text "\\k<") + [_ (&;this "\\k<") captured-name identifier-part^ - _ (&;text ">")] - (wrap (` (&;text (~ (code;symbol ["" captured-name])))))))) + _ (&;this ">")] + (wrap (` (;;copy (~ (code;symbol ["" captured-name])))))))) (def: (re-simple^ current-module) (-> Text (Lexer Code)) @@ -218,7 +211,7 @@ re-class^ (re-var^ current-module) re-back-reference^ - re-char+^ + re-char^ )) (def: (re-simple-quantified^ current-module) @@ -227,12 +220,13 @@ [base (re-simple^ current-module) quantifier (&;one-of "?*+")] (case quantifier - #"?" + "?" (wrap (` (&;default "" (~ base)))) - #"*" + "*" (wrap (` (join-text^ (&;some (~ base))))) + ## "+" _ (wrap (` (join-text^ (&;many (~ base))))) ))) @@ -244,19 +238,19 @@ (&;enclosed ["{" "}"] ($_ &;either (do @ - [[from to] (&;seq int^ (&;after (&;char #",") int^))] - (wrap (` (join-text^ (&;between (~ (code;nat (int-to-nat from))) - (~ (code;nat (int-to-nat to))) + [[from to] (&;seq number^ (&;after (&;this ",") number^))] + (wrap (` (join-text^ (&;between (~ (code;nat from)) + (~ (code;nat to)) (~ base)))))) (do @ - [limit (&;after (&;char #",") int^)] - (wrap (` (join-text^ (&;at-most (~ (code;nat (int-to-nat limit))) (~ base)))))) + [limit (&;after (&;this ",") number^)] + (wrap (` (join-text^ (&;at-most (~ (code;nat limit)) (~ base)))))) (do @ - [limit (&;before (&;char #",") int^)] - (wrap (` (join-text^ (&;at-least (~ (code;nat (int-to-nat limit))) (~ base)))))) + [limit (&;before (&;this ",") number^)] + (wrap (` (join-text^ (&;at-least (~ (code;nat limit)) (~ base)))))) (do @ - [limit int^] - (wrap (` (join-text^ (&;exactly (~ (code;nat (int-to-nat limit))) (~ base)))))))))) + [limit number^] + (wrap (` (join-text^ (&;exactly (~ (code;nat limit)) (~ base)))))))))) (def: (re-quantified^ current-module) (-> Text (Lexer Code)) @@ -376,7 +370,7 @@ (do Monad<Lexer> [#let [sub^ (re-sequential^ capturing? re-scoped^ current-module)] head sub^ - tail (&;some (&;after (&;char #"|") sub^)) + tail (&;some (&;after (&;this "|") sub^)) #let [g!op (if capturing? (` |||^) (` |||_^))]] @@ -389,24 +383,24 @@ (-> Text (Lexer [Re-Group Code])) ($_ &;either (do Monad<Lexer> - [_ (&;text "(?:") + [_ (&;this "(?:") [_ scoped] (re-alternative^ false re-scoped^ current-module) - _ (&;char #")")] + _ (&;this ")")] (wrap [#Non-Capturing scoped])) (do Monad<Lexer> [complex (re-complex^ current-module)] (wrap [#Non-Capturing complex])) (do Monad<Lexer> - [_ (&;text "(?<") + [_ (&;this "(?<") captured-name identifier-part^ - _ (&;text ">") + _ (&;this ">") [num-captures pattern] (re-alternative^ true re-scoped^ current-module) - _ (&;char #")")] + _ (&;this ")")] (wrap [(#Capturing [(#;Some captured-name) num-captures]) pattern])) (do Monad<Lexer> - [_ (&;char #"(") + [_ (&;this "(") [num-captures pattern] (re-alternative^ true re-scoped^ current-module) - _ (&;char #")")] + _ (&;this ")")] (wrap [(#Capturing [#;None num-captures]) pattern])))) (def: (regex^ current-module) @@ -479,7 +473,8 @@ (&;before &;end) (&;run pattern)) (#;Left error) - (macro;fail error) + (macro;fail (format "Error while parsing regular-expression:\n" + error)) (#;Right regex) (wrap (list regex)) |