aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source
diff options
context:
space:
mode:
authorEduardo Julian2017-06-19 20:06:41 -0400
committerEduardo Julian2017-06-19 20:06:41 -0400
commited0406cb0994f14ca5a3e6120b7b1ec6927bae75 (patch)
tree5939298115f7bf40dd6af52c86b891319a7b957e /stdlib/source
parente5bd00eeadaa84137cbd83bb359ddcc6fad8fbca (diff)
- JSON polytypic generator checks for (Dict Text ?) instead of (List [Text ?]).
- Lexers now rely only on Text, instead of also relying on Char.
Diffstat (limited to '')
-rw-r--r--stdlib/source/lux/data/format/json.lux260
-rw-r--r--stdlib/source/lux/data/format/xml.lux56
-rw-r--r--stdlib/source/lux/data/text/lexer.lux110
-rw-r--r--stdlib/source/lux/data/text/regex.lux185
4 files changed, 277 insertions, 334 deletions
diff --git a/stdlib/source/lux/data/format/json.lux b/stdlib/source/lux/data/format/json.lux
index 1cc3000c3..6cf45dfc9 100644
--- a/stdlib/source/lux/data/format/json.lux
+++ b/stdlib/source/lux/data/format/json.lux
@@ -10,16 +10,16 @@
(data [bool]
[text "Text/" Eq<Text> Monoid<Text>]
text/format
- (text [lexer #+ Lexer Monad<Lexer>])
+ (text ["l" lexer #+ Lexer Monad<Lexer> "Lexer/" Monad<Lexer>])
[number "Real/" Codec<Text,Real>]
maybe
- [char "Char/" Eq<Char> Codec<Text,Char>]
- ["R" result #- fail]
+ [char "Char/" Codec<Text,Char>]
+ ["R" result]
[sum]
[product]
(coll [list "" Fold<List> "List/" Monad<List>]
[vector #+ Vector vector "Vector/" Monad<Vector>]
- [dict #+ Dict]))
+ ["d" dict]))
[macro #+ Monad<Lux> with-gensyms]
(macro [syntax #+ syntax:]
[code]
@@ -43,18 +43,18 @@
(#Number Number)
(#String String)
(#Array (Vector JSON))
- (#Object (Dict String JSON)))
+ (#Object (d;Dict String JSON)))
(do-template [<name> <type>]
[(type: #export <name> <type>)]
[Array (Vector JSON)]
- [Object (Dict String JSON)]
+ [Object (d;Dict String JSON)]
)
(type: #export (Parser a)
{#;doc "JSON parsers."}
- (-> JSON (Result a)))
+ (-> JSON (R;Result a)))
(type: #export (Gen a)
{#;doc "JSON generators."}
@@ -99,7 +99,7 @@
_
(macro;fail "Wrong syntax for JSON object.")))
pairs)]
- (wrap (list (` (: JSON (#Object (dict;from-list text;Hash<Text> (list (~@ pairs')))))))))
+ (wrap (list (` (: JSON (#Object (d;from-list text;Hash<Text> (list (~@ pairs')))))))))
_
(wrap (list token))
@@ -124,7 +124,7 @@
(-> (-> JSON Text) (-> Object Text))
(format "{"
(|> object
- dict;entries
+ d;entries
(List/map (function [[key value]] (format (:: text;Codec<Text,Text> encode key) ":" (show-json value))))
(text;join-with ","))
"}"))
@@ -150,20 +150,20 @@
(def: #export (fields json)
{#;doc "Get all the fields in a JSON object."}
- (-> JSON (Result (List String)))
+ (-> JSON (R;Result (List String)))
(case json
(#Object obj)
- (#R;Success (dict;keys obj))
+ (#R;Success (d;keys obj))
_
(#R;Error (format "Cannot get the fields of a non-object."))))
(def: #export (get key json)
{#;doc "A JSON object field getter."}
- (-> String JSON (Result JSON))
+ (-> String JSON (R;Result JSON))
(case json
(#Object obj)
- (case (dict;get key obj)
+ (case (d;get key obj)
(#;Some value)
(#R;Success value)
@@ -175,10 +175,10 @@
(def: #export (set key value json)
{#;doc "A JSON object field setter."}
- (-> String JSON JSON (Result JSON))
+ (-> String JSON JSON (R;Result JSON))
(case json
(#Object obj)
- (#R;Success (#Object (dict;put key value obj)))
+ (#R;Success (#Object (d;put key value obj)))
_
(#R;Error (format "Cannot set field " (show-string key) " of a non-object."))))
@@ -186,7 +186,7 @@
(do-template [<name> <tag> <type> <desc>]
[(def: #export (<name> key json)
{#;doc (#;TextA (format "A JSON object field getter for " <desc> "."))}
- (-> Text JSON (Result <type>))
+ (-> Text JSON (R;Result <type>))
(case (get key json)
(#R;Success (<tag> value))
(#R;Success value)
@@ -228,23 +228,23 @@
## Lexers
(def: space~
(Lexer Text)
- (lexer;some' lexer;space))
+ (l;some' l;space))
(def: data-sep
- (Lexer [Text Char Text])
- ($_ lexer;seq space~ (lexer;char #",") space~))
+ (Lexer [Text Unit Text])
+ ($_ l;seq space~ (l;this ",") space~))
(def: null~
(Lexer Null)
(do Monad<Lexer>
- [_ (lexer;text "null")]
+ [_ (l;this "null")]
(wrap [])))
(do-template [<name> <token> <value>]
[(def: <name>
(Lexer Boolean)
(do Monad<Lexer>
- [_ (lexer;text <token>)]
+ [_ (l;this <token>)]
(wrap <value>)))]
[t~ "true" true]
@@ -253,76 +253,65 @@
(def: boolean~
(Lexer Boolean)
- (lexer;either t~ f~))
+ (l;either t~ f~))
(def: number~
(Lexer Number)
(do Monad<Lexer>
- [?sign (: (Lexer Text)
- (lexer;default ""
- (lexer;text "-")))
+ [signed? (l;this? "-")
digits (: (Lexer Text)
- (lexer;many' lexer;digit))
+ (l;many' l;digit))
decimals (: (Lexer Text)
- (lexer;default "0"
+ (l;default "0"
(do @
- [_ (lexer;text ".")]
- (lexer;many' lexer;digit))))
+ [_ (l;this ".")]
+ (l;many' l;digit))))
exp (: (Lexer Text)
- (lexer;default ""
+ (l;default ""
(do @
- [mark (lexer;either (lexer;text "e") (lexer;text "E"))
- sign (lexer;default "" (lexer;text "-"))
- offset (lexer;many' lexer;digit)]
- (wrap (format mark sign offset)))))]
- (case (: (Result Real)
- (Real/decode (format ?sign digits "." decimals exp)))
+ [mark (l;one-of "eE")
+ signed?' (l;this? "-")
+ offset (l;many' l;digit)]
+ (wrap (format mark (if signed?' "-" "") offset)))))]
+ (case (: (R;Result Real)
+ (Real/decode (format (if signed? "-" "") digits "." decimals exp)))
(#R;Error message)
- (lexer;fail message)
+ (l;fail message)
(#R;Success value)
(wrap value))))
-(def: (un-escape escaped)
- (-> Char Text)
- (case escaped
- #"t" "\t"
- #"b" "\b"
- #"n" "\n"
- #"r" "\r"
- #"f" "\f"
- #"\"" "\""
- #"\\" "\\"
- _ ""))
-
-(def: string-body~
+(def: escaped~
(Lexer Text)
- (loop [_ []]
- (do Monad<Lexer>
- [chars (lexer;some' (lexer;none-of "\\\""))
- stop-char lexer;peek]
- (if (Char/= #"\\" stop-char)
- (do @
- [_ lexer;any
- escaped lexer;any
- next-chars (recur [])]
- (wrap (format chars (un-escape escaped) next-chars)))
- (wrap chars)))))
+ ($_ l;either
+ (l;after (l;this "\\t") (Lexer/wrap "\t"))
+ (l;after (l;this "\\b") (Lexer/wrap "\b"))
+ (l;after (l;this "\\n") (Lexer/wrap "\n"))
+ (l;after (l;this "\\r") (Lexer/wrap "\r"))
+ (l;after (l;this "\\f") (Lexer/wrap "\f"))
+ (l;after (l;this "\\\"") (Lexer/wrap "\""))
+ (l;after (l;this "\\\\") (Lexer/wrap "\\"))))
(def: string~
(Lexer String)
- (do Monad<Lexer>
- [_ (lexer;text "\"")
- string-body string-body~
- _ (lexer;text "\"")]
- (wrap string-body)))
+ (<| (l;enclosed ["\"" "\""])
+ (loop [_ []]
+ (do Monad<Lexer>
+ [chars (l;some' (l;none-of "\\\""))
+ stop l;peek]
+ (if (Text/= "\\" stop)
+ (do @
+ [escaped escaped~
+ next-chars (recur [])]
+ (wrap (format chars escaped next-chars)))
+ (wrap chars))))))
(def: (kv~ json~)
(-> (-> Unit (Lexer JSON)) (Lexer [String JSON]))
(do Monad<Lexer>
[key string~
_ space~
- _ (lexer;char #":")
+ _ (l;this ":")
_ space~
value (json~ [])]
(wrap [key value])))
@@ -331,20 +320,20 @@
[(def: (<name> json~)
(-> (-> Unit (Lexer JSON)) (Lexer <type>))
(do Monad<Lexer>
- [_ (lexer;char <open>)
+ [_ (l;this <open>)
_ space~
- elems (lexer;sep-by data-sep <elem-parser>)
+ elems (l;sep-by data-sep <elem-parser>)
_ space~
- _ (lexer;char <close>)]
+ _ (l;this <close>)]
(wrap (<prep> elems))))]
- [array~ Array #"[" #"]" (json~ []) vector;from-list]
- [object~ Object #"{" #"}" (kv~ json~) (dict;from-list text;Hash<Text>)]
+ [array~ Array "[" "]" (json~ []) vector;from-list]
+ [object~ Object "{" "}" (kv~ json~) (d;from-list text;Hash<Text>)]
)
(def: (json~' _)
(-> Unit (Lexer JSON))
- ($_ lexer;alt null~ boolean~ number~ string~ (array~ json~') (object~ json~')))
+ ($_ l;alt null~ boolean~ number~ string~ (array~ json~') (object~ json~')))
## [Structures]
(struct: #export _ (Functor Parser)
@@ -514,7 +503,7 @@
(function [json]
(case json
(#Array values)
- (do Monad<Result>
+ (do R;Monad<Result>
[elems (mapM @ parser (vector;to-list values))]
(wrap elems))
@@ -523,18 +512,18 @@
(def: #export (object parser)
{#;doc "Parses a JSON object, assuming that every field's value can be parsed the same way."}
- (All [a] (-> (Parser a) (Parser (Dict String a))))
+ (All [a] (-> (Parser a) (Parser (d;Dict String a))))
(function [json]
(case json
(#Object fields)
- (do Monad<Result>
+ (do R;Monad<Result>
[kvs (mapM @
(function [[key val']]
(do @
[val (parser val')]
(wrap [key val])))
- (dict;entries fields))]
- (wrap (dict;from-list text;Hash<Text> kvs)))
+ (d;entries fields))]
+ (wrap (d;from-list text;Hash<Text> kvs)))
_
(#R;Error (format "JSON value is not an object: " (show-json json))))))
@@ -624,7 +613,7 @@
(#R;Success x) (#R;Success (#;Some x))))
(def: #export (run json parser)
- (All [a] (-> JSON (Parser a) (Result a)))
+ (All [a] (-> JSON (Parser a) (R;Result a)))
(parser json))
(def: #export (ensure test parser json)
@@ -654,7 +643,7 @@
(-> (List String) (Parser Unit))
(case json
(#Object kvs)
- (let [actual-fields (dict;keys kvs)]
+ (let [actual-fields (d;keys kvs)]
(if (and (n.= (list;size wanted-fields) (list;size actual-fields))
(list;every? (list;member? text;Eq<Text> wanted-fields)
actual-fields))
@@ -691,21 +680,21 @@
(list;indices (vector;size xs))))
[(#Object xs) (#Object ys)]
- (and (n.= (dict;size xs) (dict;size ys))
+ (and (n.= (d;size xs) (d;size ys))
(fold (function [[xk xv] prev]
(and prev
- (case (dict;get xk ys)
+ (case (d;get xk ys)
#;None false
(#;Some yv) (= xv yv))))
true
- (dict;entries xs)))
+ (d;entries xs)))
_
false)))
(struct: #export _ (Codec Text JSON)
(def: encode show-json)
- (def: decode (function [input] (lexer;run input (json~' [])))))
+ (def: decode (function [input] (l;run input (json~' [])))))
## [Syntax]
(type: Shape
@@ -785,39 +774,22 @@
[Text poly;text ;;gen-string])]
($_ macro;either
<basic>
- (with-gensyms [g!type-fun g!case g!input g!key g!val]
+ (with-gensyms [g!input g!key g!val]
(do @
- [:sub: (poly;apply-1 (ident-for ;List) :x:)
- [g!vars members] (poly;tuple :sub:)
- :val: (case members
- (^ (list :key: :val:))
- (do @ [_ (poly;text :key:)]
- (wrap :val:))
-
- _
- (macro;fail ""))
- #let [new-*env* (poly;extend-env [:x: g!type-fun]
- (list;zip2 (|> g!vars list;size poly;type-var-indices)
- g!vars)
- *env*)]
- .val. (Codec<JSON,?>//encode new-*env* :val:)
- #let [:x:+ (case g!vars
- #;Nil
- (->Codec//encode (type;to-ast :x:))
-
- _
- (` (All (~ g!type-fun) [(~@ g!vars)]
- (-> (~@ (List/map ->Codec//encode g!vars))
- (~ (->Codec//encode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]]
- (wrap (` (: (~ :x:+)
- (function [(~@ g!vars) (~ g!input)]
+ [[:key: :val:] (poly;apply-2 (ident-for d;Dict) :x:)
+ _ (poly;text :key:)
+ .val. (Codec<JSON,?>//encode *env* :val:)]
+ (wrap (` (: (~ (->Codec//encode (type;to-ast :x:)))
+ (function [(~ g!input)]
(|> (~ g!input)
- (_map_ (: (-> [Text (~ (type;to-ast :val:))]
- [Text JSON])
- (function [[(~ g!key) (~ g!val)]]
- [(~ g!key)
- ((~ .val.) (~ g!val))])))
- ;;object))
+ d;entries
+ (;;_map_ (: (-> [Text (~ (type;to-ast :val:))]
+ [Text JSON])
+ (function [[(~ g!key) (~ g!val)]]
+ [(~ g!key)
+ ((~ .val.) (~ g!val))])))
+ (d;from-list text;Hash<Text>)
+ #;;Object))
)))
))
(do @
@@ -829,7 +801,7 @@
[:sub: (poly;apply-1 (ident-for ;List) :x:)
.sub. (Codec<JSON,?>//encode *env* :sub:)]
(wrap (` (: (~ (->Codec//encode (type;to-ast :x:)))
- (|>. (_map_ (~ .sub.)) vector;from-list ;;gen-array)))))
+ (|>. (;;_map_ (~ .sub.)) vector;from-list ;;gen-array)))))
(with-gensyms [g!type-fun g!case g!input]
(do @
[[g!vars members] (poly;variant :x:)
@@ -924,7 +896,7 @@
(poly: #hidden (Codec<JSON,?>//decode *env* :x:)
(let [->Codec//decode (: (-> Code Code)
- (function [.type.] (` (-> JSON (Result (~ .type.))))))]
+ (function [.type.] (` (-> JSON (R;Result (~ .type.))))))]
(with-expansions
[<basic> (do-template [<type> <matcher> <decoder>]
[(do @ [_ (<matcher> :x:)] (wrap (` (: (~ (->Codec//decode (` <type>))) <decoder>))))]
@@ -946,40 +918,23 @@
[List (poly;apply-1 (ident-for ;List)) ;;array])]
($_ macro;either
<basic>
- (with-gensyms [g!type-fun g!case g!input g!key g!val]
+ (with-gensyms [g!input g!output g!key g!val]
(do @
- [:sub: (poly;apply-1 (ident-for ;List) :x:)
- [g!vars members] (poly;tuple :sub:)
- :val: (case members
- (^ (list :key: :val:))
- (do @ [_ (poly;text :key:)]
- (wrap :val:))
-
- _
- (macro;fail ""))
- #let [new-*env* (poly;extend-env [:x: g!type-fun]
- (list;zip2 (|> g!vars list;size poly;type-var-indices) g!vars)
- *env*)]
- .val. (Codec<JSON,?>//decode new-*env* :val:)
- #let [:x:+ (case g!vars
- #;Nil
- (->Codec//decode (type;to-ast :x:))
-
- _
- (` (All (~ g!type-fun) [(~@ g!vars)]
- (-> (~@ (List/map ->Codec//decode g!vars))
- (~ (->Codec//decode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]]
- (wrap (` (: (~ :x:+)
- (function [(~@ g!vars) (~ g!input)]
- (do Monad<Result>
- [(~ g!key) (;;fields (~ g!input))]
- (mapM (~ (' %))
- (function [(~ g!key)]
- (do Monad<Result>
- [(~ g!val) (;;get (~ g!key) (~ g!input))
- (~ g!val) (;;run (~ g!val) (~ .val.))]
- ((~ (' wrap)) [(~ g!key) (~ g!val)])))
- (~ g!key))))
+ [[:key: :val:] (poly;apply-2 (ident-for d;Dict) :x:)
+ _ (poly;text :key:)
+ .val. (Codec<JSON,?>//decode *env* :val:)]
+ (wrap (` (: (~ (->Codec//decode (type;to-ast :x:)))
+ (function [(~ g!input)]
+ (do R;Monad<Result>
+ [(~ g!key) (;;fields (~ g!input))
+ (~ g!output) (mapM R;Monad<Result>
+ (function [(~ g!key)]
+ (do R;Monad<Result>
+ [(~ g!val) (;;get (~ g!key) (~ g!input))
+ (~ g!val) (;;run (~ g!val) (~ .val.))]
+ ((~ (' wrap)) [(~ g!key) (~ g!val)])))
+ (~ g!key))]
+ ((~' wrap) (d;from-list text;Hash<Text> (~ g!output)))))
)))
))
<complex>
@@ -1043,7 +998,7 @@
(~ (->Codec//decode (` ((~ (type;to-ast :x:)) (~@ g!vars)))))))))]]
(wrap (` (: (~ :x:+)
(function [(~@ g!vars) (~ g!input)]
- (do Monad<Result>
+ (do R;Monad<Result>
[(~@ (List/join extraction))]
((~ (' wrap)) (~ (code;record (List/map (function [[name :slot:]]
[(code;tag name) (code;symbol ["" (product;right name)])])
@@ -1108,7 +1063,8 @@
#maybe (Maybe Int)
#list (List Int)
#variant Variant
- #tuple [Int Real Char]})
+ #tuple [Int Real Char]
+ #dict (Dict Text Int)})
(derived: (Codec<JSON,?> Record)))}
(wrap (list (` (: (Codec JSON (~ :x:))
diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux
index db68fbf29..aaafcd3d0 100644
--- a/stdlib/source/lux/data/format/xml.lux
+++ b/stdlib/source/lux/data/format/xml.lux
@@ -36,52 +36,52 @@
## [Parsing]
(def: xml-standard-escape-char^
- (l;Lexer Char)
+ (l;Lexer Text)
($_ l;either
- (l;after (l;text "&lt;") (lex/wrap #"<"))
- (l;after (l;text "&gt;") (lex/wrap #">"))
- (l;after (l;text "&amp;") (lex/wrap #"&"))
- (l;after (l;text "&apos;") (lex/wrap #"'"))
- (l;after (l;text "&quot;") (lex/wrap #"\""))))
+ (l;after (l;this "&lt;") (lex/wrap "<"))
+ (l;after (l;this "&gt;") (lex/wrap ">"))
+ (l;after (l;this "&amp;") (lex/wrap "&"))
+ (l;after (l;this "&apos;") (lex/wrap "'"))
+ (l;after (l;this "&quot;") (lex/wrap "\""))))
(def: xml-unicode-escape-char^
- (l;Lexer Char)
+ (l;Lexer Text)
(|> (do l;Monad<Lexer>
- [hex? (l;opt (l;text "x"))
+ [hex? (l;opt (l;this "x"))
code (case hex?
#;None
(l;codec number;Codec<Text,Int> (l;many' l;digit))
(#;Some _)
(l;codec number;Hex@Codec<Text,Int> (l;many' l;hex-digit)))]
- (wrap (|> code int-to-nat char;char)))
- (l;before (l;text ";"))
- (l;after (l;text "&#"))))
+ (wrap (|> code int-to-nat char;char char;as-text)))
+ (l;before (l;this ";"))
+ (l;after (l;this "&#"))))
(def: xml-escape-char^
- (l;Lexer Char)
+ (l;Lexer Text)
(l;either xml-standard-escape-char^
xml-unicode-escape-char^))
(def: xml-char^
- (l;Lexer Char)
+ (l;Lexer Text)
(l;either (l;none-of "<>&'\"")
xml-escape-char^))
(def: xml-identifier
(l;Lexer Text)
(do l;Monad<Lexer>
- [head (l;either (l;char #"_")
+ [head (l;either (l;one-of "_")
l;alpha)
tail (l;some' (l;either (l;one-of "_.-")
l;alpha-num))]
- (wrap (format (char;as-text head) tail))))
+ (wrap (format head tail))))
(def: namespaced-symbol^
(l;Lexer Ident)
(do l;Monad<Lexer>
[first-part xml-identifier
- ?second-part (<| l;opt (l;after (l;char #":")) xml-identifier)]
+ ?second-part (<| l;opt (l;after (l;this ":")) xml-identifier)]
(case ?second-part
#;None
(wrap ["" first-part])
@@ -109,7 +109,7 @@
(<| (:: l;Monad<Lexer> map (D;from-list ident;Hash<Ident>))
l;some
(l;seq (spaced^ attr-name^))
- (l;after (l;char #"="))
+ (l;after (l;this "="))
(spaced^ attr-value^)))
(def: (close-tag^ expected)
@@ -117,7 +117,7 @@
(do l;Monad<Lexer>
[actual (|> tag^
spaced^
- (l;after (l;char #"/"))
+ (l;after (l;this "/"))
(l;enclosed ["<" ">"]))]
(l;assert (format "Close tag does not match open tag.\n"
"Expected: " (%ident expected) "\n"
@@ -126,24 +126,24 @@
(def: comment^
(l;Lexer Text)
- (|> (l;some' (l;not (l;text "--")))
- (l;after (l;text "-->"))
- (l;after (l;text "<--"))
+ (|> (l;some' (l;not (l;this "--")))
+ (l;after (l;this "-->"))
+ (l;after (l;this "<--"))
spaced^))
(def: xml-header^
(l;Lexer Attrs)
(|> (spaced^ attrs^)
- (l;before (l;text "?>"))
- (l;after (l;text "<?xml"))
+ (l;before (l;this "?>"))
+ (l;after (l;this "<?xml"))
spaced^))
(def: cdata^
(l;Lexer Text)
- (let [end (l;text "]]>")]
+ (let [end (l;this "]]>")]
(|> (l;some' (l;not end))
(l;after end)
- (l;after (l;text "<![CDATA["))
+ (l;after (l;this "<![CDATA["))
spaced^)))
(def: text^
@@ -159,14 +159,14 @@
(l;either text^
(spaced^
(do l;Monad<Lexer>
- [_ (l;char #"<")
+ [_ (l;this "<")
tag (spaced^ tag^)
attrs (spaced^ attrs^)
#let [no-children^ (do l;Monad<Lexer>
- [_ (l;text "/>")]
+ [_ (l;this "/>")]
(wrap (node tag attrs (list))))
with-children^ (do l;Monad<Lexer>
- [_ (l;char #">")
+ [_ (l;this ">")
children (l;some node^)
_ (close-tag^ tag)]
(wrap (node tag attrs children)))]]
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index 58e636b53..8475d91e2 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -7,7 +7,7 @@
(data [text "Text/" Eq<Text> Monoid<Text>]
[number "Int/" Codec<Text,Int>]
[product]
- [char "Char/" Order<Char>]
+ [char "Char/" Order<Char> Codec<Text,Char>]
maybe
["R" result]
(coll [list "" Functor<List>]))))
@@ -79,11 +79,11 @@
(def: #export any
{#;doc "Just returns the next character without applying any logic."}
- (Lexer Char)
+ (Lexer Text)
(function [input]
(case [(text;nth +0 input) (text;split +1 input)]
[(#;Some output) (#;Some [_ input'])]
- (#R;Success [input' output])
+ (#R;Success [input' (char;as-text output)])
_
(#R;Error "Cannot parse character from empty text."))
@@ -126,7 +126,7 @@
(def: #export (not p)
{#;doc "Produce a character if the lexer fails."}
- (All [a] (-> (Lexer a) (Lexer Char)))
+ (All [a] (-> (Lexer a) (Lexer Text)))
(function [input]
(case (p input)
(#R;Error msg)
@@ -232,16 +232,26 @@
(#R;Success [input (#;Some value)])
)))
-(def: #export (text test)
+(def: #export (this reference)
{#;doc "Lex a text if it matches the given sample."}
- (-> Text (Lexer Text))
+ (-> Text (Lexer Unit))
(function [input]
- (if (text;starts-with? test input)
- (case (text;split (text;size test) input)
+ (if (text;starts-with? reference input)
+ (case (text;split (text;size reference) input)
#;None (#R;Error "")
- (#;Some [_ input']) (#R;Success [input' test]))
+ (#;Some [_ input']) (#R;Success [input' []]))
(let [(^open "T/") text;Codec<Text,Text>]
- (#R;Error ($_ Text/append "Invalid match: " (T/encode test) " @ " (T/encode input)))))
+ (#R;Error ($_ Text/append "Invalid match: " (T/encode reference) " @ " (T/encode input)))))))
+
+(def: #export (this? reference)
+ {#;doc "Lex a text if it matches the given sample."}
+ (-> Text (Lexer Bool))
+ (function [input]
+ (if (text;starts-with? reference input)
+ (case (text;split (text;size reference) input)
+ #;None (#R;Success [input false])
+ (#;Some [_ input']) (#R;Success [input' true]))
+ (#R;Success [input false]))
))
(def: #export (sep-by sep lexer)
@@ -270,30 +280,16 @@
(def: #export peek
{#;doc "Lex the next character (without consuming it from the input)."}
- (Lexer Char)
+ (Lexer Text)
(function [input]
(case (text;nth +0 input)
(#;Some output)
- (#R;Success [input output])
+ (#R;Success [input (char;as-text output)])
_
(#R;Error "Cannot peek character from empty text."))
))
-(def: #export (char test)
- {#;doc "Lex a character if it matches the given sample."}
- (-> Char (Lexer Char))
- (function [input]
- (case [(text;nth +0 input) (text;split +1 input)]
- [(#;Some char') (#;Some [_ input'])]
- (if (Char/= test char')
- (#R;Success [input' test])
- (#R;Error ($_ Text/append "Expected " (:: char;Codec<Text,Char> encode test) " @ " (:: text;Codec<Text,Text> encode input))))
-
- _
- (#R;Error "Cannot parse character from empty text."))
- ))
-
(def: #export get-input
{#;doc "Get all of the remaining input (without consuming it)."}
(Lexer Text)
@@ -302,19 +298,20 @@
(def: #export (char-range bottom top)
{#;doc "Only lex characters within a range."}
- (-> Char Char (Lexer Char))
+ (-> Char Char (Lexer Text))
(do Monad<Lexer>
[input get-input
char any
- _ (assert ($_ Text/append "Character is not within range: " (:: char;Codec<Text,Char> encode bottom) "-" (:: char;Codec<Text,Char> encode top) " @ " (:: text;Codec<Text,Text> encode input))
- (and (Char/>= bottom char)
- (Char/<= top char)))]
+ #let [char' (|> char (text;nth +0) assume)]
+ _ (assert ($_ Text/append "Character is not within range: " (Char/encode bottom) "-" (Char/encode top) " @ " (:: text;Codec<Text,Text> encode input))
+ (and (Char/>= bottom char')
+ (Char/<= top char')))]
(wrap char)))
(do-template [<name> <bottom> <top> <desc>]
[(def: #export <name>
{#;doc (#;TextA ($_ Text/append "Only lex " <desc> " characters."))}
- (Lexer Char)
+ (Lexer Text)
(char-range <bottom> <top>))]
[upper #"A" #"Z" "uppercase"]
@@ -325,17 +322,17 @@
(def: #export alpha
{#;doc "Only lex alphabetic characters."}
- (Lexer Char)
+ (Lexer Text)
(either lower upper))
(def: #export alpha-num
{#;doc "Only lex alphanumeric characters."}
- (Lexer Char)
+ (Lexer Text)
(either alpha digit))
(def: #export hex-digit
{#;doc "Only lex hexadecimal digits."}
- (Lexer Char)
+ (Lexer Text)
($_ either
digit
(char-range #"a" #"f")
@@ -343,14 +340,14 @@
(def: #export (one-of options)
{#;doc "Only lex characters that are part of a piece of text."}
- (-> Text (Lexer Char))
+ (-> Text (Lexer Text))
(function [input]
(case (text;split +1 input)
(#;Some [init input'])
(if (text;contains? init options)
(case (text;nth +0 init)
(#;Some output)
- (#R;Success [input' output])
+ (#R;Success [input' (char;as-text output)])
_
(#R;Error ""))
@@ -361,14 +358,14 @@
(def: #export (none-of options)
{#;doc "Only lex characters that are not part of a piece of text."}
- (-> Text (Lexer Char))
+ (-> Text (Lexer Text))
(function [input]
(case (text;split +1 input)
(#;Some [init input'])
(if (;not (text;contains? init options))
(case (text;nth +0 init)
(#;Some output)
- (#R;Success [input' output])
+ (#R;Success [input' (char;as-text output)])
_
(#R;Error ""))
@@ -379,7 +376,7 @@
(def: #export (satisfies p)
{#;doc "Only lex characters that satisfy a predicate."}
- (-> (-> Char Bool) (Lexer Char))
+ (-> (-> Char Bool) (Lexer Text))
(function [input]
(case (: (Maybe [Text Char])
(do Monad<Maybe>
@@ -388,7 +385,7 @@
(wrap [input' output])))
(#;Some [input' output])
(if (p output)
- (#R;Success [input' output])
+ (#R;Success [input' (char;as-text output)])
(#R;Error ($_ Text/append "Character does not satisfy predicate: " (:: text;Codec<Text,Text> encode input))))
_
@@ -396,7 +393,7 @@
(def: #export space
{#;doc "Only lex white-space."}
- (Lexer Char)
+ (Lexer Text)
(satisfies char;space?))
(def: #export (constrain test lexer)
@@ -412,10 +409,10 @@
(do-template [<name> <base> <doc>]
[(def: #export (<name> p)
{#;doc <doc>}
- (-> (Lexer Char) (Lexer Text))
+ (-> (Lexer Text) (Lexer Text))
(do Monad<Lexer>
- [cs (<base> p)]
- (wrap (text;concat (map char;as-text cs)))))]
+ []
+ (|> p <base> (:: @ map text;concat))))]
[some' some "Lex some characters as a single continuous text."]
[many' many "Lex many characters as a single continuous text."]
@@ -424,10 +421,10 @@
(do-template [<name> <base> <doc>]
[(def: #export (<name> n p)
{#;doc <doc>}
- (-> Nat (Lexer Char) (Lexer Text))
+ (-> Nat (Lexer Text) (Lexer Text))
(do Monad<Lexer>
- [cs (<base> n p)]
- (wrap (text;concat (map char;as-text cs)))))]
+ []
+ (|> p (<base> n) (:: @ map text;concat))))]
[exactly' exactly "Lex exactly N characters."]
[at-most' at-most "Lex at most N characters."]
@@ -436,10 +433,10 @@
(def: #export (between' from to p)
{#;doc "Lex between N and M characters."}
- (-> Nat Nat (Lexer Char) (Lexer Text))
+ (-> Nat Nat (Lexer Text) (Lexer Text))
(do Monad<Lexer>
- [cs (between from to p)]
- (wrap (text;concat (map char;as-text cs)))))
+ []
+ (|> p (between from to) (:: @ map text;concat))))
(def: #export end?
{#;doc "Ask if the lexer's input is empty."}
@@ -463,13 +460,8 @@
(def: #export (default value lexer)
{#;doc "If the given lexer fails, this lexer will succeed with the provided value."}
(All [a] (-> a (Lexer a) (Lexer a)))
- (function [input]
- (case (lexer input)
- (#R;Error error)
- (#R;Success [input value])
-
- (#R;Success input'+value)
- (#R;Success input'+value))))
+ (|> (opt lexer)
+ (:: Monad<Lexer> map (|>. (;default value)))))
(def: #export (codec codec lexer)
{#;doc "Lex a token by means of a codec."}
@@ -490,8 +482,8 @@
(def: #export (enclosed [start end] lexer)
(All [a] (-> [Text Text] (Lexer a) (Lexer a)))
(|> lexer
- (before (text end))
- (after (text start))))
+ (before (this end))
+ (after (this start))))
(def: #export (rec lexer)
(All [a] (-> (-> (Lexer a) (Lexer a))
diff --git a/stdlib/source/lux/data/text/regex.lux b/stdlib/source/lux/data/text/regex.lux
index 95a932905..405eca618 100644
--- a/stdlib/source/lux/data/text/regex.lux
+++ b/stdlib/source/lux/data/text/regex.lux
@@ -3,7 +3,7 @@
(lux (control monad)
(data [char]
[text]
- ["&" text/lexer #+ Lexer Monad<Lexer>]
+ ["&" text/lexer #+ Lexer Monad<Lexer> "&/" Monad<Lexer>]
text/format
[number "Int/" Codec<Text,Int>]
[product]
@@ -13,24 +13,17 @@
["s" syntax #+ syntax:])))
## [Utils]
-(def: #hidden (->Text lexer^)
- (-> (Lexer Char) (Lexer Text))
- (do Monad<Lexer>
- [output lexer^]
- (wrap (char;as-text output))))
-
(def: regex-char^
- (Lexer Char)
+ (Lexer Text)
(&;none-of "\\.|&()[]{}"))
(def: escaped-char^
- (Lexer Char)
+ (Lexer Text)
(do Monad<Lexer>
- [? (&;opt (&;char #"\\"))
- char (case ?
- (#;Some _) &;any
- #;None regex-char^)]
- (wrap char)))
+ [? (&;this? "\\")]
+ (if ?
+ &;any
+ regex-char^)))
(def: (local^ state lexer)
(All [a] (-> Text (Lexer a) (Lexer a)))
@@ -50,9 +43,13 @@
(wrap output)))
(def: #hidden word^
- (Lexer Char)
+ (Lexer Text)
(&;either &;alpha-num
- (&;char #"_")))
+ (&;one-of "_")))
+
+(def: #hidden (copy reference)
+ (-> Text (Lexer Text))
+ (&;after (&;this reference) (&/wrap reference)))
(def: #hidden (join-text^ part^)
(-> (Lexer (List Text)) (Lexer Text))
@@ -61,14 +58,14 @@
(wrap (text;join-with "" parts))))
(def: identifier-char^
- (Lexer Char)
+ (Lexer Text)
(&;none-of "[]{}()s\"#;<>"))
(def: identifier-part^
(Lexer Text)
(do Monad<Lexer>
[head (refine^ (&;not &;digit)
- (->Text identifier-char^))
+ identifier-char^)
tail (&;some' identifier-char^)]
(wrap (format head tail))))
@@ -77,9 +74,9 @@
(do Monad<Lexer>
[]
($_ &;either
- (&;seq (wrap current-module) (&;after (&;text ";;") identifier-part^))
- (&;seq identifier-part^ (&;after (&;text ";") identifier-part^))
- (&;seq (wrap "lux") (&;after (&;text ";") identifier-part^))
+ (&;seq (wrap current-module) (&;after (&;this ";;") identifier-part^))
+ (&;seq identifier-part^ (&;after (&;this ";") identifier-part^))
+ (&;seq (wrap "lux") (&;after (&;this ";") identifier-part^))
(&;seq (wrap "") identifier-part^))))
(def: (re-var^ current-module)
@@ -91,22 +88,16 @@
(def: re-char-range^
(Lexer Code)
(do Monad<Lexer>
- [from regex-char^
- _ (&;char #"-")
- to regex-char^]
+ [from (|> regex-char^ (:: @ map (|>. (text;nth +0) assume)))
+ _ (&;this "-")
+ to (|> regex-char^ (:: @ map (|>. (text;nth +0) assume)))]
(wrap (` (&;char-range (~ (code;char from)) (~ (code;char to)))))))
(def: re-char^
(Lexer Code)
(do Monad<Lexer>
[char escaped-char^]
- (wrap (` (&;char (~ (code;char char)))))))
-
-(def: re-char+^
- (Lexer Code)
- (do Monad<Lexer>
- [base re-char^]
- (wrap (` (->Text (~ base))))))
+ (wrap (` (;;copy (~ (code;text char)))))))
(def: re-char-options^
(Lexer Code)
@@ -117,78 +108,78 @@
(def: re-user-class^'
(Lexer Code)
(do Monad<Lexer>
- [negate? (&;opt (&;char #"^"))
+ [negate? (&;opt (&;this "^"))
parts (&;many ($_ &;either
re-char-range^
re-char-options^))]
(wrap (case negate?
- (#;Some _) (` (->Text (&;not ($_ &;either (~@ parts)))))
- #;None (` (->Text ($_ &;either (~@ parts))))))))
+ (#;Some _) (` (&;not ($_ &;either (~@ parts))))
+ #;None (` ($_ &;either (~@ parts)))))))
(def: re-user-class^
(Lexer Code)
(do Monad<Lexer>
[_ (wrap [])
init re-user-class^'
- rest (&;some (&;after (&;text "&&") (&;enclosed ["[" "]"] re-user-class^')))]
+ rest (&;some (&;after (&;this "&&") (&;enclosed ["[" "]"] re-user-class^')))]
(wrap (fold (function [refinement base]
(` (refine^ (~ refinement) (~ base))))
init
rest))))
(def: #hidden blank^
- (Lexer Char)
+ (Lexer Text)
(&;one-of " \t"))
(def: #hidden ascii^
- (Lexer Char)
+ (Lexer Text)
(&;char-range #"\u0000" #"\u007F"))
(def: #hidden control^
- (Lexer Char)
+ (Lexer Text)
(&;either (&;char-range #"\u0000" #"\u001F")
- (&;char #"\u007F")))
+ (&;one-of "\u007F")))
(def: #hidden punct^
- (Lexer Char)
+ (Lexer Text)
(&;one-of "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"))
(def: #hidden graph^
- (Lexer Char)
+ (Lexer Text)
(&;either punct^ &;alpha-num))
(def: #hidden print^
- (Lexer Char)
+ (Lexer Text)
(&;either graph^
- (&;char #"\u0020")))
+ (&;one-of "\u0020")))
(def: re-system-class^
(Lexer Code)
(do Monad<Lexer>
[]
($_ &;either
- (&;after (&;char #".") (wrap (` (->Text &;any))))
- (&;after (&;text "\\d") (wrap (` (->Text &;digit))))
- (&;after (&;text "\\D") (wrap (` (->Text (&;not &;digit)))))
- (&;after (&;text "\\s") (wrap (` (->Text &;space))))
- (&;after (&;text "\\S") (wrap (` (->Text (&;not &;space)))))
- (&;after (&;text "\\w") (wrap (` (->Text word^))))
- (&;after (&;text "\\W") (wrap (` (->Text (&;not word^)))))
-
- (&;after (&;text "\\p{Lower}") (wrap (` (->Text &;lower))))
- (&;after (&;text "\\p{Upper}") (wrap (` (->Text &;upper))))
- (&;after (&;text "\\p{Alpha}") (wrap (` (->Text &;alpha))))
- (&;after (&;text "\\p{Digit}") (wrap (` (->Text &;digit))))
- (&;after (&;text "\\p{Alnum}") (wrap (` (->Text &;alpha-num))))
- (&;after (&;text "\\p{Space}") (wrap (` (->Text &;space))))
- (&;after (&;text "\\p{HexDigit}") (wrap (` (->Text &;hex-digit))))
- (&;after (&;text "\\p{OctDigit}") (wrap (` (->Text &;oct-digit))))
- (&;after (&;text "\\p{Blank}") (wrap (` (->Text blank^))))
- (&;after (&;text "\\p{ASCII}") (wrap (` (->Text ascii^))))
- (&;after (&;text "\\p{Contrl}") (wrap (` (->Text control^))))
- (&;after (&;text "\\p{Punct}") (wrap (` (->Text punct^))))
- (&;after (&;text "\\p{Graph}") (wrap (` (->Text graph^))))
- (&;after (&;text "\\p{Print}") (wrap (` (->Text print^))))
+ (&;after (&;this ".") (wrap (` &;any)))
+ (&;after (&;this "\\d") (wrap (` &;digit)))
+ (&;after (&;this "\\D") (wrap (` (&;not &;digit))))
+ (&;after (&;this "\\s") (wrap (` &;space)))
+ (&;after (&;this "\\S") (wrap (` (&;not &;space))))
+ (&;after (&;this "\\w") (wrap (` word^)))
+ (&;after (&;this "\\W") (wrap (` (&;not word^))))
+
+ (&;after (&;this "\\p{Lower}") (wrap (` &;lower)))
+ (&;after (&;this "\\p{Upper}") (wrap (` &;upper)))
+ (&;after (&;this "\\p{Alpha}") (wrap (` &;alpha)))
+ (&;after (&;this "\\p{Digit}") (wrap (` &;digit)))
+ (&;after (&;this "\\p{Alnum}") (wrap (` &;alpha-num)))
+ (&;after (&;this "\\p{Space}") (wrap (` &;space)))
+ (&;after (&;this "\\p{HexDigit}") (wrap (` &;hex-digit)))
+ (&;after (&;this "\\p{OctDigit}") (wrap (` &;oct-digit)))
+ (&;after (&;this "\\p{Blank}") (wrap (` blank^)))
+ (&;after (&;this "\\p{ASCII}") (wrap (` ascii^)))
+ (&;after (&;this "\\p{Contrl}") (wrap (` control^)))
+ (&;after (&;this "\\p{Punct}") (wrap (` punct^)))
+ (&;after (&;this "\\p{Graph}") (wrap (` graph^)))
+ (&;after (&;this "\\p{Print}") (wrap (` print^)))
)))
(def: re-class^
@@ -196,21 +187,23 @@
(&;either re-system-class^
(&;enclosed ["[" "]"] re-user-class^)))
-(def: int^
- (Lexer Int)
- (&;codec number;Codec<Text,Int> (&;many' &;digit)))
+(def: number^
+ (Lexer Nat)
+ (|> (&;many' &;digit)
+ (&;codec number;Codec<Text,Int>)
+ (&/map int-to-nat)))
(def: re-back-reference^
(Lexer Code)
(&;either (do Monad<Lexer>
- [_ (&;char #"\\")
- id int^]
- (wrap (` (&;text (~ (code;symbol ["" (Int/encode id)]))))))
+ [_ (&;this "\\")
+ id number^]
+ (wrap (` (;;copy (~ (code;symbol ["" (Int/encode (nat-to-int id))]))))))
(do Monad<Lexer>
- [_ (&;text "\\k<")
+ [_ (&;this "\\k<")
captured-name identifier-part^
- _ (&;text ">")]
- (wrap (` (&;text (~ (code;symbol ["" captured-name]))))))))
+ _ (&;this ">")]
+ (wrap (` (;;copy (~ (code;symbol ["" captured-name]))))))))
(def: (re-simple^ current-module)
(-> Text (Lexer Code))
@@ -218,7 +211,7 @@
re-class^
(re-var^ current-module)
re-back-reference^
- re-char+^
+ re-char^
))
(def: (re-simple-quantified^ current-module)
@@ -227,12 +220,13 @@
[base (re-simple^ current-module)
quantifier (&;one-of "?*+")]
(case quantifier
- #"?"
+ "?"
(wrap (` (&;default "" (~ base))))
- #"*"
+ "*"
(wrap (` (join-text^ (&;some (~ base)))))
+ ## "+"
_
(wrap (` (join-text^ (&;many (~ base)))))
)))
@@ -244,19 +238,19 @@
(&;enclosed ["{" "}"]
($_ &;either
(do @
- [[from to] (&;seq int^ (&;after (&;char #",") int^))]
- (wrap (` (join-text^ (&;between (~ (code;nat (int-to-nat from)))
- (~ (code;nat (int-to-nat to)))
+ [[from to] (&;seq number^ (&;after (&;this ",") number^))]
+ (wrap (` (join-text^ (&;between (~ (code;nat from))
+ (~ (code;nat to))
(~ base))))))
(do @
- [limit (&;after (&;char #",") int^)]
- (wrap (` (join-text^ (&;at-most (~ (code;nat (int-to-nat limit))) (~ base))))))
+ [limit (&;after (&;this ",") number^)]
+ (wrap (` (join-text^ (&;at-most (~ (code;nat limit)) (~ base))))))
(do @
- [limit (&;before (&;char #",") int^)]
- (wrap (` (join-text^ (&;at-least (~ (code;nat (int-to-nat limit))) (~ base))))))
+ [limit (&;before (&;this ",") number^)]
+ (wrap (` (join-text^ (&;at-least (~ (code;nat limit)) (~ base))))))
(do @
- [limit int^]
- (wrap (` (join-text^ (&;exactly (~ (code;nat (int-to-nat limit))) (~ base))))))))))
+ [limit number^]
+ (wrap (` (join-text^ (&;exactly (~ (code;nat limit)) (~ base))))))))))
(def: (re-quantified^ current-module)
(-> Text (Lexer Code))
@@ -376,7 +370,7 @@
(do Monad<Lexer>
[#let [sub^ (re-sequential^ capturing? re-scoped^ current-module)]
head sub^
- tail (&;some (&;after (&;char #"|") sub^))
+ tail (&;some (&;after (&;this "|") sub^))
#let [g!op (if capturing?
(` |||^)
(` |||_^))]]
@@ -389,24 +383,24 @@
(-> Text (Lexer [Re-Group Code]))
($_ &;either
(do Monad<Lexer>
- [_ (&;text "(?:")
+ [_ (&;this "(?:")
[_ scoped] (re-alternative^ false re-scoped^ current-module)
- _ (&;char #")")]
+ _ (&;this ")")]
(wrap [#Non-Capturing scoped]))
(do Monad<Lexer>
[complex (re-complex^ current-module)]
(wrap [#Non-Capturing complex]))
(do Monad<Lexer>
- [_ (&;text "(?<")
+ [_ (&;this "(?<")
captured-name identifier-part^
- _ (&;text ">")
+ _ (&;this ">")
[num-captures pattern] (re-alternative^ true re-scoped^ current-module)
- _ (&;char #")")]
+ _ (&;this ")")]
(wrap [(#Capturing [(#;Some captured-name) num-captures]) pattern]))
(do Monad<Lexer>
- [_ (&;char #"(")
+ [_ (&;this "(")
[num-captures pattern] (re-alternative^ true re-scoped^ current-module)
- _ (&;char #")")]
+ _ (&;this ")")]
(wrap [(#Capturing [#;None num-captures]) pattern]))))
(def: (regex^ current-module)
@@ -479,7 +473,8 @@
(&;before &;end)
(&;run pattern))
(#;Left error)
- (macro;fail error)
+ (macro;fail (format "Error while parsing regular-expression:\n"
+ error))
(#;Right regex)
(wrap (list regex))