aboutsummaryrefslogtreecommitdiff
path: root/stdlib
diff options
context:
space:
mode:
authorEduardo Julian2017-09-19 19:16:11 -0400
committerEduardo Julian2017-09-19 19:16:11 -0400
commit4474a2a3ba6b345cb5d369ad5654bf11481d0393 (patch)
tree09e401874e4b57df4228aaea96b1c3f99cbd8277 /stdlib
parent6da0a54773e44ad0696437efacefa6f870c9868f (diff)
- XML parsing.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/source/lux/data/format/xml.lux140
-rw-r--r--stdlib/test/test/lux/data/format/xml.lux49
2 files changed, 163 insertions, 26 deletions
diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux
index 4ff38380f..dc6074ef5 100644
--- a/stdlib/source/lux/data/format/xml.lux
+++ b/stdlib/source/lux/data/format/xml.lux
@@ -3,37 +3,28 @@
(lux (control monad
[eq #+ Eq]
codec
- ["p" parser "p/" Monad<Parser>])
+ ["p" parser "p/" Monad<Parser>]
+ ["ex" exception #+ exception:])
(data [text "text/" Eq<Text> Monoid<Text>]
(text ["l" lexer])
[number]
["R" result]
[product]
[maybe "m/" Monad<Maybe>]
- [ident "Ident/" Eq<Ident> Codec<Text,Ident>]
+ [ident "ident/" Eq<Ident> Codec<Text,Ident>]
(coll [list "L/" Monad<List>]
- ["d" dict]
- (tree ["T" rose]
- ["Z" zipper])))
+ ["d" dict]))
))
-## [Types]
(type: #export Tag Ident)
(type: #export Attrs (d;Dict Ident Text))
+(def: #export attrs Attrs (d;new ident;Hash<Ident>))
+
(type: #export #rec XML
(#Text Text)
(#Node Tag Attrs (List XML)))
-(def: #export (text value)
- (-> Text XML)
- (#Text value))
-
-(def: #export (node tag attrs children)
- (-> Tag Attrs (List XML) XML)
- (#Node tag attrs children))
-
-## [Parsing]
(def: xml-standard-escape-char^
(l;Lexer Text)
($_ p;either
@@ -119,9 +110,9 @@
(p;after (l;this "/"))
(l;enclosed ["<" ">"]))]
(p;assert ($_ text/append "Close tag does not match open tag.\n"
- "Expected: " (Ident/encode expected) "\n"
- " Actual: " (Ident/encode actual) "\n")
- (Ident/= expected actual))))
+ "Expected: " (ident/encode expected) "\n"
+ " Actual: " (ident/encode actual) "\n")
+ (ident/= expected actual))))
(def: comment^
(l;Lexer Text)
@@ -163,12 +154,12 @@
attrs (spaced^ attrs^)
#let [no-children^ (do p;Monad<Parser>
[_ (l;this "/>")]
- (wrap (node tag attrs (list))))
+ (wrap (#Node tag attrs (list))))
with-children^ (do p;Monad<Parser>
[_ (l;this ">")
children (p;some node^)
_ (close-tag^ tag)]
- (wrap (node tag attrs children)))]]
+ (wrap (#Node tag attrs children)))]]
(p;either no-children^
with-children^))))))
## This is put outside of the call to "rec" because comments
@@ -182,7 +173,6 @@
(-> Text (R;Result XML))
(l;run input xml^))
-## [Generation]
(def: (sanitize-value input)
(-> Text Text)
(|> input
@@ -231,7 +221,6 @@
(text;join-with ""))
"</" tag ">")))))))
-## [Structs]
(struct: #export _ (Codec Text XML)
(def: encode write)
(def: decode read))
@@ -244,7 +233,7 @@
[(#Node reference/tag reference/attrs reference/children)
(#Node sample/tag sample/attrs sample/children)]
- (and (Ident/= reference/tag sample/tag)
+ (and (ident/= reference/tag sample/tag)
(:: (d;Eq<Dict> text;Eq<Text>) = reference/attrs sample/attrs)
(n.= (list;size reference/children)
(list;size sample/children))
@@ -253,3 +242,108 @@
_
false)))
+
+(type: #export (Reader a)
+ (p;Parser (List XML) a))
+
+(exception: #export Empty-Input)
+(exception: #export Unexpected-Input)
+(exception: #export Unknown-Attribute)
+(exception: #export Wrong-Tag)
+(exception: #export Unconsumed-Inputs)
+
+(def: #export text
+ (Reader Text)
+ (function [docs]
+ (case docs
+ #;Nil
+ (ex;throw Empty-Input "")
+
+ (#;Cons head tail)
+ (case head
+ (#Text value)
+ (#R;Success [tail value])
+
+ (#Node _)
+ (ex;throw Unexpected-Input "")))))
+
+(def: #export (attr name)
+ (-> Ident (Reader Text))
+ (function [docs]
+ (case docs
+ #;Nil
+ (ex;throw Empty-Input "")
+
+ (#;Cons head _)
+ (case head
+ (#Text _)
+ (ex;throw Unexpected-Input "")
+
+ (#Node tag attrs children)
+ (case (d;get name attrs)
+ #;None
+ (ex;throw Unknown-Attribute "")
+
+ (#;Some value)
+ (#R;Success [docs value]))))))
+
+(def: (run' docs reader)
+ (All [a] (-> (List XML) (Reader a) (R;Result a)))
+ (case (p;run docs reader)
+ (#R;Success [remaining output])
+ (if (list;empty? remaining)
+ (#R;Success output)
+ (ex;throw Unconsumed-Inputs (|> remaining
+ (L/map (:: Codec<Text,XML> encode))
+ (text;join-with "\n\n"))))
+
+ (#R;Error error)
+ (#R;Error error)))
+
+(def: #export (node tag)
+ (-> Ident (Reader Unit))
+ (function [docs]
+ (case docs
+ #;Nil
+ (ex;throw Empty-Input "")
+
+ (#;Cons head _)
+ (case head
+ (#Text _)
+ (ex;throw Unexpected-Input "")
+
+ (#Node _tag _attrs _children)
+ (if (ident/= tag _tag)
+ (#R;Success [docs []])
+ (ex;throw Wrong-Tag (ident/encode tag)))))))
+
+(def: #export (children reader)
+ (All [a] (-> (Reader a) (Reader a)))
+ (function [docs]
+ (case docs
+ #;Nil
+ (ex;throw Empty-Input "")
+
+ (#;Cons head tail)
+ (case head
+ (#Text _)
+ (ex;throw Unexpected-Input "")
+
+ (#Node _tag _attrs _children)
+ (do R;Monad<Result>
+ [output (run' _children reader)]
+ (wrap [tail output]))))))
+
+(def: #export ignore
+ (Reader Unit)
+ (function [docs]
+ (case docs
+ #;Nil
+ (ex;throw Empty-Input "")
+
+ (#;Cons head tail)
+ (#R;Success [tail []]))))
+
+(def: #export (run document reader)
+ (All [a] (-> XML (Reader a) (R;Result a)))
+ (run' (list document) reader))
diff --git a/stdlib/test/test/lux/data/format/xml.lux b/stdlib/test/test/lux/data/format/xml.lux
index 1910caf3e..382659ab0 100644
--- a/stdlib/test/test/lux/data/format/xml.lux
+++ b/stdlib/test/test/lux/data/format/xml.lux
@@ -2,13 +2,15 @@
lux
(lux [io]
(control [monad #+ do Monad]
+ ["p" parser]
pipe)
- (data [text "Text/" Monoid<Text>]
+ (data [text "text/" Eq<Text>]
text/format
[ident]
+ ["R" result]
(format ["&" xml])
(coll [dict]
- [list]))
+ [list "L/" Functor<List>]))
["r" math/random "r/" Monad<Random>]
test)
)
@@ -52,7 +54,7 @@
(r;dict ident;Hash<Ident> size xml-identifier^ (xml-text^ +0 +10))
(r;list size gen-xml)))))))
-(context: "XML"
+(context: "XML."
[sample gen-xml
#let [(^open "&/") &;Eq<XML>
(^open "&/") &;Codec<Text,XML>]]
@@ -68,3 +70,44 @@
(#;Left error)
false)))
))
+
+(context: "Parsing."
+ [text (xml-text^ +1 +10)
+ num-children (|> r;nat (:: @ map (n.% +5)))
+ children (r;list num-children (xml-text^ +1 +10))
+ tag xml-identifier^
+ attr xml-identifier^
+ value (xml-text^ +1 +10)
+ #let [node (#&;Node tag
+ (dict;put attr value &;attrs)
+ (L/map (|>. #&;Text) children))]]
+ ($_ seq
+ (test "Can parse text."
+ (R;default false
+ (do R;Monad<Result>
+ [output (&;run (#&;Text text)
+ &;text)]
+ (wrap (text/= text output)))))
+ (test "Can parse attributes."
+ (R;default false
+ (do R;Monad<Result>
+ [output (|> (&;attr attr)
+ (p;before &;ignore)
+ (&;run node))]
+ (wrap (text/= value output)))))
+ (test "Can parse nodes."
+ (R;default false
+ (do R;Monad<Result>
+ [_ (|> (&;node tag)
+ (p;before &;ignore)
+ (&;run node))]
+ (wrap true))))
+ (test "Can parse children."
+ (R;default false
+ (do R;Monad<Result>
+ [outputs (|> (&;children (p;some &;text))
+ (&;run node))]
+ (wrap (:: (list;Eq<List> text;Eq<Text>) =
+ children
+ outputs)))))
+ ))