From 4474a2a3ba6b345cb5d369ad5654bf11481d0393 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 19 Sep 2017 19:16:11 -0400 Subject: - XML parsing. --- stdlib/source/lux/data/format/xml.lux | 140 ++++++++++++++++++++++++++----- stdlib/test/test/lux/data/format/xml.lux | 49 ++++++++++- 2 files changed, 163 insertions(+), 26 deletions(-) (limited to 'stdlib') diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux index 4ff38380f..dc6074ef5 100644 --- a/stdlib/source/lux/data/format/xml.lux +++ b/stdlib/source/lux/data/format/xml.lux @@ -3,37 +3,28 @@ (lux (control monad [eq #+ Eq] codec - ["p" parser "p/" Monad]) + ["p" parser "p/" Monad] + ["ex" exception #+ exception:]) (data [text "text/" Eq Monoid] (text ["l" lexer]) [number] ["R" result] [product] [maybe "m/" Monad] - [ident "Ident/" Eq Codec] + [ident "ident/" Eq Codec] (coll [list "L/" Monad] - ["d" dict] - (tree ["T" rose] - ["Z" zipper]))) + ["d" dict])) )) -## [Types] (type: #export Tag Ident) (type: #export Attrs (d;Dict Ident Text)) +(def: #export attrs Attrs (d;new ident;Hash)) + (type: #export #rec XML (#Text Text) (#Node Tag Attrs (List XML))) -(def: #export (text value) - (-> Text XML) - (#Text value)) - -(def: #export (node tag attrs children) - (-> Tag Attrs (List XML) XML) - (#Node tag attrs children)) - -## [Parsing] (def: xml-standard-escape-char^ (l;Lexer Text) ($_ p;either @@ -119,9 +110,9 @@ (p;after (l;this "/")) (l;enclosed ["<" ">"]))] (p;assert ($_ text/append "Close tag does not match open tag.\n" - "Expected: " (Ident/encode expected) "\n" - " Actual: " (Ident/encode actual) "\n") - (Ident/= expected actual)))) + "Expected: " (ident/encode expected) "\n" + " Actual: " (ident/encode actual) "\n") + (ident/= expected actual)))) (def: comment^ (l;Lexer Text) @@ -163,12 +154,12 @@ attrs (spaced^ attrs^) #let [no-children^ (do p;Monad [_ (l;this "/>")] - (wrap (node tag attrs (list)))) + (wrap (#Node tag attrs (list)))) with-children^ (do p;Monad [_ (l;this ">") children (p;some node^) _ (close-tag^ tag)] - (wrap (node tag attrs children)))]] + (wrap (#Node tag attrs children)))]] (p;either no-children^ with-children^)))))) ## This is put outside of the call to "rec" because comments @@ -182,7 +173,6 @@ (-> Text (R;Result XML)) (l;run input xml^)) -## [Generation] (def: (sanitize-value input) (-> Text Text) (|> input @@ -231,7 +221,6 @@ (text;join-with "")) ""))))))) -## [Structs] (struct: #export _ (Codec Text XML) (def: encode write) (def: decode read)) @@ -244,7 +233,7 @@ [(#Node reference/tag reference/attrs reference/children) (#Node sample/tag sample/attrs sample/children)] - (and (Ident/= reference/tag sample/tag) + (and (ident/= reference/tag sample/tag) (:: (d;Eq text;Eq) = reference/attrs sample/attrs) (n.= (list;size reference/children) (list;size sample/children)) @@ -253,3 +242,108 @@ _ false))) + +(type: #export (Reader a) + (p;Parser (List XML) a)) + +(exception: #export Empty-Input) +(exception: #export Unexpected-Input) +(exception: #export Unknown-Attribute) +(exception: #export Wrong-Tag) +(exception: #export Unconsumed-Inputs) + +(def: #export text + (Reader Text) + (function [docs] + (case docs + #;Nil + (ex;throw Empty-Input "") + + (#;Cons head tail) + (case head + (#Text value) + (#R;Success [tail value]) + + (#Node _) + (ex;throw Unexpected-Input ""))))) + +(def: #export (attr name) + (-> Ident (Reader Text)) + (function [docs] + (case docs + #;Nil + (ex;throw Empty-Input "") + + (#;Cons head _) + (case head + (#Text _) + (ex;throw Unexpected-Input "") + + (#Node tag attrs children) + (case (d;get name attrs) + #;None + (ex;throw Unknown-Attribute "") + + (#;Some value) + (#R;Success [docs value])))))) + +(def: (run' docs reader) + (All [a] (-> (List XML) (Reader a) (R;Result a))) + (case (p;run docs reader) + (#R;Success [remaining output]) + (if (list;empty? remaining) + (#R;Success output) + (ex;throw Unconsumed-Inputs (|> remaining + (L/map (:: Codec encode)) + (text;join-with "\n\n")))) + + (#R;Error error) + (#R;Error error))) + +(def: #export (node tag) + (-> Ident (Reader Unit)) + (function [docs] + (case docs + #;Nil + (ex;throw Empty-Input "") + + (#;Cons head _) + (case head + (#Text _) + (ex;throw Unexpected-Input "") + + (#Node _tag _attrs _children) + (if (ident/= tag _tag) + (#R;Success [docs []]) + (ex;throw Wrong-Tag (ident/encode tag))))))) + +(def: #export (children reader) + (All [a] (-> (Reader a) (Reader a))) + (function [docs] + (case docs + #;Nil + (ex;throw Empty-Input "") + + (#;Cons head tail) + (case head + (#Text _) + (ex;throw Unexpected-Input "") + + (#Node _tag _attrs _children) + (do R;Monad + [output (run' _children reader)] + (wrap [tail output])))))) + +(def: #export ignore + (Reader Unit) + (function [docs] + (case docs + #;Nil + (ex;throw Empty-Input "") + + (#;Cons head tail) + (#R;Success [tail []])))) + +(def: #export (run document reader) + (All [a] (-> XML (Reader a) (R;Result a))) + (run' (list document) reader)) diff --git a/stdlib/test/test/lux/data/format/xml.lux b/stdlib/test/test/lux/data/format/xml.lux index 1910caf3e..382659ab0 100644 --- a/stdlib/test/test/lux/data/format/xml.lux +++ b/stdlib/test/test/lux/data/format/xml.lux @@ -2,13 +2,15 @@ lux (lux [io] (control [monad #+ do Monad] + ["p" parser] pipe) - (data [text "Text/" Monoid] + (data [text "text/" Eq] text/format [ident] + ["R" result] (format ["&" xml]) (coll [dict] - [list])) + [list "L/" Functor])) ["r" math/random "r/" Monad] test) ) @@ -52,7 +54,7 @@ (r;dict ident;Hash size xml-identifier^ (xml-text^ +0 +10)) (r;list size gen-xml))))))) -(context: "XML" +(context: "XML." [sample gen-xml #let [(^open "&/") &;Eq (^open "&/") &;Codec]] @@ -68,3 +70,44 @@ (#;Left error) false))) )) + +(context: "Parsing." + [text (xml-text^ +1 +10) + num-children (|> r;nat (:: @ map (n.% +5))) + children (r;list num-children (xml-text^ +1 +10)) + tag xml-identifier^ + attr xml-identifier^ + value (xml-text^ +1 +10) + #let [node (#&;Node tag + (dict;put attr value &;attrs) + (L/map (|>. #&;Text) children))]] + ($_ seq + (test "Can parse text." + (R;default false + (do R;Monad + [output (&;run (#&;Text text) + &;text)] + (wrap (text/= text output))))) + (test "Can parse attributes." + (R;default false + (do R;Monad + [output (|> (&;attr attr) + (p;before &;ignore) + (&;run node))] + (wrap (text/= value output))))) + (test "Can parse nodes." + (R;default false + (do R;Monad + [_ (|> (&;node tag) + (p;before &;ignore) + (&;run node))] + (wrap true)))) + (test "Can parse children." + (R;default false + (do R;Monad + [outputs (|> (&;children (p;some &;text)) + (&;run node))] + (wrap (:: (list;Eq text;Eq) = + children + outputs))))) + )) -- cgit v1.2.3