(.module: {#.doc "Functionality for reading, generating and processing values in the XML format."} lux (lux (control monad [eq #+ Eq] codec ["p" parser "p/" Monad] ["ex" exception #+ exception:]) (data [text "text/" Eq Monoid] (text ["l" lexer]) [number] ["E" error] [product] [maybe "m/" Monad] [ident "ident/" Eq Codec] (coll [list "L/" Monad] ["d" dict])))) (type: #export Tag Ident) (type: #export Attrs (d.Dict Ident Text)) (def: #export attrs Attrs (d.new ident.Hash)) (type: #export #rec XML (#Text Text) (#Node Tag Attrs (List XML))) (def: xml-standard-escape-char^ (l.Lexer Text) ($_ p.either (p.after (l.this "<") (p/wrap "<")) (p.after (l.this ">") (p/wrap ">")) (p.after (l.this "&") (p/wrap "&")) (p.after (l.this "'") (p/wrap "'")) (p.after (l.this """) (p/wrap "\"")))) (def: xml-unicode-escape-char^ (l.Lexer Text) (|> (do p.Monad [hex? (p.maybe (l.this "x")) code (case hex? #.None (p.codec number.Codec (l.many l.decimal)) (#.Some _) (p.codec number.Hex@Codec (l.many l.hexadecimal)))] (wrap (|> code int-to-nat text.from-code))) (p.before (l.this ";")) (p.after (l.this "&#")))) (def: xml-escape-char^ (l.Lexer Text) (p.either xml-standard-escape-char^ xml-unicode-escape-char^)) (def: xml-char^ (l.Lexer Text) (p.either (l.none-of "<>&'\"") xml-escape-char^)) (def: xml-identifier (l.Lexer Text) (do p.Monad [head (p.either (l.one-of "_") l.alpha) tail (l.some (p.either (l.one-of "_.-") l.alpha-num))] (wrap ($_ text/compose head tail)))) (def: namespaced-symbol^ (l.Lexer Ident) (do p.Monad [first-part xml-identifier ?second-part (<| p.maybe (p.after (l.this ":")) xml-identifier)] (case ?second-part #.None (wrap ["" first-part]) (#.Some second-part) (wrap [first-part second-part])))) (def: tag^ namespaced-symbol^) (def: attr-name^ namespaced-symbol^) (def: spaced^ (All [a] (-> (l.Lexer a) (l.Lexer a))) (let [white-space^ (p.some l.space)] (|>> (p.before white-space^) (p.after white-space^)))) (def: attr-value^ (l.Lexer Text) (let [value^ (l.some xml-char^)] (p.either (l.enclosed ["\"" "\""] value^) (l.enclosed ["'" "'"] value^)))) (def: attrs^ (l.Lexer Attrs) (<| (:: p.Monad map (d.from-list ident.Hash)) p.some (p.seq (spaced^ attr-name^)) (p.after (l.this "=")) (spaced^ attr-value^))) (def: (close-tag^ expected) (-> Tag (l.Lexer [])) (do p.Monad [actual (|> tag^ spaced^ (p.after (l.this "/")) (l.enclosed ["<" ">"]))] (p.assert ($_ text/compose "Close tag does not match open tag.\n" "Expected: " (ident/encode expected) "\n" " Actual: " (ident/encode actual) "\n") (ident/= expected actual)))) (def: comment^ (l.Lexer Text) (|> (l.not (l.this "--")) l.some (l.enclosed ["<--" "-->"]) spaced^)) (def: xml-header^ (l.Lexer Attrs) (|> (spaced^ attrs^) (p.before (l.this "?>")) (p.after (l.this "")] (|> (l.some (l.not end)) (p.after end) (p.after (l.this " (p.either cdata^ (l.many xml-char^)) (p/map (|>> #Text)))) (def: xml^ (l.Lexer XML) (|> (p.rec (function [node^] (p.either text^ (spaced^ (do p.Monad [_ (l.this "<") tag (spaced^ tag^) attrs (spaced^ attrs^) #let [no-children^ (do p.Monad [_ (l.this "/>")] (wrap (#Node tag attrs (list)))) with-children^ (do p.Monad [_ (l.this ">") children (p.some node^) _ (close-tag^ tag)] (wrap (#Node tag attrs children)))]] (p.either no-children^ with-children^)))))) ## This is put outside of the call to "rec" because comments ## cannot be located inside of XML nodes. ## This way, the comments can only be before or after the main document. (p.before (p.some comment^)) (p.after (p.some comment^)) (p.after (p.maybe xml-header^)))) (def: #export (read input) (-> Text (E.Error XML)) (l.run input xml^)) (def: (sanitize-value input) (-> Text Text) (|> input (text.replace-all "&" "&") (text.replace-all "<" "<") (text.replace-all ">" ">") (text.replace-all "'" "'") (text.replace-all "\"" """))) (def: (write-tag [namespace name]) (-> Tag Text) (case namespace "" name _ ($_ text/compose namespace ":" name))) (def: (write-attrs attrs) (-> Attrs Text) (|> attrs d.entries (L/map (function [[key value]] ($_ text/compose (write-tag key) "=" "\""(sanitize-value value) "\""))) (text.join-with " "))) (def: xml-header Text "") (def: #export (write input) (-> XML Text) ($_ text/compose xml-header (loop [input input] (case input (#Text value) (sanitize-value value) (#Node xml-tag xml-attrs xml-children) (let [tag (write-tag xml-tag) attrs (if (d.empty? xml-attrs) "" ($_ text/compose " " (write-attrs xml-attrs)))] (if (list.empty? xml-children) ($_ text/compose "<" tag attrs "/>") ($_ text/compose "<" tag attrs ">" (|> xml-children (L/map recur) (text.join-with "")) ""))))))) (struct: #export _ (Codec Text XML) (def: encode write) (def: decode read)) (struct: #export _ (Eq XML) (def: (= reference sample) (case [reference sample] [(#Text reference/value) (#Text sample/value)] (text/= reference/value sample/value) [(#Node reference/tag reference/attrs reference/children) (#Node sample/tag sample/attrs sample/children)] (and (ident/= reference/tag sample/tag) (:: (d.Eq text.Eq) = reference/attrs sample/attrs) (n/= (list.size reference/children) (list.size sample/children)) (|> (list.zip2 reference/children sample/children) (list.every? (product.uncurry =)))) _ false))) (type: #export (Reader a) (p.Parser (List XML) a)) (exception: #export Empty-Input) (exception: #export Unexpected-Input) (exception: #export Unknown-Attribute) (exception: #export (Wrong-Tag {tag Ident}) (ident/encode tag)) (exception: #export (Unconsumed-Inputs {inputs (List XML)}) (|> inputs (L/map (:: Codec encode)) (text.join-with "\n\n"))) (def: #export text (Reader Text) (function [docs] (case docs #.Nil (ex.throw Empty-Input []) (#.Cons head tail) (case head (#Text value) (#E.Success [tail value]) (#Node _) (ex.throw Unexpected-Input []))))) (def: #export (attr name) (-> Ident (Reader Text)) (function [docs] (case docs #.Nil (ex.throw Empty-Input []) (#.Cons head _) (case head (#Text _) (ex.throw Unexpected-Input []) (#Node tag attrs children) (case (d.get name attrs) #.None (ex.throw Unknown-Attribute []) (#.Some value) (#E.Success [docs value])))))) (def: (run' docs reader) (All [a] (-> (List XML) (Reader a) (E.Error a))) (case (p.run docs reader) (#E.Success [remaining output]) (if (list.empty? remaining) (#E.Success output) (ex.throw Unconsumed-Inputs remaining)) (#E.Error error) (#E.Error error))) (def: #export (node tag) (-> Ident (Reader Unit)) (function [docs] (case docs #.Nil (ex.throw Empty-Input []) (#.Cons head _) (case head (#Text _) (ex.throw Unexpected-Input []) (#Node _tag _attrs _children) (if (ident/= tag _tag) (#E.Success [docs []]) (ex.throw Wrong-Tag tag)))))) (def: #export (children reader) (All [a] (-> (Reader a) (Reader a))) (function [docs] (case docs #.Nil (ex.throw Empty-Input []) (#.Cons head tail) (case head (#Text _) (ex.throw Unexpected-Input []) (#Node _tag _attrs _children) (do E.Monad [output (run' _children reader)] (wrap [tail output])))))) (def: #export ignore (Reader Unit) (function [docs] (case docs #.Nil (ex.throw Empty-Input []) (#.Cons head tail) (#E.Success [tail []])))) (def: #export (run document reader) (All [a] (-> XML (Reader a) (E.Error a))) (run' (list document) reader))