(;module: {#;doc "Functionality for reading, generating and processing values in the XML format."} lux (lux (control monad eq codec ["p" parser "p/" Monad]) (data [text "t/" Eq] text/format (text ["l" lexer]) [number] ["R" result] [char "c/" Eq] [product] [maybe "m/" Monad] [ident "Ident/" Eq] (coll [list "L/" Monad] ["d" dict] (tree ["T" rose] ["Z" zipper]))) )) ## [Types] (type: #export Tag Ident) (type: #export Attrs (d;Dict Ident Text)) (type: #export #rec XML (#Text Text) (#Node Tag Attrs (List XML))) (def: #export (text value) (-> Text XML) (#Text value)) (def: #export (node tag attrs children) (-> Tag Attrs (List XML) XML) (#Node tag attrs children)) ## [Parsing] (def: xml-standard-escape-char^ (l;Lexer Text) ($_ p;either (p;after (l;this "<") (p/wrap "<")) (p;after (l;this ">") (p/wrap ">")) (p;after (l;this "&") (p/wrap "&")) (p;after (l;this "'") (p/wrap "'")) (p;after (l;this """) (p/wrap "\"")))) (def: xml-unicode-escape-char^ (l;Lexer Text) (|> (do p;Monad [hex? (p;opt (l;this "x")) code (case hex? #;None (l;codec number;Codec (l;many l;digit)) (#;Some _) (l;codec number;Hex@Codec (l;many l;hex-digit)))] (wrap (|> code int-to-nat char;char char;as-text))) (p;before (l;this ";")) (p;after (l;this "&#")))) (def: xml-escape-char^ (l;Lexer Text) (p;either xml-standard-escape-char^ xml-unicode-escape-char^)) (def: xml-char^ (l;Lexer Text) (p;either (l;none-of "<>&'\"") xml-escape-char^)) (def: xml-identifier (l;Lexer Text) (do p;Monad [head (p;either (l;one-of "_") l;alpha) tail (l;some (p;either (l;one-of "_.-") l;alpha-num))] (wrap (format head tail)))) (def: namespaced-symbol^ (l;Lexer Ident) (do p;Monad [first-part xml-identifier ?second-part (<| p;opt (p;after (l;this ":")) xml-identifier)] (case ?second-part #;None (wrap ["" first-part]) (#;Some second-part) (wrap [first-part second-part])))) (def: tag^ namespaced-symbol^) (def: attr-name^ namespaced-symbol^) (def: spaced^ (All [a] (-> (l;Lexer a) (l;Lexer a))) (let [white-space^ (p;some l;space)] (|>. (p;before white-space^) (p;after white-space^)))) (def: attr-value^ (l;Lexer Text) (let [value^ (l;some xml-char^)] (p;either (l;enclosed ["\"" "\""] value^) (l;enclosed ["'" "'"] value^)))) (def: attrs^ (l;Lexer Attrs) (<| (:: p;Monad map (d;from-list ident;Hash)) p;some (p;seq (spaced^ attr-name^)) (p;after (l;this "=")) (spaced^ attr-value^))) (def: (close-tag^ expected) (-> Tag (l;Lexer [])) (do p;Monad [actual (|> tag^ spaced^ (p;after (l;this "/")) (l;enclosed ["<" ">"]))] (p;assert (format "Close tag does not match open tag.\n" "Expected: " (%ident expected) "\n" " Actual: " (%ident actual) "\n") (Ident/= expected actual)))) (def: comment^ (l;Lexer Text) (|> (l;not (l;this "--")) l;some (l;enclosed ["<--" "-->"]) spaced^)) (def: xml-header^ (l;Lexer Attrs) (|> (spaced^ attrs^) (p;before (l;this "?>")) (p;after (l;this "")] (|> (l;some (l;not end)) (p;after end) (p;after (l;this " (p;either cdata^ (l;many xml-char^)) (p/map (|>. text;trim #Text)))) (def: xml^ (l;Lexer XML) (|> (p;rec (function [node^] (p;either text^ (spaced^ (do p;Monad [_ (l;this "<") tag (spaced^ tag^) attrs (spaced^ attrs^) #let [no-children^ (do p;Monad [_ (l;this "/>")] (wrap (node tag attrs (list)))) with-children^ (do p;Monad [_ (l;this ">") children (p;some node^) _ (close-tag^ tag)] (wrap (node tag attrs children)))]] (p;either no-children^ with-children^)))))) ## This is put outside of the call to "rec" because comments ## cannot be located inside of XML nodes. ## This way, the comments can only be before or after the main document. (p;before (p;some comment^)) (p;after (p;some comment^)) (p;after (p;opt xml-header^)))) (def: #export (read input) (-> Text (R;Result XML)) (l;run input xml^)) ## [Generation] (def: (sanitize-value input) (-> Text Text) (|> input (text;replace-all "&" "&") (text;replace-all "<" "<") (text;replace-all ">" ">") (text;replace-all "'" "'") (text;replace-all "\"" """))) (def: (write-tag [namespace name]) (-> Tag Text) (case namespace "" name _ (format namespace ":" name))) (def: (write-attrs attrs) (-> Attrs Text) (|> attrs d;entries (L/map (function [[key value]] (format (write-tag key) "=" "\""(sanitize-value value) "\""))) (text;join-with " "))) (def: xml-header Text "") (def: #export (write input) (-> XML Text) (format xml-header (loop [input input] (case input (#Text value) (sanitize-value value) (#Node xml-tag xml-attrs xml-children) (let [tag (write-tag xml-tag) attrs (if (d;empty? xml-attrs) "" (format " " (write-attrs xml-attrs)))] (if (list;empty? xml-children) (format "<" tag attrs "/>") (format "<" tag attrs ">" (|> xml-children (L/map recur) (text;join-with "")) ""))))))) ## [Structs] (struct: #export _ (Codec Text XML) (def: encode write) (def: decode read)) (struct: #export _ (Eq XML) (def: (= reference sample) (case [reference sample] [(#Text reference/value) (#Text sample/value)] (t/= reference/value sample/value) [(#Node reference/tag reference/attrs reference/children) (#Node sample/tag sample/attrs sample/children)] (and (Ident/= reference/tag sample/tag) (:: (d;Eq text;Eq) = reference/attrs sample/attrs) (n.= (list;size reference/children) (list;size sample/children)) (|> (list;zip2 reference/children sample/children) (list;every? (product;uncurry =)))) _ false)))