diff options
Diffstat (limited to 'stdlib/source/library/lux/data/format/xml.lux')
-rw-r--r-- | stdlib/source/library/lux/data/format/xml.lux | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/format/xml.lux b/stdlib/source/library/lux/data/format/xml.lux new file mode 100644 index 000000000..56d394490 --- /dev/null +++ b/stdlib/source/library/lux/data/format/xml.lux @@ -0,0 +1,299 @@ +(.module: + [library + [lux #* + [abstract + [monad (#+ do)] + [equivalence (#+ Equivalence)] + [codec (#+ Codec)]] + [control + [try (#+ Try)] + ["<>" parser ("#\." monad) + ["<.>" text (#+ Parser)]]] + [data + ["." product] + ["." name ("#\." equivalence codec)] + ["." text ("#\." equivalence monoid)] + [collection + ["." list ("#\." functor)] + ["." dictionary (#+ Dictionary)]]] + [math + [number + ["n" nat] + ["." int]]]]]) + +(type: #export Tag + Name) + +(type: #export Attribute + Name) + +(type: #export Attrs + (Dictionary Attribute Text)) + +(def: #export attributes + Attrs + (dictionary.new name.hash)) + +(type: #export #rec XML + (#Text Text) + (#Node Tag Attrs (List XML))) + +(def: namespace_separator + ":") + +(def: xml_standard_escape_char^ + (Parser Text) + ($_ <>.either + (<>.after (<text>.this "<") (<>\wrap "<")) + (<>.after (<text>.this ">") (<>\wrap ">")) + (<>.after (<text>.this "&") (<>\wrap "&")) + (<>.after (<text>.this "'") (<>\wrap "'")) + (<>.after (<text>.this """) (<>\wrap text.double_quote)) + )) + +(def: xml_unicode_escape_char^ + (Parser Text) + (|> (do <>.monad + [hex? (<>.maybe (<text>.this "x")) + code (case hex? + #.None + (<>.codec int.decimal (<text>.many <text>.decimal)) + + (#.Some _) + (<>.codec int.decimal (<text>.many <text>.hexadecimal)))] + (wrap (|> code .nat text.from_code))) + (<>.before (<text>.this ";")) + (<>.after (<text>.this "&#")))) + +(def: xml_escape_char^ + (Parser Text) + (<>.either xml_standard_escape_char^ + xml_unicode_escape_char^)) + +(def: xml_char^ + (Parser Text) + (<>.either (<text>.none_of ($_ text\compose "<>&" text.double_quote)) + xml_escape_char^)) + +(def: xml_identifier + (Parser Text) + (do <>.monad + [head (<>.either (<text>.one_of "_") + <text>.alpha) + tail (<text>.some (<>.either (<text>.one_of "_.-") + <text>.alpha_num))] + (wrap ($_ text\compose head tail)))) + +(def: namespaced_symbol^ + (Parser Name) + (do <>.monad + [first_part xml_identifier + ?second_part (<| <>.maybe (<>.after (<text>.this ..namespace_separator)) xml_identifier)] + (case ?second_part + #.None + (wrap ["" first_part]) + + (#.Some second_part) + (wrap [first_part second_part])))) + +(def: tag^ namespaced_symbol^) +(def: attr_name^ namespaced_symbol^) + +(def: spaced^ + (All [a] (-> (Parser a) (Parser a))) + (let [white_space^ (<>.some <text>.space)] + (|>> (<>.before white_space^) + (<>.after white_space^)))) + +(def: attr_value^ + (Parser Text) + (let [value^ (<text>.some xml_char^)] + (<>.either (<text>.enclosed [text.double_quote text.double_quote] value^) + (<text>.enclosed ["'" "'"] value^)))) + +(def: attrs^ + (Parser Attrs) + (<| (\ <>.monad map (dictionary.from_list name.hash)) + <>.some + (<>.and (..spaced^ attr_name^)) + (<>.after (<text>.this "=")) + (..spaced^ attr_value^))) + +(def: (close_tag^ expected) + (-> Tag (Parser [])) + (do <>.monad + [actual (|> tag^ + ..spaced^ + (<>.after (<text>.this "/")) + (<text>.enclosed ["<" ">"]))] + (<>.assert ($_ text\compose "Close tag does not match open tag." text.new_line + "Expected: " (name\encode expected) text.new_line + " Actual: " (name\encode actual) text.new_line) + (name\= expected actual)))) + +(def: comment^ + (Parser Text) + (|> (<text>.not (<text>.this "--")) + <text>.some + (<text>.enclosed ["<!--" "-->"]) + ..spaced^)) + +(def: xml_header^ + (Parser Attrs) + (|> (..spaced^ attrs^) + (<>.before (<text>.this "?>")) + (<>.after (<text>.this "<?xml")) + ..spaced^)) + +(def: cdata^ + (Parser Text) + (let [end (<text>.this "]]>")] + (|> (<text>.some (<text>.not end)) + (<>.after end) + (<>.after (<text>.this "<![CDATA[")) + ..spaced^))) + +(def: text^ + (Parser XML) + (|> (..spaced^ (<text>.many xml_char^)) + (<>.either cdata^) + (<>\map (|>> #Text)))) + +(def: null^ + (Parser Any) + (<text>.this (text.from_code 0))) + +(def: xml^ + (Parser XML) + (|> (<>.rec + (function (_ node^) + (|> (do <>.monad + [_ (<text>.this "<") + tag (..spaced^ tag^) + attrs (..spaced^ attrs^) + #let [no_children^ ($_ <>.either + (do <>.monad + [_ (<text>.this "/>")] + (wrap (#Node tag attrs (list)))) + (do <>.monad + [_ (<text>.this ">") + _ (<>.some (<>.either <text>.space + ..comment^)) + _ (..close_tag^ tag)] + (wrap (#Node tag attrs (list))))) + with_children^ (do <>.monad + [_ (<text>.this ">") + children (<>.many node^) + _ (..close_tag^ tag)] + (wrap (#Node tag attrs children)))]] + ($_ <>.either + no_children^ + with_children^)) + ..spaced^ + (<>.before (<>.some ..comment^)) + (<>.after (<>.some ..comment^)) + (<>.either ..text^)))) + (<>.before (<>.some ..null^)) + (<>.after (<>.maybe ..xml_header^)))) + +(def: read + (-> Text (Try XML)) + (<text>.run xml^)) + +(def: (sanitize_value input) + (-> Text Text) + (|> input + (text.replace_all "&" "&") + (text.replace_all "<" "<") + (text.replace_all ">" ">") + (text.replace_all "'" "'") + (text.replace_all text.double_quote """))) + +(def: #export (tag [namespace name]) + (-> Tag Text) + (case namespace + "" name + _ ($_ text\compose namespace ..namespace_separator name))) + +(def: #export attribute + (-> Attribute Text) + ..tag) + +(def: (write_attrs attrs) + (-> Attrs Text) + (|> attrs + dictionary.entries + (list\map (function (_ [key value]) + ($_ text\compose (..attribute key) "=" text.double_quote (sanitize_value value) text.double_quote))) + (text.join_with " "))) + +(def: xml_header + Text + (let [quote (: (-> Text Text) + (function (_ value) + ($_ text\compose text.double_quote value text.double_quote)))] + ($_ text\compose + "<?xml" + " version=" (quote "1.0") + " encoding=" (quote "UTF-8") + "?>"))) + +(def: (write input) + (-> XML Text) + ($_ text\compose + ..xml_header text.new_line + (loop [prefix "" + input input] + (case input + (#Text value) + (sanitize_value value) + + (^ (#Node xml_tag xml_attrs (list (#Text value)))) + (let [tag (..tag xml_tag) + attrs (if (dictionary.empty? xml_attrs) + "" + ($_ text\compose " " (..write_attrs xml_attrs)))] + ($_ text\compose + prefix "<" tag attrs ">" + (sanitize_value value) + "</" tag ">")) + + (#Node xml_tag xml_attrs xml_children) + (let [tag (..tag xml_tag) + attrs (if (dictionary.empty? xml_attrs) + "" + ($_ text\compose " " (..write_attrs xml_attrs)))] + (if (list.empty? xml_children) + ($_ text\compose prefix "<" tag attrs "/>") + ($_ text\compose prefix "<" tag attrs ">" + (|> xml_children + (list\map (|>> (recur (text\compose prefix text.tab)) (text\compose text.new_line))) + (text.join_with "")) + text.new_line prefix "</" tag ">"))))) + )) + +(implementation: #export codec + (Codec Text XML) + + (def: encode ..write) + (def: decode ..read)) + +(implementation: #export equivalence + (Equivalence XML) + + (def: (= reference sample) + (case [reference sample] + [(#Text reference/value) (#Text sample/value)] + (text\= reference/value sample/value) + + [(#Node reference/tag reference/attrs reference/children) + (#Node sample/tag sample/attrs sample/children)] + (and (name\= reference/tag sample/tag) + (\ (dictionary.equivalence text.equivalence) = reference/attrs sample/attrs) + (n.= (list.size reference/children) + (list.size sample/children)) + (|> (list.zip/2 reference/children sample/children) + (list.every? (product.uncurry =)))) + + _ + false))) |