aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/library/lux/data/format/xml.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/library/lux/data/format/xml.lux')
-rw-r--r--stdlib/source/library/lux/data/format/xml.lux299
1 files changed, 299 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/format/xml.lux b/stdlib/source/library/lux/data/format/xml.lux
new file mode 100644
index 000000000..56d394490
--- /dev/null
+++ b/stdlib/source/library/lux/data/format/xml.lux
@@ -0,0 +1,299 @@
+(.module:
+ [library
+ [lux #*
+ [abstract
+ [monad (#+ do)]
+ [equivalence (#+ Equivalence)]
+ [codec (#+ Codec)]]
+ [control
+ [try (#+ Try)]
+ ["<>" parser ("#\." monad)
+ ["<.>" text (#+ Parser)]]]
+ [data
+ ["." product]
+ ["." name ("#\." equivalence codec)]
+ ["." text ("#\." equivalence monoid)]
+ [collection
+ ["." list ("#\." functor)]
+ ["." dictionary (#+ Dictionary)]]]
+ [math
+ [number
+ ["n" nat]
+ ["." int]]]]])
+
+(type: #export Tag
+ Name)
+
+(type: #export Attribute
+ Name)
+
+(type: #export Attrs
+ (Dictionary Attribute Text))
+
+(def: #export attributes
+ Attrs
+ (dictionary.new name.hash))
+
+(type: #export #rec XML
+ (#Text Text)
+ (#Node Tag Attrs (List XML)))
+
+(def: namespace_separator
+ ":")
+
+(def: xml_standard_escape_char^
+ (Parser Text)
+ ($_ <>.either
+ (<>.after (<text>.this "&lt;") (<>\wrap "<"))
+ (<>.after (<text>.this "&gt;") (<>\wrap ">"))
+ (<>.after (<text>.this "&amp;") (<>\wrap "&"))
+ (<>.after (<text>.this "&apos;") (<>\wrap "'"))
+ (<>.after (<text>.this "&quot;") (<>\wrap text.double_quote))
+ ))
+
+(def: xml_unicode_escape_char^
+ (Parser Text)
+ (|> (do <>.monad
+ [hex? (<>.maybe (<text>.this "x"))
+ code (case hex?
+ #.None
+ (<>.codec int.decimal (<text>.many <text>.decimal))
+
+ (#.Some _)
+ (<>.codec int.decimal (<text>.many <text>.hexadecimal)))]
+ (wrap (|> code .nat text.from_code)))
+ (<>.before (<text>.this ";"))
+ (<>.after (<text>.this "&#"))))
+
+(def: xml_escape_char^
+ (Parser Text)
+ (<>.either xml_standard_escape_char^
+ xml_unicode_escape_char^))
+
+(def: xml_char^
+ (Parser Text)
+ (<>.either (<text>.none_of ($_ text\compose "<>&" text.double_quote))
+ xml_escape_char^))
+
+(def: xml_identifier
+ (Parser Text)
+ (do <>.monad
+ [head (<>.either (<text>.one_of "_")
+ <text>.alpha)
+ tail (<text>.some (<>.either (<text>.one_of "_.-")
+ <text>.alpha_num))]
+ (wrap ($_ text\compose head tail))))
+
+(def: namespaced_symbol^
+ (Parser Name)
+ (do <>.monad
+ [first_part xml_identifier
+ ?second_part (<| <>.maybe (<>.after (<text>.this ..namespace_separator)) xml_identifier)]
+ (case ?second_part
+ #.None
+ (wrap ["" first_part])
+
+ (#.Some second_part)
+ (wrap [first_part second_part]))))
+
+(def: tag^ namespaced_symbol^)
+(def: attr_name^ namespaced_symbol^)
+
+(def: spaced^
+ (All [a] (-> (Parser a) (Parser a)))
+ (let [white_space^ (<>.some <text>.space)]
+ (|>> (<>.before white_space^)
+ (<>.after white_space^))))
+
+(def: attr_value^
+ (Parser Text)
+ (let [value^ (<text>.some xml_char^)]
+ (<>.either (<text>.enclosed [text.double_quote text.double_quote] value^)
+ (<text>.enclosed ["'" "'"] value^))))
+
+(def: attrs^
+ (Parser Attrs)
+ (<| (\ <>.monad map (dictionary.from_list name.hash))
+ <>.some
+ (<>.and (..spaced^ attr_name^))
+ (<>.after (<text>.this "="))
+ (..spaced^ attr_value^)))
+
+(def: (close_tag^ expected)
+ (-> Tag (Parser []))
+ (do <>.monad
+ [actual (|> tag^
+ ..spaced^
+ (<>.after (<text>.this "/"))
+ (<text>.enclosed ["<" ">"]))]
+ (<>.assert ($_ text\compose "Close tag does not match open tag." text.new_line
+ "Expected: " (name\encode expected) text.new_line
+ " Actual: " (name\encode actual) text.new_line)
+ (name\= expected actual))))
+
+(def: comment^
+ (Parser Text)
+ (|> (<text>.not (<text>.this "--"))
+ <text>.some
+ (<text>.enclosed ["<!--" "-->"])
+ ..spaced^))
+
+(def: xml_header^
+ (Parser Attrs)
+ (|> (..spaced^ attrs^)
+ (<>.before (<text>.this "?>"))
+ (<>.after (<text>.this "<?xml"))
+ ..spaced^))
+
+(def: cdata^
+ (Parser Text)
+ (let [end (<text>.this "]]>")]
+ (|> (<text>.some (<text>.not end))
+ (<>.after end)
+ (<>.after (<text>.this "<![CDATA["))
+ ..spaced^)))
+
+(def: text^
+ (Parser XML)
+ (|> (..spaced^ (<text>.many xml_char^))
+ (<>.either cdata^)
+ (<>\map (|>> #Text))))
+
+(def: null^
+ (Parser Any)
+ (<text>.this (text.from_code 0)))
+
+(def: xml^
+ (Parser XML)
+ (|> (<>.rec
+ (function (_ node^)
+ (|> (do <>.monad
+ [_ (<text>.this "<")
+ tag (..spaced^ tag^)
+ attrs (..spaced^ attrs^)
+ #let [no_children^ ($_ <>.either
+ (do <>.monad
+ [_ (<text>.this "/>")]
+ (wrap (#Node tag attrs (list))))
+ (do <>.monad
+ [_ (<text>.this ">")
+ _ (<>.some (<>.either <text>.space
+ ..comment^))
+ _ (..close_tag^ tag)]
+ (wrap (#Node tag attrs (list)))))
+ with_children^ (do <>.monad
+ [_ (<text>.this ">")
+ children (<>.many node^)
+ _ (..close_tag^ tag)]
+ (wrap (#Node tag attrs children)))]]
+ ($_ <>.either
+ no_children^
+ with_children^))
+ ..spaced^
+ (<>.before (<>.some ..comment^))
+ (<>.after (<>.some ..comment^))
+ (<>.either ..text^))))
+ (<>.before (<>.some ..null^))
+ (<>.after (<>.maybe ..xml_header^))))
+
+(def: read
+ (-> Text (Try XML))
+ (<text>.run xml^))
+
+(def: (sanitize_value input)
+ (-> Text Text)
+ (|> input
+ (text.replace_all "&" "&amp;")
+ (text.replace_all "<" "&lt;")
+ (text.replace_all ">" "&gt;")
+ (text.replace_all "'" "&apos;")
+ (text.replace_all text.double_quote "&quot;")))
+
+(def: #export (tag [namespace name])
+ (-> Tag Text)
+ (case namespace
+ "" name
+ _ ($_ text\compose namespace ..namespace_separator name)))
+
+(def: #export attribute
+ (-> Attribute Text)
+ ..tag)
+
+(def: (write_attrs attrs)
+ (-> Attrs Text)
+ (|> attrs
+ dictionary.entries
+ (list\map (function (_ [key value])
+ ($_ text\compose (..attribute key) "=" text.double_quote (sanitize_value value) text.double_quote)))
+ (text.join_with " ")))
+
+(def: xml_header
+ Text
+ (let [quote (: (-> Text Text)
+ (function (_ value)
+ ($_ text\compose text.double_quote value text.double_quote)))]
+ ($_ text\compose
+ "<?xml"
+ " version=" (quote "1.0")
+ " encoding=" (quote "UTF-8")
+ "?>")))
+
+(def: (write input)
+ (-> XML Text)
+ ($_ text\compose
+ ..xml_header text.new_line
+ (loop [prefix ""
+ input input]
+ (case input
+ (#Text value)
+ (sanitize_value value)
+
+ (^ (#Node xml_tag xml_attrs (list (#Text value))))
+ (let [tag (..tag xml_tag)
+ attrs (if (dictionary.empty? xml_attrs)
+ ""
+ ($_ text\compose " " (..write_attrs xml_attrs)))]
+ ($_ text\compose
+ prefix "<" tag attrs ">"
+ (sanitize_value value)
+ "</" tag ">"))
+
+ (#Node xml_tag xml_attrs xml_children)
+ (let [tag (..tag xml_tag)
+ attrs (if (dictionary.empty? xml_attrs)
+ ""
+ ($_ text\compose " " (..write_attrs xml_attrs)))]
+ (if (list.empty? xml_children)
+ ($_ text\compose prefix "<" tag attrs "/>")
+ ($_ text\compose prefix "<" tag attrs ">"
+ (|> xml_children
+ (list\map (|>> (recur (text\compose prefix text.tab)) (text\compose text.new_line)))
+ (text.join_with ""))
+ text.new_line prefix "</" tag ">")))))
+ ))
+
+(implementation: #export codec
+ (Codec Text XML)
+
+ (def: encode ..write)
+ (def: decode ..read))
+
+(implementation: #export equivalence
+ (Equivalence XML)
+
+ (def: (= reference sample)
+ (case [reference sample]
+ [(#Text reference/value) (#Text sample/value)]
+ (text\= reference/value sample/value)
+
+ [(#Node reference/tag reference/attrs reference/children)
+ (#Node sample/tag sample/attrs sample/children)]
+ (and (name\= reference/tag sample/tag)
+ (\ (dictionary.equivalence text.equivalence) = reference/attrs sample/attrs)
+ (n.= (list.size reference/children)
+ (list.size sample/children))
+ (|> (list.zip/2 reference/children sample/children)
+ (list.every? (product.uncurry =))))
+
+ _
+ false)))