aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/lux/data/text/encoding.lux
diff options
context:
space:
mode:
authorEduardo Julian2019-01-21 18:19:54 -0400
committerEduardo Julian2019-01-21 18:19:54 -0400
commitca54c9a6577c7f556cc51bd7597f79e153e6c3e6 (patch)
treed5f41230b5f41f9fde051e45f83a7e55ab044f8d /stdlib/source/lux/data/text/encoding.lux
parentf40ccf4a1567851547114731b4819c986aa093df (diff)
Added char-sets for text.
Diffstat (limited to '')
-rw-r--r--stdlib/source/lux/data/text/encoding.lux176
1 files changed, 171 insertions, 5 deletions
diff --git a/stdlib/source/lux/data/text/encoding.lux b/stdlib/source/lux/data/text/encoding.lux
index bd1a255ec..6e617e75b 100644
--- a/stdlib/source/lux/data/text/encoding.lux
+++ b/stdlib/source/lux/data/text/encoding.lux
@@ -1,7 +1,11 @@
(.module:
[lux #*
+ [control
+ [codec (#+ Codec)]]
[data
["." error (#+ Error)]]
+ [type
+ abstract]
[world
[binary (#+ Binary)]]
[platform
@@ -9,19 +13,181 @@
["_" host]]]
[host (#+ import:)]])
-(`` (for {(~~ (static _.jvm))
- (as-is (def: utf8 Text "UTF-8")
+## https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html
+
+(abstract: #export Char-Set
+ {}
+
+ Text
+
+ (do-template [<name> <charset>]
+ [(def: #export <name> Char-Set (:abstraction <charset>))]
+
+ [ascii "ASCII"]
+
+ [ibm-37 "IBM037"]
+ [ibm-273 "IBM273"]
+ [ibm-277 "IBM277"]
+ [ibm-278 "IBM278"]
+ [ibm-280 "IBM280"]
+ [ibm-284 "IBM284"]
+ [ibm-285 "IBM285"]
+ [ibm-290 "IBM290"]
+ [ibm-297 "IBM297"]
+ [ibm-300 "IBM300"]
+ [ibm-420 "IBM420"]
+ [ibm-424 "IBM424"]
+ [ibm-437 "IBM437"]
+ [ibm-500 "IBM500"]
+ [ibm-737 "IBM737"]
+ [ibm-775 "IBM775"]
+ [ibm-833 "IBM833"]
+ [ibm-834 "IBM834"]
+ [ibm-838 "IBM-Thai"]
+ [ibm-850 "IBM850"]
+ [ibm-852 "IBM852"]
+ [ibm-855 "IBM855"]
+ [ibm-856 "IBM856"]
+ [ibm-857 "IBM857"]
+ [ibm-858 "IBM00858"]
+ [ibm-860 "IBM860"]
+ [ibm-861 "IBM861"]
+ [ibm-862 "IBM862"]
+ [ibm-863 "IBM863"]
+ [ibm-864 "IBM864"]
+ [ibm-865 "IBM865"]
+ [ibm-866 "IBM866"]
+ [ibm-868 "IBM868"]
+ [ibm-869 "IBM869"]
+ [ibm-870 "IBM870"]
+ [ibm-871 "IBM871"]
+ [ibm-874 "IBM874"]
+ [ibm-875 "IBM875"]
+ [ibm-918 "IBM918"]
+ [ibm-921 "IBM921"]
+ [ibm-922 "IBM922"]
+ [ibm-930 "IBM930"]
+ [ibm-933 "IBM933"]
+ [ibm-935 "IBM935"]
+ [ibm-937 "IBM937"]
+ [ibm-939 "IBM939"]
+ [ibm-942 "IBM942"]
+ [ibm-942c "IBM942C"]
+ [ibm-943 "IBM943"]
+ [ibm-943c "IBM943C"]
+ [ibm-948 "IBM948"]
+ [ibm-949 "IBM949"]
+ [ibm-949c "IBM949C"]
+ [ibm-950 "IBM950"]
+ [ibm-964 "IBM964"]
+ [ibm-970 "IBM970"]
+ [ibm-1006 "IBM1006"]
+ [ibm-1025 "IBM1025"]
+ [ibm-1026 "IBM1026"]
+ [ibm-1046 "IBM1046"]
+ [ibm-1047 "IBM1047"]
+ [ibm-1097 "IBM1097"]
+ [ibm-1098 "IBM1098"]
+ [ibm-1112 "IBM1112"]
+ [ibm-1122 "IBM1122"]
+ [ibm-1123 "IBM1123"]
+ [ibm-1124 "IBM1124"]
+ [ibm-1140 "IBM01140"]
+ [ibm-1141 "IBM01141"]
+ [ibm-1142 "IBM01142"]
+ [ibm-1143 "IBM01143"]
+ [ibm-1144 "IBM01144"]
+ [ibm-1145 "IBM01145"]
+ [ibm-1146 "IBM01146"]
+ [ibm-1147 "IBM01147"]
+ [ibm-1148 "IBM01148"]
+ [ibm-1149 "IBM01149"]
+ [ibm-1166 "IBM1166"]
+ [ibm-1364 "IBM1364"]
+ [ibm-1381 "IBM1381"]
+ [ibm-1383 "IBM1383"]
+ [ibm-33722 "IBM33722"]
+
+ [iso-2022-cn "ISO-2022-CN"]
+ [iso2022-cn-cns "ISO2022-CN-CNS"]
+ [iso2022-cn-gb "ISO2022-CN-GB"]
+ [iso-2022-jp "ISO-2022-JP"]
+ [iso-2022-jp-2 "ISO-2022-JP-2"]
+ [iso-2022-kr "ISO-2022-KR"]
+ [iso-8859-1 "ISO-8859-1"]
+ [iso-8859-2 "ISO-8859-2"]
+ [iso-8859-3 "ISO-8859-3"]
+ [iso-8859-4 "ISO-8859-4"]
+ [iso-8859-5 "ISO-8859-5"]
+ [iso-8859-6 "ISO-8859-6"]
+ [iso-8859-7 "ISO-8859-7"]
+ [iso-8859-8 "ISO-8859-8"]
+ [iso-8859-9 "ISO-8859-9"]
+ [iso-8859-11 "iso-8859-11"]
+ [iso-8859-13 "ISO-8859-13"]
+ [iso-8859-15 "ISO-8859-15"]
- (import: java/lang/String
+ [mac-arabic "MacArabic"]
+ [mac-central-europe "MacCentralEurope"]
+ [mac-croatian "MacCroatian"]
+ [mac-cyrillic "MacCyrillic"]
+ [mac-dingbat "MacDingbat"]
+ [mac-greek "MacGreek"]
+ [mac-hebrew "MacHebrew"]
+ [mac-iceland "MacIceland"]
+ [mac-roman "MacRoman"]
+ [mac-romania "MacRomania"]
+ [mac-symbol "MacSymbol"]
+ [mac-thai "MacThai"]
+ [mac-turkish "MacTurkish"]
+ [mac-ukraine "MacUkraine"]
+
+ [utf-8 "UTF-8"]
+ [utf-16 "UTF-16"]
+ [utf-32 "UTF-32"]
+
+ [windows-31j "windows-31j"]
+ [windows-874 "windows-874"]
+ [windows-949 "windows-949"]
+ [windows-950 "windows-950"]
+ [windows-1250 "windows-1250"]
+ [windows-1252 "windows-1252"]
+ [windows-1251 "windows-1251"]
+ [windows-1253 "windows-1253"]
+ [windows-1254 "windows-1254"]
+ [windows-1255 "windows-1255"]
+ [windows-1256 "windows-1256"]
+ [windows-1257 "windows-1257"]
+ [windows-1258 "windows-1258"]
+ [windows-iso2022jp "windows-iso2022jp"]
+ [windows-50220 "windows-50220"]
+ [windows-50221 "windows-50221"]
+
+ [cesu-8 "CESU-8"]
+ [koi8-r "KOI8-R"]
+ [koi8-u "KOI8-U"]
+ )
+
+ (def: #export name
+ (-> Char-Set Text)
+ (|>> :representation))
+ )
+
+(`` (for {(~~ (static _.jvm))
+ (as-is (import: java/lang/String
(new [(Array byte) String])
(getBytes [String] (Array byte))))}))
(def: #export (to-utf8 value)
(-> Text Binary)
(`` (for {(~~ (static _.jvm))
- (String::getBytes ..utf8 (:coerce String value))})))
+ (String::getBytes (..name ..utf-8) (:coerce String value))})))
(def: #export (from-utf8 value)
(-> Binary (Error Text))
(`` (for {(~~ (static _.jvm))
- (#error.Success (String::new value ..utf8))})))
+ (#error.Success (String::new value (..name ..utf-8)))})))
+
+(structure: #export UTF-8 (Codec Binary Text)
+ (def: encode ..to-utf8)
+ (def: decode ..from-utf8))