diff options
author | Eduardo Julian | 2019-01-21 18:19:54 -0400 |
---|---|---|
committer | Eduardo Julian | 2019-01-21 18:19:54 -0400 |
commit | ca54c9a6577c7f556cc51bd7597f79e153e6c3e6 (patch) | |
tree | d5f41230b5f41f9fde051e45f83a7e55ab044f8d /stdlib/source/lux/data/text/encoding.lux | |
parent | f40ccf4a1567851547114731b4819c986aa093df (diff) |
Added char-sets for text.
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/lux/data/text/encoding.lux | 176 |
1 file changed, 171 insertions, 5 deletions
diff --git a/stdlib/source/lux/data/text/encoding.lux b/stdlib/source/lux/data/text/encoding.lux index bd1a255ec..6e617e75b 100644 --- a/stdlib/source/lux/data/text/encoding.lux +++ b/stdlib/source/lux/data/text/encoding.lux @@ -1,7 +1,11 @@ (.module: [lux #* + [control + [codec (#+ Codec)]] [data ["." error (#+ Error)]] + [type + abstract] [world [binary (#+ Binary)]] [platform @@ -9,19 +13,181 @@ ["_" host]]] [host (#+ import:)]]) -(`` (for {(~~ (static _.jvm)) - (as-is (def: utf8 Text "UTF-8") +## https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html + +(abstract: #export Char-Set + {} + + Text + + (do-template [<name> <charset>] + [(def: #export <name> Char-Set (:abstraction <charset>))] + + [ascii "ASCII"] + + [ibm-37 "IBM037"] + [ibm-273 "IBM273"] + [ibm-277 "IBM277"] + [ibm-278 "IBM278"] + [ibm-280 "IBM280"] + [ibm-284 "IBM284"] + [ibm-285 "IBM285"] + [ibm-290 "IBM290"] + [ibm-297 "IBM297"] + [ibm-300 "IBM300"] + [ibm-420 "IBM420"] + [ibm-424 "IBM424"] + [ibm-437 "IBM437"] + [ibm-500 "IBM500"] + [ibm-737 "IBM737"] + [ibm-775 "IBM775"] + [ibm-833 "IBM833"] + [ibm-834 "IBM834"] + [ibm-838 "IBM-Thai"] + [ibm-850 "IBM850"] + [ibm-852 "IBM852"] + [ibm-855 "IBM855"] + [ibm-856 "IBM856"] + [ibm-857 "IBM857"] + [ibm-858 "IBM00858"] + [ibm-860 "IBM860"] + [ibm-861 "IBM861"] + [ibm-862 "IBM862"] + [ibm-863 "IBM863"] + [ibm-864 "IBM864"] + [ibm-865 "IBM865"] + [ibm-866 "IBM866"] + [ibm-868 "IBM868"] + [ibm-869 "IBM869"] + [ibm-870 "IBM870"] + [ibm-871 "IBM871"] + [ibm-874 "IBM874"] + [ibm-875 "IBM875"] + [ibm-918 "IBM918"] + [ibm-921 "IBM921"] + [ibm-922 "IBM922"] + [ibm-930 "IBM930"] + [ibm-933 "IBM933"] + [ibm-935 "IBM935"] + [ibm-937 "IBM937"] + [ibm-939 "IBM939"] + [ibm-942 "IBM942"] + [ibm-942c "IBM942C"] + [ibm-943 "IBM943"] + [ibm-943c "IBM943C"] + [ibm-948 "IBM948"] + [ibm-949 "IBM949"] + [ibm-949c "IBM949C"] + [ibm-950 "IBM950"] + [ibm-964 "IBM964"] + [ibm-970 "IBM970"] + [ibm-1006 "IBM1006"] + [ibm-1025 "IBM1025"] + [ibm-1026 
"IBM1026"] + [ibm-1046 "IBM1046"] + [ibm-1047 "IBM1047"] + [ibm-1097 "IBM1097"] + [ibm-1098 "IBM1098"] + [ibm-1112 "IBM1112"] + [ibm-1122 "IBM1122"] + [ibm-1123 "IBM1123"] + [ibm-1124 "IBM1124"] + [ibm-1140 "IBM01140"] + [ibm-1141 "IBM01141"] + [ibm-1142 "IBM01142"] + [ibm-1143 "IBM01143"] + [ibm-1144 "IBM01144"] + [ibm-1145 "IBM01145"] + [ibm-1146 "IBM01146"] + [ibm-1147 "IBM01147"] + [ibm-1148 "IBM01148"] + [ibm-1149 "IBM01149"] + [ibm-1166 "IBM1166"] + [ibm-1364 "IBM1364"] + [ibm-1381 "IBM1381"] + [ibm-1383 "IBM1383"] + [ibm-33722 "IBM33722"] + + [iso-2022-cn "ISO-2022-CN"] + [iso2022-cn-cns "ISO2022-CN-CNS"] + [iso2022-cn-gb "ISO2022-CN-GB"] + [iso-2022-jp "ISO-2022-JP"] + [iso-2022-jp-2 "ISO-2022-JP-2"] + [iso-2022-kr "ISO-2022-KR"] + [iso-8859-1 "ISO-8859-1"] + [iso-8859-2 "ISO-8859-2"] + [iso-8859-3 "ISO-8859-3"] + [iso-8859-4 "ISO-8859-4"] + [iso-8859-5 "ISO-8859-5"] + [iso-8859-6 "ISO-8859-6"] + [iso-8859-7 "ISO-8859-7"] + [iso-8859-8 "ISO-8859-8"] + [iso-8859-9 "ISO-8859-9"] + [iso-8859-11 "iso-8859-11"] + [iso-8859-13 "ISO-8859-13"] + [iso-8859-15 "ISO-8859-15"] - (import: java/lang/String + [mac-arabic "MacArabic"] + [mac-central-europe "MacCentralEurope"] + [mac-croatian "MacCroatian"] + [mac-cyrillic "MacCyrillic"] + [mac-dingbat "MacDingbat"] + [mac-greek "MacGreek"] + [mac-hebrew "MacHebrew"] + [mac-iceland "MacIceland"] + [mac-roman "MacRoman"] + [mac-romania "MacRomania"] + [mac-symbol "MacSymbol"] + [mac-thai "MacThai"] + [mac-turkish "MacTurkish"] + [mac-ukraine "MacUkraine"] + + [utf-8 "UTF-8"] + [utf-16 "UTF-16"] + [utf-32 "UTF-32"] + + [windows-31j "windows-31j"] + [windows-874 "windows-874"] + [windows-949 "windows-949"] + [windows-950 "windows-950"] + [windows-1250 "windows-1250"] + [windows-1252 "windows-1252"] + [windows-1251 "windows-1251"] + [windows-1253 "windows-1253"] + [windows-1254 "windows-1254"] + [windows-1255 "windows-1255"] + [windows-1256 "windows-1256"] + [windows-1257 "windows-1257"] + [windows-1258 "windows-1258"] + 
[windows-iso2022jp "windows-iso2022jp"] + [windows-50220 "windows-50220"] + [windows-50221 "windows-50221"] + + [cesu-8 "CESU-8"] + [koi8-r "KOI8-R"] + [koi8-u "KOI8-U"] + ) + + (def: #export name + (-> Char-Set Text) + (|>> :representation)) + ) + +(`` (for {(~~ (static _.jvm)) + (as-is (import: java/lang/String (new [(Array byte) String]) (getBytes [String] (Array byte))))})) (def: #export (to-utf8 value) (-> Text Binary) (`` (for {(~~ (static _.jvm)) - (String::getBytes ..utf8 (:coerce String value))}))) + (String::getBytes (..name ..utf-8) (:coerce String value))}))) (def: #export (from-utf8 value) (-> Binary (Error Text)) (`` (for {(~~ (static _.jvm)) - (#error.Success (String::new value ..utf8))}))) + (#error.Success (String::new value (..name ..utf-8)))}))) + +(structure: #export UTF-8 (Codec Binary Text) + (def: encode ..to-utf8) + (def: decode ..from-utf8)) |