(.module:
  [lux #*
   ["@" target]
   ["." host]
   [abstract
    [codec (#+ Codec)]]
   [control
    ["." try (#+ Try)]]
   [data
    [binary (#+ Binary)]]
   [type
    abstract]])

## https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html

## An Encoding is an abstract wrapper around the Text name of a character
## encoding. One exported constant is generated per [<name> <encoding>] row
## of the template below, and ..name recovers the wrapped Text.
(abstract: #export Encoding
  Text

  (template [<name> <encoding>]
    [(def: #export <name> Encoding (:abstraction <encoding>))]

    [ascii "ASCII"]

    [ibm-37 "IBM037"]
    [ibm-273 "IBM273"]
    [ibm-277 "IBM277"]
    [ibm-278 "IBM278"]
    [ibm-280 "IBM280"]
    [ibm-284 "IBM284"]
    [ibm-285 "IBM285"]
    [ibm-290 "IBM290"]
    [ibm-297 "IBM297"]
    [ibm-300 "IBM300"]
    [ibm-420 "IBM420"]
    [ibm-424 "IBM424"]
    [ibm-437 "IBM437"]
    [ibm-500 "IBM500"]
    [ibm-737 "IBM737"]
    [ibm-775 "IBM775"]
    [ibm-833 "IBM833"]
    [ibm-834 "IBM834"]
    [ibm-838 "IBM-Thai"]
    [ibm-850 "IBM850"]
    [ibm-852 "IBM852"]
    [ibm-855 "IBM855"]
    [ibm-856 "IBM856"]
    [ibm-857 "IBM857"]
    [ibm-858 "IBM00858"]
    [ibm-860 "IBM860"]
    [ibm-861 "IBM861"]
    [ibm-862 "IBM862"]
    [ibm-863 "IBM863"]
    [ibm-864 "IBM864"]
    [ibm-865 "IBM865"]
    [ibm-866 "IBM866"]
    [ibm-868 "IBM868"]
    [ibm-869 "IBM869"]
    [ibm-870 "IBM870"]
    [ibm-871 "IBM871"]
    [ibm-874 "IBM874"]
    [ibm-875 "IBM875"]
    [ibm-918 "IBM918"]
    [ibm-921 "IBM921"]
    [ibm-922 "IBM922"]
    [ibm-930 "IBM930"]
    [ibm-933 "IBM933"]
    [ibm-935 "IBM935"]
    [ibm-937 "IBM937"]
    [ibm-939 "IBM939"]
    [ibm-942 "IBM942"]
    [ibm-942c "IBM942C"]
    [ibm-943 "IBM943"]
    [ibm-943c "IBM943C"]
    [ibm-948 "IBM948"]
    [ibm-949 "IBM949"]
    [ibm-949c "IBM949C"]
    [ibm-950 "IBM950"]
    [ibm-964 "IBM964"]
    [ibm-970 "IBM970"]
    [ibm-1006 "IBM1006"]
    [ibm-1025 "IBM1025"]
    [ibm-1026 "IBM1026"]
    [ibm-1046 "IBM1046"]
    [ibm-1047 "IBM1047"]
    [ibm-1097 "IBM1097"]
    [ibm-1098 "IBM1098"]
    [ibm-1112 "IBM1112"]
    [ibm-1122 "IBM1122"]
    [ibm-1123 "IBM1123"]
    [ibm-1124 "IBM1124"]
    [ibm-1140 "IBM01140"]
    [ibm-1141 "IBM01141"]
    [ibm-1142 "IBM01142"]
    [ibm-1143 "IBM01143"]
    [ibm-1144 "IBM01144"]
    [ibm-1145 "IBM01145"]
    [ibm-1146 "IBM01146"]
    [ibm-1147 "IBM01147"]
    [ibm-1148 "IBM01148"]
    [ibm-1149 "IBM01149"]
    [ibm-1166 "IBM1166"]
    [ibm-1364 "IBM1364"]
    [ibm-1381 "IBM1381"]
    [ibm-1383 "IBM1383"]
    [ibm-33722 "IBM33722"]

    [iso-2022-cn "ISO-2022-CN"]
    [iso2022-cn-cns "ISO2022-CN-CNS"]
    [iso2022-cn-gb "ISO2022-CN-GB"]
    [iso-2022-jp "ISO-2022-JP"]
    [iso-2022-jp-2 "ISO-2022-JP-2"]
    [iso-2022-kr "ISO-2022-KR"]
    [iso-8859-1 "ISO-8859-1"]
    [iso-8859-2 "ISO-8859-2"]
    [iso-8859-3 "ISO-8859-3"]
    [iso-8859-4 "ISO-8859-4"]
    [iso-8859-5 "ISO-8859-5"]
    [iso-8859-6 "ISO-8859-6"]
    [iso-8859-7 "ISO-8859-7"]
    [iso-8859-8 "ISO-8859-8"]
    [iso-8859-9 "ISO-8859-9"]
    [iso-8859-11 "iso-8859-11"]
    [iso-8859-13 "ISO-8859-13"]
    [iso-8859-15 "ISO-8859-15"]

    [mac-arabic "MacArabic"]
    [mac-central-europe "MacCentralEurope"]
    [mac-croatian "MacCroatian"]
    [mac-cyrillic "MacCyrillic"]
    [mac-dingbat "MacDingbat"]
    [mac-greek "MacGreek"]
    [mac-hebrew "MacHebrew"]
    [mac-iceland "MacIceland"]
    [mac-roman "MacRoman"]
    [mac-romania "MacRomania"]
    [mac-symbol "MacSymbol"]
    [mac-thai "MacThai"]
    [mac-turkish "MacTurkish"]
    [mac-ukraine "MacUkraine"]

    [utf-8 "UTF-8"]
    [utf-16 "UTF-16"]
    [utf-32 "UTF-32"]

    [windows-31j "windows-31j"]
    [windows-874 "windows-874"]
    [windows-949 "windows-949"]
    [windows-950 "windows-950"]
    [windows-1250 "windows-1250"]
    [windows-1252 "windows-1252"]
    [windows-1251 "windows-1251"]
    [windows-1253 "windows-1253"]
    [windows-1254 "windows-1254"]
    [windows-1255 "windows-1255"]
    [windows-1256 "windows-1256"]
    [windows-1257 "windows-1257"]
    [windows-1258 "windows-1258"]
    [windows-iso2022jp "windows-iso2022jp"]
    [windows-50220 "windows-50220"]
    [windows-50221 "windows-50221"]

    [cesu-8 "CESU-8"]
    [koi8-r "KOI8-R"]
    [koi8-u "KOI8-U"]
    )

  ## Yields the Text name wrapped by an Encoding.
  (def: #export name
    (-> Encoding Text)
    (|>> :representation))
  )

## Host bindings used by the conversions below.
## The same java/lang/String import is shared by the @.old and @.jvm targets;
## the @.js target imports the Node and browser APIs instead.
(with-expansions [<jvm> (as-is (host.import: #long java/lang/String
                                 (new [[byte] java/lang/String])
                                 (getBytes [java/lang/String] [byte])))]
  (for {@.old
        (as-is <jvm>)

        @.jvm
        (as-is <jvm>)

        @.js
        (as-is (host.import: Uint8Array)

               ## On Node
               (host.import: Buffer
                 (#static from #as from|encode [host.String host.String] Buffer)
                 (#static from #as from|decode [Uint8Array] Buffer)
                 (toString [host.String] host.String))

               ## On the browser
               (host.import: TextEncoder
                 (new [host.String])
                 (encode [host.String] Uint8Array))

               (host.import: TextDecoder
                 (new [host.String])
                 (decode [Uint8Array] host.String)))}))

## Encodes a Text value as UTF-8 bytes.
## The implementation is selected per compilation target; on @.js it is
## further selected at run-time (Nashorn, Node, or otherwise the browser API).
(def: #export (to-utf8 value)
  (-> Text Binary)
  (for {@.old
        (java/lang/String::getBytes (..name ..utf-8)
                                    ## The coercion below may seem
                                    ## gratuitous, but removing it
                                    ## causes a grave compilation problem.
                                    (:coerce java/lang/String value))

        @.jvm
        (java/lang/String::getBytes (..name ..utf-8) value)

        @.js
        (cond host.on-nashorn?
              (:coerce Binary ("js object do" "getBytes" value ["utf8"]))

              host.on-node-js?
              (|> (Buffer::from|encode [value "utf8"])
                  ## This coercion is valid as per NodeJS's documentation:
                  ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays
                  (:coerce Uint8Array))

              ## On the browser
              (|> (TextEncoder::new [(..name ..utf-8)])
                  (TextEncoder::encode [value]))
              )}))

## Decodes UTF-8 bytes into Text.
## Every branch below wraps its result in #try.Success; no branch of this
## definition produces a #try.Failure itself.
(def: #export (from-utf8 value)
  (-> Binary (Try Text))
  (for {@.old
        (#try.Success (java/lang/String::new value (..name ..utf-8)))

        @.jvm
        (#try.Success (java/lang/String::new value (..name ..utf-8)))

        @.js
        (cond host.on-nashorn?
              (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"])
                  (:coerce Text)
                  #try.Success)

              host.on-node-js?
              (|> (Buffer::from|decode [value])
                  (Buffer::toString ["utf8"])
                  #try.Success)

              ## On the browser
              (|> (TextDecoder::new [(..name ..utf-8)])
                  (TextDecoder::decode [value])
                  #try.Success))}))

## Codec between Binary and Text built from ..to-utf8 and ..from-utf8.
(structure: #export UTF-8
  (Codec Binary Text)

  (def: encode ..to-utf8)
  (def: decode ..from-utf8))