diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/library/lux/data/text/encoding/utf8.lux | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/text/encoding/utf8.lux b/stdlib/source/library/lux/data/text/encoding/utf8.lux new file mode 100644 index 000000000..b24c88837 --- /dev/null +++ b/stdlib/source/library/lux/data/text/encoding/utf8.lux @@ -0,0 +1,164 @@ +(.module: + [library + [lux #* + ["@" target] + ["." ffi] + [abstract + [codec (#+ Codec)]] + [control + ["." try (#+ Try)]] + [data + ["." binary (#+ Binary)]]]] + ["." //]) + +(with_expansions [<jvm> (as_is (ffi.import: java/lang/String + ["#::." + (new [[byte] java/lang/String]) + (getBytes [java/lang/String] [byte])]))] + (for {@.old (as_is <jvm>) + @.jvm (as_is <jvm>) + + @.js + (as_is (ffi.import: Uint8Array) + + ## On Node + (ffi.import: Buffer + ["#::." + (#static from #as from|encode [ffi.String ffi.String] Buffer) + (#static from #as from|decode [Uint8Array] Buffer) + (toString [ffi.String] ffi.String)]) + + ## On the browser + (ffi.import: TextEncoder + ["#::." + (new [ffi.String]) + (encode [ffi.String] Uint8Array)]) + + (ffi.import: TextDecoder + ["#::." + (new [ffi.String]) + (decode [Uint8Array] ffi.String)])) + + @.ruby + (as_is (ffi.import: String #as RubyString + ["#::." + (encode [Text] RubyString) + (force_encoding [Text] Text) + (bytes [] Binary)]) + + (ffi.import: Array #as RubyArray + ["#::." + (pack [Text] RubyString)])) + + @.php + (as_is (ffi.import: Almost_Binary) + (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary)) + (ffi.import: (array_values [Almost_Binary] Binary)) + (def: php_byte_array_format "C*")) + + @.scheme + ## https://srfi.schemers.org/srfi-140/srfi-140.html + (as_is (ffi.import: (string->utf8 [Text] Binary)) + (ffi.import: (utf8->string [Binary] Text)))} + (as_is))) + +(def: (encode value) + (-> Text Binary) + (for {@.old + (java/lang/String::getBytes (//.name //.utf_8) + ## TODO: Remove coercion below. + ## The coercion below may seem + ## gratuitous, but removing it + ## causes a grave compilation problem. + (:as java/lang/String value)) + + @.jvm + (java/lang/String::getBytes (//.name //.utf_8) value) + + @.js + (cond ffi.on_nashorn? + (:as Binary ("js object do" "getBytes" value ["utf8"])) + + ffi.on_node_js? + (|> (Buffer::from|encode [value "utf8"]) + ## This coercion is valid as per NodeJS's documentation: + ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays + (:as Uint8Array)) + + ## On the browser + (|> (TextEncoder::new [(//.name //.utf_8)]) + (TextEncoder::encode [value])) + ) + + @.python + (:as Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8")) + + @.lua + ("lua utf8 encode" value) + + @.ruby + (|> value + (:as RubyString) + (RubyString::encode ["UTF-8"]) + (RubyString::bytes [])) + + @.php + (|> (..unpack [..php_byte_array_format value]) + ..array_values + ("php object new" "ArrayObject") + (:as Binary)) + + @.scheme + (..string->utf8 value)})) + +(def: (decode value) + (-> Binary (Try Text)) + (with_expansions [<jvm> (#try.Success (java/lang/String::new value (//.name //.utf_8)))] + (for {@.old <jvm> + @.jvm <jvm> + + @.js + (cond ffi.on_nashorn? + (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"]) + (:as Text) + #try.Success) + + ffi.on_node_js? + (|> (Buffer::from|decode [value]) + (Buffer::toString ["utf8"]) + #try.Success) + + ## On the browser + (|> (TextDecoder::new [(//.name //.utf_8)]) + (TextDecoder::decode [value]) + #try.Success)) + + @.python + (try (:as Text ("python object do" "decode" (:assume value) "utf-8"))) + + @.lua + (#try.Success ("lua utf8 decode" value)) + + @.ruby + (|> value + (:as RubyArray) + (RubyArray::pack ["C*"]) + (:as RubyString) + (RubyString::force_encoding ["UTF-8"]) + #try.Success) + + @.php + (|> value + ("php pack" ..php_byte_array_format) + #try.Success) + + @.scheme + (|> value + ..utf8->string + #try.Success)}))) + +(implementation: #export codec + (Codec Binary Text) + + (def: encode ..encode) + (def: decode ..decode)) |