aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/library/lux/data/text/encoding/utf8.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/library/lux/data/text/encoding/utf8.lux')
-rw-r--r--stdlib/source/library/lux/data/text/encoding/utf8.lux164
1 files changed, 164 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/data/text/encoding/utf8.lux b/stdlib/source/library/lux/data/text/encoding/utf8.lux
new file mode 100644
index 000000000..b24c88837
--- /dev/null
+++ b/stdlib/source/library/lux/data/text/encoding/utf8.lux
@@ -0,0 +1,164 @@
+(.module:
+ [library
+ [lux #*
+ ["@" target]
+ ["." ffi]
+ [abstract
+ [codec (#+ Codec)]]
+ [control
+ ["." try (#+ Try)]]
+ [data
+ ["." binary (#+ Binary)]]]]
+ ["." //])
+
+(with_expansions [<jvm> (as_is (ffi.import: java/lang/String
+ ["#::."
+ (new [[byte] java/lang/String])
+ (getBytes [java/lang/String] [byte])]))]
+ (for {@.old (as_is <jvm>)
+ @.jvm (as_is <jvm>)
+
+ @.js
+ (as_is (ffi.import: Uint8Array)
+
+ ## On Node
+ (ffi.import: Buffer
+ ["#::."
+ (#static from #as from|encode [ffi.String ffi.String] Buffer)
+ (#static from #as from|decode [Uint8Array] Buffer)
+ (toString [ffi.String] ffi.String)])
+
+ ## On the browser
+ (ffi.import: TextEncoder
+ ["#::."
+ (new [ffi.String])
+ (encode [ffi.String] Uint8Array)])
+
+ (ffi.import: TextDecoder
+ ["#::."
+ (new [ffi.String])
+ (decode [Uint8Array] ffi.String)]))
+
+ @.ruby
+ (as_is (ffi.import: String #as RubyString
+ ["#::."
+ (encode [Text] RubyString)
+ (force_encoding [Text] Text)
+ (bytes [] Binary)])
+
+ (ffi.import: Array #as RubyArray
+ ["#::."
+ (pack [Text] RubyString)]))
+
+ @.php
+ (as_is (ffi.import: Almost_Binary)
+ (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary))
+ (ffi.import: (array_values [Almost_Binary] Binary))
+ (def: php_byte_array_format "C*"))
+
+ @.scheme
+ ## https://srfi.schemers.org/srfi-140/srfi-140.html
+ (as_is (ffi.import: (string->utf8 [Text] Binary))
+ (ffi.import: (utf8->string [Binary] Text)))}
+ (as_is)))
+
+(def: (encode value)
+ (-> Text Binary)
+ (for {@.old
+ (java/lang/String::getBytes (//.name //.utf_8)
+ ## TODO: Remove coercion below.
+ ## The coercion below may seem
+ ## gratuitous, but removing it
+ ## causes a grave compilation problem.
+ (:as java/lang/String value))
+
+ @.jvm
+ (java/lang/String::getBytes (//.name //.utf_8) value)
+
+ @.js
+ (cond ffi.on_nashorn?
+ (:as Binary ("js object do" "getBytes" value ["utf8"]))
+
+ ffi.on_node_js?
+ (|> (Buffer::from|encode [value "utf8"])
+ ## This coercion is valid as per NodeJS's documentation:
+ ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays
+ (:as Uint8Array))
+
+ ## On the browser
+ (|> (TextEncoder::new [(//.name //.utf_8)])
+ (TextEncoder::encode [value]))
+ )
+
+ @.python
+ (:as Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8"))
+
+ @.lua
+ ("lua utf8 encode" value)
+
+ @.ruby
+ (|> value
+ (:as RubyString)
+ (RubyString::encode ["UTF-8"])
+ (RubyString::bytes []))
+
+ @.php
+ (|> (..unpack [..php_byte_array_format value])
+ ..array_values
+ ("php object new" "ArrayObject")
+ (:as Binary))
+
+ @.scheme
+ (..string->utf8 value)}))
+
+(def: (decode value)
+ (-> Binary (Try Text))
+ (with_expansions [<jvm> (#try.Success (java/lang/String::new value (//.name //.utf_8)))]
+ (for {@.old <jvm>
+ @.jvm <jvm>
+
+ @.js
+ (cond ffi.on_nashorn?
+ (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"])
+ (:as Text)
+ #try.Success)
+
+ ffi.on_node_js?
+ (|> (Buffer::from|decode [value])
+ (Buffer::toString ["utf8"])
+ #try.Success)
+
+ ## On the browser
+ (|> (TextDecoder::new [(//.name //.utf_8)])
+ (TextDecoder::decode [value])
+ #try.Success))
+
+ @.python
+ (try (:as Text ("python object do" "decode" (:assume value) "utf-8")))
+
+ @.lua
+ (#try.Success ("lua utf8 decode" value))
+
+ @.ruby
+ (|> value
+ (:as RubyArray)
+ (RubyArray::pack ["C*"])
+ (:as RubyString)
+ (RubyString::force_encoding ["UTF-8"])
+ #try.Success)
+
+ @.php
+ (|> value
+ ("php pack" ..php_byte_array_format)
+ #try.Success)
+
+ @.scheme
+ (|> value
+ ..utf8->string
+ #try.Success)})))
+
+(implementation: #export codec
+ (Codec Binary Text)
+
+ (def: encode ..encode)
+ (def: decode ..decode))