diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/lux/data/format/binary.lux | 7 | ||||
-rw-r--r-- | stdlib/source/lux/data/format/tar.lux | 31 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/encoding.lux | 155 | ||||
-rw-r--r-- | stdlib/source/lux/data/text/encoding/utf8.lux | 158 |
4 files changed, 178 insertions, 173 deletions
diff --git a/stdlib/source/lux/data/format/binary.lux b/stdlib/source/lux/data/format/binary.lux index 62ef08f4b..0cf1fbdd0 100644 --- a/stdlib/source/lux/data/format/binary.lux +++ b/stdlib/source/lux/data/format/binary.lux @@ -15,8 +15,9 @@ ["." product] ["." binary (#+ Binary)] [text - ["." encoding] - ["%" format (#+ format)]] + ["%" format (#+ format)] + [encoding + ["." utf8]]] [collection ["." list] ["." row (#+ Row) ("#\." functor)] @@ -164,7 +165,7 @@ (template [<name> <binary>] [(def: #export <name> (Writer Text) - (|>> (\ encoding.utf8 encode) <binary>))] + (|>> (\ utf8.codec encode) <binary>))] [utf8/8 ..binary/8] [utf8/16 ..binary/16] diff --git a/stdlib/source/lux/data/format/tar.lux b/stdlib/source/lux/data/format/tar.lux index 7d4968239..504b7f5ac 100644 --- a/stdlib/source/lux/data/format/tar.lux +++ b/stdlib/source/lux/data/format/tar.lux @@ -13,7 +13,8 @@ ["." binary (#+ Binary)] ["." text (#+ Char) ["%" format (#+ format)] - ["." encoding]] + [encoding + ["." utf8]]] ["." format #_ ["#" binary (#+ Writer) ("#\." monoid)]] [collection @@ -86,7 +87,7 @@ (\ n.octal encode) (..octal_padding <size>) (text.suffix suffix) - (\ encoding.utf8 encode) + (\ utf8.codec encode) (format.segment padded_size)))) (def: <coercion> @@ -127,7 +128,7 @@ (Parser Small) (do <>.monad [digits (<b>.segment ..small_size) - digits (<>.lift (\ encoding.utf8 decode digits)) + digits (<>.lift (\ utf8.codec decode digits)) _ ..verify_small_suffix] (<>.lift (do {! try.monad} @@ -138,7 +139,7 @@ (Parser Big) (do <>.monad [digits (<b>.segment ..big_size) - digits (<>.lift (\ encoding.utf8 decode digits)) + digits (<>.lift (\ utf8.codec decode digits)) end <b>.bits/8 _ (let [expected (`` (char (~~ (static ..blank))))] (<>.assert (exception.construct ..wrong_character [expected end]) @@ -169,7 +170,7 @@ (def: checksum_checksum (|> ..dummy_checksum :representation - (\ encoding.utf8 encode) + (\ utf8.codec encode) ..checksum)) (def: checksum_code @@ -187,14 +188,14 @@ (let [padded_size (n.+ (text.size ..checksum_suffix) ..small_size)] (|>> :representation - (\ encoding.utf8 encode) + (\ utf8.codec encode) (format.segment padded_size)))) (def: checksum_parser (Parser [Nat Checksum]) (do <>.monad [ascii (<b>.segment ..small_size) - digits (<>.lift (\ encoding.utf8 decode ascii)) + digits (<>.lift (\ utf8.codec decode ascii)) _ ..verify_small_suffix value (<>.lift (\ n.octal decode digits))] @@ -208,7 +209,7 @@ (def: ascii? (-> Text Bit) - (|>> (\ encoding.utf8 encode) + (|>> (\ utf8.codec encode) (binary.fold (function (_ char verdict) (.and verdict (n.<= ..last_ascii char))) @@ -227,7 +228,7 @@ 0 (#try.Success string) size (loop [end (dec size)] (case end - 0 (#try.Success (\ encoding.utf8 encode "")) + 0 (#try.Success (\ utf8.codec encode "")) _ (do try.monad [last_char (binary.read/8 end string)] (`` (case (.nat last_char) @@ -250,7 +251,7 @@ (def: #export (<in> value) (-> <representation> (Try <type>)) (if (..ascii? value) - (if (|> value (\ encoding.utf8 encode) binary.size (n.<= <size>)) + (if (|> value (\ utf8.codec encode) binary.size (n.<= <size>)) (#try.Success (:abstraction value)) (exception.throw <exception> [value])) (exception.throw ..not_ascii [value]))) @@ -265,7 +266,7 @@ padded_size (n.+ (text.size suffix) <size>)] (|>> :representation (text.suffix suffix) - (\ encoding.utf8 encode) + (\ utf8.codec encode) (format.segment padded_size)))) (def: <parser> @@ -279,7 +280,7 @@ (<>.lift (do {! try.monad} [ascii (..un_pad string) - text (\ encoding.utf8 decode ascii)] + text (\ utf8.codec decode ascii)] (<in> text))))) (def: #export <none> @@ -307,7 +308,7 @@ (let [padded_size (n.+ (text.size ..null) ..magic_size)] (|>> :representation - (\ encoding.utf8 encode) + (\ utf8.codec encode) (format.segment padded_size)))) (def: magic_parser @@ -320,7 +321,7 @@ (n.= expected end))] (<>.lift (\ try.monad map (|>> :abstraction) - (\ encoding.utf8 decode string))))) + (\ utf8.codec decode string))))) ) (def: block_size Size 512) @@ -742,7 +743,7 @@ (-> Checksum Binary Nat) (let [|checksum| (|> checksum ..from_checksum - (\ encoding.utf8 encode) + (\ utf8.codec encode) ..checksum)] (|> (..checksum header) (n.- |checksum|) diff --git a/stdlib/source/lux/data/text/encoding.lux b/stdlib/source/lux/data/text/encoding.lux index 7445d5ebc..92f68dfe0 100644 --- a/stdlib/source/lux/data/text/encoding.lux +++ b/stdlib/source/lux/data/text/encoding.lux @@ -1,13 +1,5 @@ (.module: [lux #* - ["@" target] - ["." ffi] - [abstract - [codec (#+ Codec)]] - [control - ["." try (#+ Try)]] - [data - ["." binary (#+ Binary)]] [type abstract]]) @@ -168,150 +160,3 @@ (-> Encoding Text) (|>> :representation)) ) - -(with_expansions [<jvm> (as_is (ffi.import: java/lang/String - ["#::." - (new [[byte] java/lang/String]) - (getBytes [java/lang/String] [byte])]))] - (for {@.old (as_is <jvm>) - @.jvm (as_is <jvm>) - - @.js - (as_is (ffi.import: Uint8Array) - - ## On Node - (ffi.import: Buffer - (#static from #as from|encode [ffi.String ffi.String] Buffer) - (#static from #as from|decode [Uint8Array] Buffer) - (toString [ffi.String] ffi.String)) - - ## On the browser - (ffi.import: TextEncoder - (new [ffi.String]) - (encode [ffi.String] Uint8Array)) - - (ffi.import: TextDecoder - (new [ffi.String]) - (decode [Uint8Array] ffi.String))) - - @.ruby - (as_is (ffi.import: String #as RubyString - (encode [Text] RubyString) - (force_encoding [Text] Text) - (bytes [] Binary)) - - (ffi.import: Array #as RubyArray - (pack [Text] RubyString))) - - @.php - (as_is (ffi.import: Almost_Binary) - (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary)) - (ffi.import: (array_values [Almost_Binary] Binary)) - (def: php_byte_array_format "C*")) - - @.scheme - ## https://srfi.schemers.org/srfi-140/srfi-140.html - (as_is (ffi.import: (string->utf8 [Text] Binary)) - (ffi.import: (utf8->string [Binary] Text)))} - (as_is))) - -(def: (utf8\encode value) - (-> Text Binary) - (for {@.old - (java/lang/String::getBytes (..name ..utf_8) - ## TODO: Remove coercion below. - ## The coercion below may seem - ## gratuitous, but removing it - ## causes a grave compilation problem. - (:coerce java/lang/String value)) - - @.jvm - (java/lang/String::getBytes (..name ..utf_8) value) - - @.js - (cond ffi.on_nashorn? - (:coerce Binary ("js object do" "getBytes" value ["utf8"])) - - ffi.on_node_js? - (|> (Buffer::from|encode [value "utf8"]) - ## This coercion is valid as per NodeJS's documentation: - ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays - (:coerce Uint8Array)) - - ## On the browser - (|> (TextEncoder::new [(..name ..utf_8)]) - (TextEncoder::encode [value])) - ) - - @.python - (:coerce Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8")) - - @.lua - ("lua utf8 encode" value) - - @.ruby - (|> value - (:coerce RubyString) - (RubyString::encode ["UTF-8"]) - (RubyString::bytes [])) - - @.php - (|> (..unpack [..php_byte_array_format value]) - ..array_values - ("php object new" "ArrayObject") - (:coerce Binary)) - - @.scheme - (..string->utf8 value)})) - -(def: (utf8\decode value) - (-> Binary (Try Text)) - (with_expansions [<jvm> (#try.Success (java/lang/String::new value (..name ..utf_8)))] - (for {@.old <jvm> - @.jvm <jvm> - - @.js - (cond ffi.on_nashorn? - (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"]) - (:coerce Text) - #try.Success) - - ffi.on_node_js? - (|> (Buffer::from|decode [value]) - (Buffer::toString ["utf8"]) - #try.Success) - - ## On the browser - (|> (TextDecoder::new [(..name ..utf_8)]) - (TextDecoder::decode [value]) - #try.Success)) - - @.python - (ffi.try (:coerce Text ("python object do" "decode" (:assume value) "utf-8"))) - - @.lua - (#try.Success ("lua utf8 decode" value)) - - @.ruby - (|> value - (:coerce RubyArray) - (RubyArray::pack ["C*"]) - (:coerce RubyString) - (RubyString::force_encoding ["UTF-8"]) - #try.Success) - - @.php - (|> value - ("php pack" ..php_byte_array_format) - #try.Success) - - @.scheme - (|> value - ..utf8->string - #try.Success)}))) - -(structure: #export utf8 - (Codec Binary Text) - - (def: encode ..utf8\encode) - (def: decode ..utf8\decode)) diff --git a/stdlib/source/lux/data/text/encoding/utf8.lux b/stdlib/source/lux/data/text/encoding/utf8.lux new file mode 100644 index 000000000..01e4cd8a5 --- /dev/null +++ b/stdlib/source/lux/data/text/encoding/utf8.lux @@ -0,0 +1,158 @@ +(.module: + [lux #* + ["@" target] + ["." ffi] + [abstract + [codec (#+ Codec)]] + [control + ["." try (#+ Try)]] + [data + ["." binary (#+ Binary)]]] + ["." //]) + +(with_expansions [<jvm> (as_is (ffi.import: java/lang/String + ["#::." + (new [[byte] java/lang/String]) + (getBytes [java/lang/String] [byte])]))] + (for {@.old (as_is <jvm>) + @.jvm (as_is <jvm>) + + @.js + (as_is (ffi.import: Uint8Array) + + ## On Node + (ffi.import: Buffer + (#static from #as from|encode [ffi.String ffi.String] Buffer) + (#static from #as from|decode [Uint8Array] Buffer) + (toString [ffi.String] ffi.String)) + + ## On the browser + (ffi.import: TextEncoder + (new [ffi.String]) + (encode [ffi.String] Uint8Array)) + + (ffi.import: TextDecoder + (new [ffi.String]) + (decode [Uint8Array] ffi.String))) + + @.ruby + (as_is (ffi.import: String #as RubyString + (encode [Text] RubyString) + (force_encoding [Text] Text) + (bytes [] Binary)) + + (ffi.import: Array #as RubyArray + (pack [Text] RubyString))) + + @.php + (as_is (ffi.import: Almost_Binary) + (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary)) + (ffi.import: (array_values [Almost_Binary] Binary)) + (def: php_byte_array_format "C*")) + + @.scheme + ## https://srfi.schemers.org/srfi-140/srfi-140.html + (as_is (ffi.import: (string->utf8 [Text] Binary)) + (ffi.import: (utf8->string [Binary] Text)))} + (as_is))) + +(def: (encode value) + (-> Text Binary) + (for {@.old + (java/lang/String::getBytes (//.name //.utf_8) + ## TODO: Remove coercion below. + ## The coercion below may seem + ## gratuitous, but removing it + ## causes a grave compilation problem. + (:coerce java/lang/String value)) + + @.jvm + (java/lang/String::getBytes (//.name //.utf_8) value) + + @.js + (cond ffi.on_nashorn? + (:coerce Binary ("js object do" "getBytes" value ["utf8"])) + + ffi.on_node_js? + (|> (Buffer::from|encode [value "utf8"]) + ## This coercion is valid as per NodeJS's documentation: + ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays + (:coerce Uint8Array)) + + ## On the browser + (|> (TextEncoder::new [(//.name //.utf_8)]) + (TextEncoder::encode [value])) + ) + + @.python + (:coerce Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8")) + + @.lua + ("lua utf8 encode" value) + + @.ruby + (|> value + (:coerce RubyString) + (RubyString::encode ["UTF-8"]) + (RubyString::bytes [])) + + @.php + (|> (..unpack [..php_byte_array_format value]) + ..array_values + ("php object new" "ArrayObject") + (:coerce Binary)) + + @.scheme + (..string->utf8 value)})) + +(def: (decode value) + (-> Binary (Try Text)) + (with_expansions [<jvm> (#try.Success (java/lang/String::new value (//.name //.utf_8)))] + (for {@.old <jvm> + @.jvm <jvm> + + @.js + (cond ffi.on_nashorn? + (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"]) + (:coerce Text) + #try.Success) + + ffi.on_node_js? + (|> (Buffer::from|decode [value]) + (Buffer::toString ["utf8"]) + #try.Success) + + ## On the browser + (|> (TextDecoder::new [(//.name //.utf_8)]) + (TextDecoder::decode [value]) + #try.Success)) + + @.python + (ffi.try (:coerce Text ("python object do" "decode" (:assume value) "utf-8"))) + + @.lua + (#try.Success ("lua utf8 decode" value)) + + @.ruby + (|> value + (:coerce RubyArray) + (RubyArray::pack ["C*"]) + (:coerce RubyString) + (RubyString::force_encoding ["UTF-8"]) + #try.Success) + + @.php + (|> value + ("php pack" ..php_byte_array_format) + #try.Success) + + @.scheme + (|> value + ..utf8->string + #try.Success)}))) + +(structure: #export codec + (Codec Binary Text) + + (def: encode ..encode) + (def: decode ..decode)) |