aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/lux/data
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--stdlib/source/lux/data/format/binary.lux7
-rw-r--r--stdlib/source/lux/data/format/tar.lux31
-rw-r--r--stdlib/source/lux/data/text/encoding.lux155
-rw-r--r--stdlib/source/lux/data/text/encoding/utf8.lux158
4 files changed, 178 insertions, 173 deletions
diff --git a/stdlib/source/lux/data/format/binary.lux b/stdlib/source/lux/data/format/binary.lux
index 62ef08f4b..0cf1fbdd0 100644
--- a/stdlib/source/lux/data/format/binary.lux
+++ b/stdlib/source/lux/data/format/binary.lux
@@ -15,8 +15,9 @@
["." product]
["." binary (#+ Binary)]
[text
- ["." encoding]
- ["%" format (#+ format)]]
+ ["%" format (#+ format)]
+ [encoding
+ ["." utf8]]]
[collection
["." list]
["." row (#+ Row) ("#\." functor)]
@@ -164,7 +165,7 @@
(template [<name> <binary>]
[(def: #export <name>
(Writer Text)
- (|>> (\ encoding.utf8 encode) <binary>))]
+ (|>> (\ utf8.codec encode) <binary>))]
[utf8/8 ..binary/8]
[utf8/16 ..binary/16]
diff --git a/stdlib/source/lux/data/format/tar.lux b/stdlib/source/lux/data/format/tar.lux
index 7d4968239..504b7f5ac 100644
--- a/stdlib/source/lux/data/format/tar.lux
+++ b/stdlib/source/lux/data/format/tar.lux
@@ -13,7 +13,8 @@
["." binary (#+ Binary)]
["." text (#+ Char)
["%" format (#+ format)]
- ["." encoding]]
+ [encoding
+ ["." utf8]]]
["." format #_
["#" binary (#+ Writer) ("#\." monoid)]]
[collection
@@ -86,7 +87,7 @@
(\ n.octal encode)
(..octal_padding <size>)
(text.suffix suffix)
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
(format.segment padded_size))))
(def: <coercion>
@@ -127,7 +128,7 @@
(Parser Small)
(do <>.monad
[digits (<b>.segment ..small_size)
- digits (<>.lift (\ encoding.utf8 decode digits))
+ digits (<>.lift (\ utf8.codec decode digits))
_ ..verify_small_suffix]
(<>.lift
(do {! try.monad}
@@ -138,7 +139,7 @@
(Parser Big)
(do <>.monad
[digits (<b>.segment ..big_size)
- digits (<>.lift (\ encoding.utf8 decode digits))
+ digits (<>.lift (\ utf8.codec decode digits))
end <b>.bits/8
_ (let [expected (`` (char (~~ (static ..blank))))]
(<>.assert (exception.construct ..wrong_character [expected end])
@@ -169,7 +170,7 @@
(def: checksum_checksum
(|> ..dummy_checksum
:representation
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
..checksum))
(def: checksum_code
@@ -187,14 +188,14 @@
(let [padded_size (n.+ (text.size ..checksum_suffix)
..small_size)]
(|>> :representation
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
(format.segment padded_size))))
(def: checksum_parser
(Parser [Nat Checksum])
(do <>.monad
[ascii (<b>.segment ..small_size)
- digits (<>.lift (\ encoding.utf8 decode ascii))
+ digits (<>.lift (\ utf8.codec decode ascii))
_ ..verify_small_suffix
value (<>.lift
(\ n.octal decode digits))]
@@ -208,7 +209,7 @@
(def: ascii?
(-> Text Bit)
- (|>> (\ encoding.utf8 encode)
+ (|>> (\ utf8.codec encode)
(binary.fold (function (_ char verdict)
(.and verdict
(n.<= ..last_ascii char)))
@@ -227,7 +228,7 @@
0 (#try.Success string)
size (loop [end (dec size)]
(case end
- 0 (#try.Success (\ encoding.utf8 encode ""))
+ 0 (#try.Success (\ utf8.codec encode ""))
_ (do try.monad
[last_char (binary.read/8 end string)]
(`` (case (.nat last_char)
@@ -250,7 +251,7 @@
(def: #export (<in> value)
(-> <representation> (Try <type>))
(if (..ascii? value)
- (if (|> value (\ encoding.utf8 encode) binary.size (n.<= <size>))
+ (if (|> value (\ utf8.codec encode) binary.size (n.<= <size>))
(#try.Success (:abstraction value))
(exception.throw <exception> [value]))
(exception.throw ..not_ascii [value])))
@@ -265,7 +266,7 @@
padded_size (n.+ (text.size suffix) <size>)]
(|>> :representation
(text.suffix suffix)
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
(format.segment padded_size))))
(def: <parser>
@@ -279,7 +280,7 @@
(<>.lift
(do {! try.monad}
[ascii (..un_pad string)
- text (\ encoding.utf8 decode ascii)]
+ text (\ utf8.codec decode ascii)]
(<in> text)))))
(def: #export <none>
@@ -307,7 +308,7 @@
(let [padded_size (n.+ (text.size ..null)
..magic_size)]
(|>> :representation
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
(format.segment padded_size))))
(def: magic_parser
@@ -320,7 +321,7 @@
(n.= expected end))]
(<>.lift
(\ try.monad map (|>> :abstraction)
- (\ encoding.utf8 decode string)))))
+ (\ utf8.codec decode string)))))
)
(def: block_size Size 512)
@@ -742,7 +743,7 @@
(-> Checksum Binary Nat)
(let [|checksum| (|> checksum
..from_checksum
- (\ encoding.utf8 encode)
+ (\ utf8.codec encode)
..checksum)]
(|> (..checksum header)
(n.- |checksum|)
diff --git a/stdlib/source/lux/data/text/encoding.lux b/stdlib/source/lux/data/text/encoding.lux
index 7445d5ebc..92f68dfe0 100644
--- a/stdlib/source/lux/data/text/encoding.lux
+++ b/stdlib/source/lux/data/text/encoding.lux
@@ -1,13 +1,5 @@
(.module:
[lux #*
- ["@" target]
- ["." ffi]
- [abstract
- [codec (#+ Codec)]]
- [control
- ["." try (#+ Try)]]
- [data
- ["." binary (#+ Binary)]]
[type
abstract]])
@@ -168,150 +160,3 @@
(-> Encoding Text)
(|>> :representation))
)
-
-(with_expansions [<jvm> (as_is (ffi.import: java/lang/String
- ["#::."
- (new [[byte] java/lang/String])
- (getBytes [java/lang/String] [byte])]))]
- (for {@.old (as_is <jvm>)
- @.jvm (as_is <jvm>)
-
- @.js
- (as_is (ffi.import: Uint8Array)
-
- ## On Node
- (ffi.import: Buffer
- (#static from #as from|encode [ffi.String ffi.String] Buffer)
- (#static from #as from|decode [Uint8Array] Buffer)
- (toString [ffi.String] ffi.String))
-
- ## On the browser
- (ffi.import: TextEncoder
- (new [ffi.String])
- (encode [ffi.String] Uint8Array))
-
- (ffi.import: TextDecoder
- (new [ffi.String])
- (decode [Uint8Array] ffi.String)))
-
- @.ruby
- (as_is (ffi.import: String #as RubyString
- (encode [Text] RubyString)
- (force_encoding [Text] Text)
- (bytes [] Binary))
-
- (ffi.import: Array #as RubyArray
- (pack [Text] RubyString)))
-
- @.php
- (as_is (ffi.import: Almost_Binary)
- (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary))
- (ffi.import: (array_values [Almost_Binary] Binary))
- (def: php_byte_array_format "C*"))
-
- @.scheme
- ## https://srfi.schemers.org/srfi-140/srfi-140.html
- (as_is (ffi.import: (string->utf8 [Text] Binary))
- (ffi.import: (utf8->string [Binary] Text)))}
- (as_is)))
-
-(def: (utf8\encode value)
- (-> Text Binary)
- (for {@.old
- (java/lang/String::getBytes (..name ..utf_8)
- ## TODO: Remove coercion below.
- ## The coercion below may seem
- ## gratuitous, but removing it
- ## causes a grave compilation problem.
- (:coerce java/lang/String value))
-
- @.jvm
- (java/lang/String::getBytes (..name ..utf_8) value)
-
- @.js
- (cond ffi.on_nashorn?
- (:coerce Binary ("js object do" "getBytes" value ["utf8"]))
-
- ffi.on_node_js?
- (|> (Buffer::from|encode [value "utf8"])
- ## This coercion is valid as per NodeJS's documentation:
- ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays
- (:coerce Uint8Array))
-
- ## On the browser
- (|> (TextEncoder::new [(..name ..utf_8)])
- (TextEncoder::encode [value]))
- )
-
- @.python
- (:coerce Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8"))
-
- @.lua
- ("lua utf8 encode" value)
-
- @.ruby
- (|> value
- (:coerce RubyString)
- (RubyString::encode ["UTF-8"])
- (RubyString::bytes []))
-
- @.php
- (|> (..unpack [..php_byte_array_format value])
- ..array_values
- ("php object new" "ArrayObject")
- (:coerce Binary))
-
- @.scheme
- (..string->utf8 value)}))
-
-(def: (utf8\decode value)
- (-> Binary (Try Text))
- (with_expansions [<jvm> (#try.Success (java/lang/String::new value (..name ..utf_8)))]
- (for {@.old <jvm>
- @.jvm <jvm>
-
- @.js
- (cond ffi.on_nashorn?
- (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"])
- (:coerce Text)
- #try.Success)
-
- ffi.on_node_js?
- (|> (Buffer::from|decode [value])
- (Buffer::toString ["utf8"])
- #try.Success)
-
- ## On the browser
- (|> (TextDecoder::new [(..name ..utf_8)])
- (TextDecoder::decode [value])
- #try.Success))
-
- @.python
- (ffi.try (:coerce Text ("python object do" "decode" (:assume value) "utf-8")))
-
- @.lua
- (#try.Success ("lua utf8 decode" value))
-
- @.ruby
- (|> value
- (:coerce RubyArray)
- (RubyArray::pack ["C*"])
- (:coerce RubyString)
- (RubyString::force_encoding ["UTF-8"])
- #try.Success)
-
- @.php
- (|> value
- ("php pack" ..php_byte_array_format)
- #try.Success)
-
- @.scheme
- (|> value
- ..utf8->string
- #try.Success)})))
-
-(structure: #export utf8
- (Codec Binary Text)
-
- (def: encode ..utf8\encode)
- (def: decode ..utf8\decode))
diff --git a/stdlib/source/lux/data/text/encoding/utf8.lux b/stdlib/source/lux/data/text/encoding/utf8.lux
new file mode 100644
index 000000000..01e4cd8a5
--- /dev/null
+++ b/stdlib/source/lux/data/text/encoding/utf8.lux
@@ -0,0 +1,158 @@
+(.module:
+ [lux #*
+ ["@" target]
+ ["." ffi]
+ [abstract
+ [codec (#+ Codec)]]
+ [control
+ ["." try (#+ Try)]]
+ [data
+ ["." binary (#+ Binary)]]]
+ ["." //])
+
+(with_expansions [<jvm> (as_is (ffi.import: java/lang/String
+ ["#::."
+ (new [[byte] java/lang/String])
+ (getBytes [java/lang/String] [byte])]))]
+ (for {@.old (as_is <jvm>)
+ @.jvm (as_is <jvm>)
+
+ @.js
+ (as_is (ffi.import: Uint8Array)
+
+ ## On Node
+ (ffi.import: Buffer
+ (#static from #as from|encode [ffi.String ffi.String] Buffer)
+ (#static from #as from|decode [Uint8Array] Buffer)
+ (toString [ffi.String] ffi.String))
+
+ ## On the browser
+ (ffi.import: TextEncoder
+ (new [ffi.String])
+ (encode [ffi.String] Uint8Array))
+
+ (ffi.import: TextDecoder
+ (new [ffi.String])
+ (decode [Uint8Array] ffi.String)))
+
+ @.ruby
+ (as_is (ffi.import: String #as RubyString
+ (encode [Text] RubyString)
+ (force_encoding [Text] Text)
+ (bytes [] Binary))
+
+ (ffi.import: Array #as RubyArray
+ (pack [Text] RubyString)))
+
+ @.php
+ (as_is (ffi.import: Almost_Binary)
+ (ffi.import: (unpack [ffi.String ffi.String] Almost_Binary))
+ (ffi.import: (array_values [Almost_Binary] Binary))
+ (def: php_byte_array_format "C*"))
+
+ @.scheme
+ ## https://srfi.schemers.org/srfi-140/srfi-140.html
+ (as_is (ffi.import: (string->utf8 [Text] Binary))
+ (ffi.import: (utf8->string [Binary] Text)))}
+ (as_is)))
+
+(def: (encode value)
+ (-> Text Binary)
+ (for {@.old
+ (java/lang/String::getBytes (//.name //.utf_8)
+ ## TODO: Remove coercion below.
+ ## The coercion below may seem
+ ## gratuitous, but removing it
+ ## causes a grave compilation problem.
+ (:coerce java/lang/String value))
+
+ @.jvm
+ (java/lang/String::getBytes (//.name //.utf_8) value)
+
+ @.js
+ (cond ffi.on_nashorn?
+ (:coerce Binary ("js object do" "getBytes" value ["utf8"]))
+
+ ffi.on_node_js?
+ (|> (Buffer::from|encode [value "utf8"])
+ ## This coercion is valid as per NodeJS's documentation:
+ ## https://nodejs.org/api/buffer.html#buffer_buffers_and_typedarrays
+ (:coerce Uint8Array))
+
+ ## On the browser
+ (|> (TextEncoder::new [(//.name //.utf_8)])
+ (TextEncoder::encode [value]))
+ )
+
+ @.python
+ (:coerce Binary ("python apply" (:assume ("python constant" "bytearray")) value "utf-8"))
+
+ @.lua
+ ("lua utf8 encode" value)
+
+ @.ruby
+ (|> value
+ (:coerce RubyString)
+ (RubyString::encode ["UTF-8"])
+ (RubyString::bytes []))
+
+ @.php
+ (|> (..unpack [..php_byte_array_format value])
+ ..array_values
+ ("php object new" "ArrayObject")
+ (:coerce Binary))
+
+ @.scheme
+ (..string->utf8 value)}))
+
+(def: (decode value)
+ (-> Binary (Try Text))
+ (with_expansions [<jvm> (#try.Success (java/lang/String::new value (//.name //.utf_8)))]
+ (for {@.old <jvm>
+ @.jvm <jvm>
+
+ @.js
+ (cond ffi.on_nashorn?
+ (|> ("js object new" ("js constant" "java.lang.String") [value "utf8"])
+ (:coerce Text)
+ #try.Success)
+
+ ffi.on_node_js?
+ (|> (Buffer::from|decode [value])
+ (Buffer::toString ["utf8"])
+ #try.Success)
+
+ ## On the browser
+ (|> (TextDecoder::new [(//.name //.utf_8)])
+ (TextDecoder::decode [value])
+ #try.Success))
+
+ @.python
+ (ffi.try (:coerce Text ("python object do" "decode" (:assume value) "utf-8")))
+
+ @.lua
+ (#try.Success ("lua utf8 decode" value))
+
+ @.ruby
+ (|> value
+ (:coerce RubyArray)
+ (RubyArray::pack ["C*"])
+ (:coerce RubyString)
+ (RubyString::force_encoding ["UTF-8"])
+ #try.Success)
+
+ @.php
+ (|> value
+ ("php pack" ..php_byte_array_format)
+ #try.Success)
+
+ @.scheme
+ (|> value
+ ..utf8->string
+ #try.Success)})))
+
+(structure: #export codec
+ (Codec Binary Text)
+
+ (def: encode ..encode)
+ (def: decode ..decode))