aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--stdlib/source/lux/data/format/json.lux6
-rw-r--r--stdlib/source/lux/data/format/xml.lux4
-rw-r--r--stdlib/source/lux/data/text/lexer.lux191
-rw-r--r--stdlib/source/lux/data/text/regex.lux81
-rw-r--r--stdlib/test/test/lux/data/text/lexer.lux30
5 files changed, 150 insertions, 162 deletions
diff --git a/stdlib/source/lux/data/format/json.lux b/stdlib/source/lux/data/format/json.lux
index 0ce1b602a..d7469e24b 100644
--- a/stdlib/source/lux/data/format/json.lux
+++ b/stdlib/source/lux/data/format/json.lux
@@ -260,16 +260,16 @@
(l;Lexer Number)
(do p;Monad<Parser>
[signed? (l;this? "-")
- digits (l;many l;digit)
+ digits (l;many l;decimal)
decimals (p;default "0"
(do @
[_ (l;this ".")]
- (l;many l;digit)))
+ (l;many l;decimal)))
exp (p;default ""
(do @
[mark (l;one-of "eE")
signed?' (l;this? "-")
- offset (l;many l;digit)]
+ offset (l;many l;decimal)]
(wrap (format mark (if signed?' "-" "") offset))))]
(case (Real/decode (format (if signed? "-" "") digits "." decimals exp))
(#R;Error message)
diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux
index ef2f5d44d..c87502e30 100644
--- a/stdlib/source/lux/data/format/xml.lux
+++ b/stdlib/source/lux/data/format/xml.lux
@@ -51,10 +51,10 @@
[hex? (p;opt (l;this "x"))
code (case hex?
#;None
- (l;codec number;Codec<Text,Int> (l;many l;digit))
+ (l;codec number;Codec<Text,Int> (l;many l;decimal))
(#;Some _)
- (l;codec number;Hex@Codec<Text,Int> (l;many l;hex-digit)))]
+ (l;codec number;Hex@Codec<Text,Int> (l;many l;hexadecimal)))]
(wrap (|> code int-to-nat char;char char;as-text)))
(p;before (l;this ";"))
(p;after (l;this "&#"))))
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index 3d7423ca2..c57382134 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -13,31 +13,45 @@
["R" result]
(coll [list "L/" Functor<List>]))))
+(type: Offset Nat)
+
+(def: start-offset Offset +0)
+
(type: #export Lexer
- (p;Parser Text))
+ (p;Parser [Offset Text]))
+
+(def: (remaining offset tape)
+ (-> Offset Text Text)
+ (|> tape (text;split offset) assume product;right))
+
+(def: cannot-lex-error Text "Cannot lex from empty text.")
+
+(def: (unconsumed-input-error offset tape)
+ (-> Offset Text Text)
+ (format "Unconsumed input: " (remaining offset tape)))
(def: #export (run input lexer)
(All [a] (-> Text (Lexer a) (R;Result a)))
- (case (lexer input)
+ (case (lexer [start-offset input])
(#R;Error msg)
(#R;Error msg)
- (#R;Success [input' output])
- (if (T/= "" input')
+ (#R;Success [[end-offset _] output])
+ (if (n.= end-offset (text;size input))
(#R;Success output)
- (#R;Error (format "Remaining lexer input: " input')))
+ (#R;Error (unconsumed-input-error end-offset input)))
))
(def: #export any
{#;doc "Just returns the next character without applying any logic."}
(Lexer Text)
- (function [input]
- (case [(text;nth +0 input) (text;split +1 input)]
- [(#;Some output) (#;Some [_ input'])]
- (#R;Success [input' (char;as-text output)])
+ (function [[offset tape]]
+ (case (text;nth offset tape)
+ (#;Some output)
+ (#R;Success [[(n.inc offset) tape] (char;as-text output)])
_
- (#R;Error "Cannot parse character from empty text."))
+ (#R;Error cannot-lex-error))
))
(def: #export (not p)
@@ -54,59 +68,64 @@
(def: #export (this reference)
{#;doc "Lex a text if it matches the given sample."}
(-> Text (Lexer Unit))
- (function [input]
- (if (text;starts-with? reference input)
- (case (text;split (text;size reference) input)
- #;None (#R;Error "")
- (#;Some [_ input']) (#R;Success [input' []]))
- (#R;Error (format "Invalid match: " (text;encode reference) " @ " (text;encode input))))))
+ (function [[offset tape]]
+ (case (text;index-of reference offset tape)
+ (^multi (#;Some where) (n.= offset where))
+ (#R;Success [[(n.+ (text;size reference) offset) tape] []])
+
+ _
+ (#R;Error (format "Could not match: " (text;encode reference) " @ " tape)))))
(def: #export (this? reference)
{#;doc "Lex a text if it matches the given sample."}
(-> Text (Lexer Bool))
- (function [input]
- (if (text;starts-with? reference input)
- (case (text;split (text;size reference) input)
- #;None (#R;Success [input false])
- (#;Some [_ input']) (#R;Success [input' true]))
- (#R;Success [input false]))
- ))
+ (function [(^@ input [offset tape])]
+ (case (text;index-of reference offset tape)
+ (^multi (#;Some where) (n.= offset where))
+ (#R;Success [[(n.+ (text;size reference) offset) tape] true])
+
+ _
+ (#R;Success [input false]))))
(def: #export end
{#;doc "Ensure the lexer's input is empty."}
(Lexer Unit)
- (function [input]
- (case input
- "" (#R;Success [input []])
- _ (#R;Error (format "The text input has not been fully consumed @ " (text;encode input)))
- )))
+ (function [(^@ input [offset tape])]
+ (if (n.= offset (text;size tape))
+ (#R;Success [input []])
+ (#R;Error (unconsumed-input-error offset tape)))))
+
+(def: #export end?
+ {#;doc "Ask if the lexer's input is empty."}
+ (Lexer Bool)
+ (function [(^@ input [offset tape])]
+ (#R;Success [input (n.= offset (text;size tape))])))
(def: #export peek
{#;doc "Lex the next character (without consuming it from the input)."}
(Lexer Text)
- (function [input]
- (case (text;nth +0 input)
+ (function [(^@ input [offset tape])]
+ (case (text;nth offset tape)
(#;Some output)
(#R;Success [input (char;as-text output)])
_
- (#R;Error "Cannot peek character from empty text."))
+ (#R;Error cannot-lex-error))
))
(def: #export get-input
{#;doc "Get all of the remaining input (without consuming it)."}
(Lexer Text)
- (function [input]
- (#R;Success [input input])))
+ (function [(^@ input [offset tape])]
+ (#R;Success [input (remaining offset tape)])))
-(def: #export (char-range bottom top)
+(def: #export (range bottom top)
{#;doc "Only lex characters within a range."}
(-> Char Char (Lexer Text))
(do p;Monad<Parser>
- [input get-input
- char any
+ [char any
#let [char' (|> char (text;nth +0) assume)]
- _ (p;assert (format "Character is not within range: " (C/encode bottom) "-" (C/encode top) " @ " (text;encode input))
+ _ (p;assert (format "Character is not within range: " (C/encode bottom) "-" (C/encode top))
(and (C/>= bottom char')
(C/<= top char')))]
(wrap char)))
@@ -115,12 +134,12 @@
[(def: #export <name>
{#;doc (#;TextA (format "Only lex " <desc> " characters."))}
(Lexer Text)
- (char-range <bottom> <top>))]
+ (range <bottom> <top>))]
- [upper #"A" #"Z" "uppercase"]
- [lower #"a" #"z" "lowercase"]
- [digit #"0" #"9" "decimal"]
- [oct-digit #"0" #"7" "octal"]
+ [upper #"A" #"Z" "uppercase"]
+ [lower #"a" #"z" "lowercase"]
+ [decimal #"0" #"9" "decimal"]
+ [octal #"0" #"7" "octal"]
)
(def: #export alpha
@@ -131,68 +150,56 @@
(def: #export alpha-num
{#;doc "Only lex alphanumeric characters."}
(Lexer Text)
- (p;either alpha digit))
+ (p;either alpha decimal))
-(def: #export hex-digit
+(def: #export hexadecimal
{#;doc "Only lex hexadecimal digits."}
(Lexer Text)
($_ p;either
- digit
- (char-range #"a" #"f")
- (char-range #"A" #"F")))
+ decimal
+ (range #"a" #"f")
+ (range #"A" #"F")))
(def: #export (one-of options)
{#;doc "Only lex characters that are part of a piece of text."}
(-> Text (Lexer Text))
- (function [input]
- (case (text;split +1 input)
- (#;Some [init input'])
- (if (text;contains? init options)
- (case (text;nth +0 init)
- (#;Some output)
- (#R;Success [input' (char;as-text output)])
-
- _
- (#R;Error ""))
- (#R;Error (format "Character (" init ") is not one of: " options " @ " (text;encode input))))
+ (function [[offset tape]]
+ (case (text;nth offset tape)
+ (#;Some output)
+ (let [output (char;as-text output)]
+ (if (text;contains? output options)
+ (#R;Success [[(n.inc offset) tape] output])
+ (#R;Error (format "Character (" output ") is not one of: " options))))
_
- (#R;Error "Cannot parse character from empty text."))))
+ (#R;Error cannot-lex-error))))
(def: #export (none-of options)
{#;doc "Only lex characters that are not part of a piece of text."}
(-> Text (Lexer Text))
- (function [input]
- (case (text;split +1 input)
- (#;Some [init input'])
- (if (;not (text;contains? init options))
- (case (text;nth +0 init)
- (#;Some output)
- (#R;Success [input' (char;as-text output)])
-
- _
- (#R;Error ""))
- (#R;Error (format "Character (" init ") is one of: " options " @ " (text;encode input))))
+ (function [[offset tape]]
+ (case (text;nth offset tape)
+ (#;Some output)
+ (let [output (char;as-text output)]
+ (if (;not (text;contains? output options))
+ (#R;Success [[(n.inc offset) tape] output])
+ (#R;Error (format "Character (" output ") is one of: " options))))
_
- (#R;Error "Cannot parse character from empty text."))))
+ (#R;Error cannot-lex-error))))
(def: #export (satisfies p)
{#;doc "Only lex characters that satisfy a predicate."}
(-> (-> Char Bool) (Lexer Text))
- (function [input]
- (case (: (Maybe [Text Char])
- (do Monad<Maybe>
- [[init input'] (text;split +1 input)
- output (text;nth +0 init)]
- (wrap [input' output])))
- (#;Some [input' output])
+ (function [[offset tape]]
+ (case (text;nth offset tape)
+ (#;Some output)
(if (p output)
- (#R;Success [input' (char;as-text output)])
- (#R;Error (format "Character does not satisfy predicate: " (text;encode input))))
+ (#R;Success [[(n.inc offset) tape] (char;as-text output)])
+ (#R;Error (format "Character does not satisfy predicate: " (char;as-text output))))
_
- (#R;Error "Cannot parse character from empty text."))))
+ (#R;Error cannot-lex-error))))
(def: #export space
{#;doc "Only lex white-space."}
@@ -210,9 +217,7 @@
[(def: #export (<name> p)
{#;doc <doc>}
(-> (Lexer Text) (Lexer Text))
- (do p;Monad<Parser>
- []
- (|> p <base> (:: @ map text;concat))))]
+ (|> p <base> (:: p;Monad<Parser> map text;concat)))]
[some p;some "Lex some characters as a single continuous text."]
[many p;many "Lex many characters as a single continuous text."]
@@ -236,13 +241,7 @@
(-> Nat Nat (Lexer Text) (Lexer Text))
(|> p (p;between from to) (:: p;Monad<Parser> map text;concat)))
-(def: #export end?
- {#;doc "Ask if the lexer's input is empty."}
- (Lexer Bool)
- (function [input]
- (#R;Success [input (text;empty? input)])))
-
-(def: #export (codec codec lexer)
+(def: #export (codec Codec<a> lexer)
{#;doc "Lex a token by means of a codec."}
(All [a] (-> (Codec Text a) (Lexer Text) (Lexer a)))
(function [input]
@@ -251,7 +250,7 @@
(#R;Error error)
(#R;Success [input' to-decode])
- (case (:: codec decode to-decode)
+ (case (:: Codec<a> decode to-decode)
(#R;Error error)
(#R;Error error)
@@ -268,11 +267,9 @@
{#;doc "Run a lexer with the given input, instead of the real one."}
(All [a] (-> Text (Lexer a) (Lexer a)))
(function [real-input]
- (case (p;run local-input lexer)
+ (case (run local-input lexer)
(#R;Error error)
(#R;Error error)
- (#R;Success [unconsumed value])
- (if (T/= "" unconsumed)
- (#R;Success [real-input value])
- (#R;Error (format "Unconsumed input: " unconsumed))))))
+ (#R;Success value)
+ (#R;Success [real-input value]))))
diff --git a/stdlib/source/lux/data/text/regex.lux b/stdlib/source/lux/data/text/regex.lux
index 3666f68b8..86f215497 100644
--- a/stdlib/source/lux/data/text/regex.lux
+++ b/stdlib/source/lux/data/text/regex.lux
@@ -8,6 +8,7 @@
text/format
[number "Int/" Codec<Text,Int>]
[product]
+ ["R" result]
(coll [list "L/" Fold<List> Monad<List>]))
[macro #- run]
(macro [code]
@@ -26,21 +27,11 @@
l;any
regex-char^)))
-(def: (local^ state lexer)
- (All [a] (-> Text (l;Lexer a) (l;Lexer a)))
- (function [old-state]
- (case (lexer state)
- (#;Left error)
- (#;Left error)
-
- (#;Right [_ value])
- (#;Right [old-state value]))))
-
(def: #hidden (refine^ refinement^ base^)
(All [a] (-> (l;Lexer a) (l;Lexer Text) (l;Lexer Text)))
(do p;Monad<Parser>
[output base^
- _ (local^ output refinement^)]
+ _ (l;local output refinement^)]
(wrap output)))
(def: #hidden word^
@@ -65,7 +56,7 @@
(def: identifier-part^
(l;Lexer Text)
(do p;Monad<Parser>
- [head (refine^ (l;not l;digit)
+ [head (refine^ (l;not l;decimal)
identifier-char^)
tail (l;some identifier-char^)]
(wrap (format head tail))))
@@ -84,13 +75,13 @@
[ident (l;enclosed ["\\@<" ">"] (identifier^ current-module))]
(wrap (` (: (l;Lexer Text) (~ (code;symbol ident)))))))
-(def: re-char-range^
+(def: re-range^
(l;Lexer Code)
(do p;Monad<Parser>
[from (|> regex-char^ (:: @ map (|>. (text;nth +0) assume)))
_ (l;this "-")
to (|> regex-char^ (:: @ map (|>. (text;nth +0) assume)))]
- (wrap (` (l;char-range (~ (code;char from)) (~ (code;char to)))))))
+ (wrap (` (l;range (~ (code;char from)) (~ (code;char to)))))))
(def: re-char^
(l;Lexer Code)
@@ -98,7 +89,7 @@
[char escaped-char^]
(wrap (` (;;copy (~ (code;text char)))))))
-(def: re-char-options^
+(def: re-options^
(l;Lexer Code)
(do p;Monad<Parser>
[options (l;many escaped-char^)]
@@ -109,8 +100,8 @@
(do p;Monad<Parser>
[negate? (p;opt (l;this "^"))
parts (p;many ($_ p;either
- re-char-range^
- re-char-options^))]
+ re-range^
+ re-options^))]
(wrap (case negate?
(#;Some _) (` (l;not ($_ p;either (~@ parts))))
#;None (` ($_ p;either (~@ parts)))))))
@@ -132,11 +123,11 @@
(def: #hidden ascii^
(l;Lexer Text)
- (l;char-range #"\u0000" #"\u007F"))
+ (l;range #"\u0000" #"\u007F"))
(def: #hidden control^
(l;Lexer Text)
- (p;either (l;char-range #"\u0000" #"\u001F")
+ (p;either (l;range #"\u0000" #"\u001F")
(l;one-of "\u007F")))
(def: #hidden punct^
@@ -158,8 +149,8 @@
[]
($_ p;either
(p;after (l;this ".") (wrap (` l;any)))
- (p;after (l;this "\\d") (wrap (` l;digit)))
- (p;after (l;this "\\D") (wrap (` (l;not l;digit))))
+ (p;after (l;this "\\d") (wrap (` l;decimal)))
+ (p;after (l;this "\\D") (wrap (` (l;not l;decimal))))
(p;after (l;this "\\s") (wrap (` l;space)))
(p;after (l;this "\\S") (wrap (` (l;not l;space))))
(p;after (l;this "\\w") (wrap (` word^)))
@@ -168,11 +159,11 @@
(p;after (l;this "\\p{Lower}") (wrap (` l;lower)))
(p;after (l;this "\\p{Upper}") (wrap (` l;upper)))
(p;after (l;this "\\p{Alpha}") (wrap (` l;alpha)))
- (p;after (l;this "\\p{Digit}") (wrap (` l;digit)))
+ (p;after (l;this "\\p{Digit}") (wrap (` l;decimal)))
(p;after (l;this "\\p{Alnum}") (wrap (` l;alpha-num)))
(p;after (l;this "\\p{Space}") (wrap (` l;space)))
- (p;after (l;this "\\p{HexDigit}") (wrap (` l;hex-digit)))
- (p;after (l;this "\\p{OctDigit}") (wrap (` l;oct-digit)))
+ (p;after (l;this "\\p{HexDigit}") (wrap (` l;hexadecimal)))
+ (p;after (l;this "\\p{OctDigit}") (wrap (` l;octal)))
(p;after (l;this "\\p{Blank}") (wrap (` blank^)))
(p;after (l;this "\\p{ASCII}") (wrap (` ascii^)))
(p;after (l;this "\\p{Contrl}") (wrap (` control^)))
@@ -188,7 +179,7 @@
(def: number^
(l;Lexer Nat)
- (|> (l;many l;digit)
+ (|> (l;many l;decimal)
(l;codec number;Codec<Text,Int>)
(p/map int-to-nat)))
@@ -285,14 +276,14 @@
[Int (List Code) (List (List Code))])
(function [part [idx names steps]]
(case part
- (^or (#;Left complex) (#;Right [#Non-Capturing complex]))
+ (^or (#R;Error complex) (#R;Success [#Non-Capturing complex]))
[idx
names
(list& (list g!temp complex
(' #let) (` [(~ g!total) (_Text/append_ (~ g!total) (~ g!temp))]))
steps)]
- (#;Right [(#Capturing [?name num-captures]) scoped])
+ (#R;Success [(#Capturing [?name num-captures]) scoped])
(let [[idx! name!] (case ?name
(#;Some _name)
[idx (code;symbol ["" _name])]
@@ -329,31 +320,31 @@
(All [l r] (-> (l;Lexer [Text l]) (l;Lexer [Text r]) (l;Lexer [Text (| l r)])))
(function [input]
(case (left input)
- (#;Right [input' [lt lv]])
- (#;Right [input' [lt (+0 lv)]])
+ (#R;Success [input' [lt lv]])
+ (#R;Success [input' [lt (+0 lv)]])
- (#;Left _)
+ (#R;Error _)
(case (right input)
- (#;Right [input' [rt rv]])
- (#;Right [input' [rt (+1 rv)]])
+ (#R;Success [input' [rt rv]])
+ (#R;Success [input' [rt (+1 rv)]])
- (#;Left error)
- (#;Left error)))))
+ (#R;Error error)
+ (#R;Error error)))))
(def: #hidden (|||_^ left right)
(All [l r] (-> (l;Lexer [Text l]) (l;Lexer [Text r]) (l;Lexer Text)))
(function [input]
(case (left input)
- (#;Right [input' [lt lv]])
- (#;Right [input' lt])
+ (#R;Success [input' [lt lv]])
+ (#R;Success [input' lt])
- (#;Left _)
+ (#R;Error _)
(case (right input)
- (#;Right [input' [rt rv]])
- (#;Right [input' rt])
+ (#R;Success [input' [rt rv]])
+ (#R;Success [input' rt])
- (#;Left error)
- (#;Left error)))))
+ (#R;Error error)
+ (#R;Error error)))))
(def: (prep-alternative [num-captures alt])
(-> [Nat Code] Code)
@@ -471,11 +462,11 @@
(case (|> (regex^ current-module)
(p;before l;end)
(l;run pattern))
- (#;Left error)
+ (#R;Error error)
(macro;fail (format "Error while parsing regular-expression:\n"
error))
- (#;Right regex)
+ (#R;Success regex)
(wrap (list regex))
)))
@@ -497,7 +488,7 @@
[g!temp (macro;gensym "temp")]
(wrap (list& (` (^multi (~ g!temp)
[(l;run (~ g!temp) (regex (~ (code;text pattern))))
- (#;Right (~ (default g!temp
- bindings)))]))
+ (#R;Success (~ (default g!temp
+ bindings)))]))
body
branches))))
diff --git a/stdlib/test/test/lux/data/text/lexer.lux b/stdlib/test/test/lux/data/text/lexer.lux
index 76eadfbb0..8752d4b96 100644
--- a/stdlib/test/test/lux/data/text/lexer.lux
+++ b/stdlib/test/test/lux/data/text/lexer.lux
@@ -94,9 +94,9 @@
(test "Can lex characters ranges."
(and (should-passT "Y" (&;run "Y"
- (&;char-range #"X" #"Z")))
+ (&;range #"X" #"Z")))
(should-fail (&;run "M"
- (&;char-range #"X" #"Z")))))
+ (&;range #"X" #"Z")))))
(test "Can lex upper-case and &;lower-case letters."
(and (should-passT "Y" (&;run "Y"
@@ -111,23 +111,23 @@
(test "Can lex numbers."
(and (should-passT "1" (&;run "1"
- &;digit))
+ &;decimal))
(should-fail (&;run " "
- &;digit))
+ &;decimal))
(should-passT "7" (&;run "7"
- &;oct-digit))
+ &;octal))
(should-fail (&;run "8"
- &;oct-digit))
+ &;octal))
(should-passT "1" (&;run "1"
- &;hex-digit))
+ &;hexadecimal))
(should-passT "a" (&;run "a"
- &;hex-digit))
+ &;hexadecimal))
(should-passT "A" (&;run "A"
- &;hex-digit))
+ &;hexadecimal))
(should-fail (&;run " "
- &;hex-digit))
+ &;hexadecimal))
))
(test "Can lex alphabetic characters."
@@ -167,9 +167,9 @@
(test "Can create the opposite of a lexer."
(and (should-passT "a" (&;run "a"
- (&;not (p;alt &;digit &;upper))))
+ (&;not (p;alt &;decimal &;upper))))
(should-fail (&;run "A"
- (&;not (p;alt &;digit &;upper))))))
+ (&;not (p;alt &;decimal &;upper))))))
(test "Can select from among a set of characters."
(and (should-passT "C" (&;run "C"
@@ -191,10 +191,10 @@
(test "Can apply a lexer multiple times."
(and (should-passT "0123456789ABCDEF" (&;run "0123456789ABCDEF"
- (&;many &;hex-digit)))
+ (&;many &;hexadecimal)))
(should-fail (&;run "yolo"
- (&;many &;hex-digit)))
+ (&;many &;hexadecimal)))
(should-passT "" (&;run ""
- (&;some &;hex-digit)))))
+ (&;some &;hexadecimal)))))
))