aboutsummaryrefslogtreecommitdiff
path: root/new-luxc/source/luxc/parser.lux
diff options
context:
space:
mode:
Diffstat (limited to 'new-luxc/source/luxc/parser.lux')
-rw-r--r--new-luxc/source/luxc/parser.lux65
1 files changed, 19 insertions, 46 deletions
diff --git a/new-luxc/source/luxc/parser.lux b/new-luxc/source/luxc/parser.lux
index 1e280e62b..7d9c77f2b 100644
--- a/new-luxc/source/luxc/parser.lux
+++ b/new-luxc/source/luxc/parser.lux
@@ -30,7 +30,6 @@
(lux (control monad
["p" parser "p/" Monad<Parser>])
(data [bool]
- [char]
[text]
["R" result]
[number]
@@ -160,29 +159,28 @@
## and 4 characters long (e.g. \u12aB).
## Escaped characters may show up in Char and Text literals.
(def: escaped-char^
- (l;Lexer [Text Char])
+ (l;Lexer [Nat Text])
(p;after (l;this "\\")
(do p;Monad<Parser>
[code l;any]
(case code
## Handle special cases.
- "t" (wrap ["\\t" #"\t"])
- "v" (wrap ["\\v" #"\v"])
- "b" (wrap ["\\b" #"\b"])
- "n" (wrap ["\\n" #"\n"])
- "r" (wrap ["\\r" #"\r"])
- "f" (wrap ["\\f" #"\f"])
- "\"" (wrap ["\\\"" #"\""])
- "\\" (wrap ["\\\\" #"\\"])
+ "t" (wrap [+2 "\t"])
+ "v" (wrap [+2 "\v"])
+ "b" (wrap [+2 "\b"])
+ "n" (wrap [+2 "\n"])
+ "r" (wrap [+2 "\r"])
+ "f" (wrap [+2 "\f"])
+ "\"" (wrap [+2 "\""])
+ "\\" (wrap [+2 "\\"])
## Handle unicode escapes.
"u"
(do p;Monad<Parser>
- [code (l;between +1 +4 l;hex-digit)]
- (wrap (case (:: number;Hex@Codec<Text,Nat> decode
- (format "+" code))
+ [code (l;between +1 +4 l;hexadecimal)]
+ (wrap (case (|> code (format "+") (:: number;Hex@Codec<Text,Nat> decode))
(#;Right value)
- [(format "\\u" code) (char;char value)]
+ [(n.+ +2 (text;size code)) (text;from-code value)]
_
(undefined))))
@@ -190,31 +188,17 @@
_
(p;fail (format "Invalid escaping syntax: " (%t code)))))))
-## A character can be either a normal glyph, or an escaped character.
-## The reason why this parser returns both the Char and its textual
-## representation in the source-code, is for the sake of updating the
-## cursor after parsing the char.
-## A character only represents one glyph, but its source-code
-## representation may be multi-glyph (e.g. \u1234, \n), in which case,
-## the text that was parsed needs to be counted to update the cursor.
-(def: raw-char^
- (l;Lexer [Text Char])
- (p;either (do p;Monad<Parser>
- [char (l;none-of "\\\"\n")]
- (wrap [char (|> char (text;nth +0) assume)]))
- escaped-char^))
-
## These are very simple parsers that just cut chunks of text in
## specific shapes and then use decoders already present in the
## standard library to actually produce the values from the literals.
(def: rich-digit
(l;Lexer Text)
- (p;either l;digit
+ (p;either l;decimal
(p;after (l;this "_") (p/wrap ""))))
(def: rich-digits^
(l;Lexer Text)
- (l;seq l;digit
+ (l;seq l;decimal
(l;some rich-digit)))
(def: (marker^ token)
@@ -262,16 +246,6 @@
number;Codec<Text,Deg>]
)
-## This parser doesn't delegate the work of producing the value to a
-## codec, since the raw-char^ parser already takes care of that magic.
-(def: #export (parse-char where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad<Parser>
- [[chunk value] (l;enclosed ["#\"" "\""]
- raw-char^)]
- (wrap [(update@ #;column (|>. ($_ n.+ +3 (text;size chunk))) where)
- [where (#;Char value)]])))
-
## This parser looks so complex because text in Lux can be multi-line
## and there are rules regarding how this is handled.
(def: #export (parse-text where)
@@ -334,10 +308,10 @@
## Must handle escaped
## chars separately.
(do @
- [[chunk char] escaped-char^]
- (recur (format text-read (char;as-text char))
+ [[chars-consumed char] escaped-char^]
+ (recur (format text-read char)
(|> where
- (update@ #;column (n.+ (text;size chunk))))
+ (update@ #;column (n.+ chars-consumed)))
false))
## The text ends when it
## reaches the right-delimiter.
@@ -538,15 +512,14 @@
(parse-deg where)
(parse-symbol where)
(parse-tag where)
- (parse-char where)
(parse-text where)
)))
(def: #export (parse [where code])
(-> [Cursor Text] (R;Result [[Cursor Text] Code]))
- (case (p;run code (parse-ast where))
+ (case (p;run [+0 code] (parse-ast where))
(#R;Error error)
(#R;Error error)
- (#R;Success [remaining [where' output]])
+ (#R;Success [[_ remaining] [where' output]])
(#R;Success [[where' remaining] output])))