From c0acd75d41ed0e927ec318d4b12c0ec4f5f2e1d3 Mon Sep 17 00:00:00 2001
From: Eduardo Julian
Date: Sun, 2 Jul 2017 15:52:36 -0400
Subject: - Adjusted compiler to the new lack of Char type. - WIP: PM/case
 synthesis.

---
 new-luxc/source/luxc/parser.lux | 65 ++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 46 deletions(-)

(limited to 'new-luxc/source/luxc/parser.lux')
diff --git a/new-luxc/source/luxc/parser.lux b/new-luxc/source/luxc/parser.lux
index 1e280e62b..7d9c77f2b 100644
--- a/new-luxc/source/luxc/parser.lux
+++ b/new-luxc/source/luxc/parser.lux
@@ -30,7 +30,6 @@
   (lux (control monad
                 ["p" parser "p/" Monad<Parser>])
        (data [bool]
-             [char]
              [text]
              ["R" result]
              [number]
@@ -160,29 +159,28 @@
 ## and 4 characters long (e.g. \u12aB).
 ## Escaped characters may show up in Char and Text literals.
 (def: escaped-char^
-  (l;Lexer [Text Char])
+  (l;Lexer [Nat Text])  ## [count of source characters consumed, decoded text]
   (p;after (l;this "\\")
            (do p;Monad<Parser>
              [code l;any]
              (case code
                ## Handle special cases.
-               "t"  (wrap ["\\t"  #"\t"])
-               "v"  (wrap ["\\v"  #"\v"])
-               "b"  (wrap ["\\b"  #"\b"])
-               "n"  (wrap ["\\n"  #"\n"])
-               "r"  (wrap ["\\r"  #"\r"])
-               "f"  (wrap ["\\f"  #"\f"])
-               "\"" (wrap ["\\\"" #"\""])
-               "\\" (wrap ["\\\\" #"\\"])
+               "t"  (wrap [+2 "\t"])  ## +2 = the backslash plus the one-letter escape code
+               "v"  (wrap [+2 "\v"])
+               "b"  (wrap [+2 "\b"])
+               "n"  (wrap [+2 "\n"])
+               "r"  (wrap [+2 "\r"])
+               "f"  (wrap [+2 "\f"])
+               "\"" (wrap [+2 "\""])
+               "\\" (wrap [+2 "\\"])
 
                ## Handle unicode escapes.
                "u"
                (do p;Monad<Parser>
-                 [code (l;between +1 +4 l;hex-digit)]
-                 (wrap (case (:: number;Hex@Codec<Text,Nat> decode
-                                 (format "+" code))
+                 [code (l;between +1 +4 l;hexadecimal)]
+                 (wrap (case (|> code (format "+") (:: number;Hex@Codec<Text,Nat> decode))  ## a plus sign is prepended to the digits before the Nat hex codec decodes them
                          (#;Right value)
-                         [(format "\\u" code) (char;char value)]
+                         [(n.+ +2 (text;size code)) (text;from-code value)]  ## +2 covers the backslash and the u; the rest is the hex digits read
 
                          _
                          (undefined))))
@@ -190,31 +188,17 @@
                _
                (p;fail (format "Invalid escaping syntax: " (%t code)))))))
 
-## A character can be either a normal glyph, or a escaped character.
-## The reason why this parser returns both the Char and it's textual
-## representation in the source-code, is for the sake of updating the
-## cursor after parsing the char.
-## A character only represents one glyph, but it's source-code
-## representation may be multi-glyph (e.g. \u1234, \n), in which case,
-## the text that was parsed needs to be counted to update the cursor.
-(def: raw-char^
-  (l;Lexer [Text Char])
-  (p;either (do p;Monad<Parser>
-              [char (l;none-of "\\\"\n")]
-              (wrap [char (|> char (text;nth +0) assume)]))
-            escaped-char^))
-
 ## These are very simple parsers that just cut chunks of text in
 ## specific shapes and then use decoders already present in the
 ## standard library to actually produce the values from the literals.
 (def: rich-digit
   (l;Lexer Text)
-  (p;either l;digit
+  (p;either l;decimal  ## either l;decimal, or (next line) an underscore that contributes empty text
             (p;after (l;this "_") (p/wrap ""))))
 
 (def: rich-digits^
   (l;Lexer Text)
-  (l;seq l;digit
+  (l;seq l;decimal  ## the first element must come from l;decimal; underscores are only accepted afterwards via rich-digit
          (l;some rich-digit)))
 
 (def: (marker^ token)
@@ -262,16 +246,6 @@
    number;Codec<Text,Deg>]
   )
 
-## This parser doesn't delegate the work of producing the value to a
-## codec, since the raw-char^ parser already takes care of that magic.
-(def: #export (parse-char where)
-  (-> Cursor (l;Lexer [Cursor Code]))
-  (do p;Monad<Parser>
-    [[chunk value] (l;enclosed ["#\"" "\""]
-                               raw-char^)]
-    (wrap [(update@ #;column (|>. ($_ n.+ +3 (text;size chunk))) where)
-           [where (#;Char value)]])))
-
 ## This parser looks so complex because text in Lux can be multi-line
 ## and there are rules regarding how this is handled.
 (def: #export (parse-text where)
@@ -334,10 +308,10 @@
                                              ## Must handle escaped
                                              ## chars separately.
                                              (do @
-                                               [[chunk char] escaped-char^]
-                                               (recur (format text-read (char;as-text char))
+                                               [[chars-consumed char] escaped-char^]
+                                               (recur (format text-read char)
                                                       (|> where
-                                                          (update@ #;column (n.+ (text;size chunk))))
+                                                          (update@ #;column (n.+ chars-consumed)))
                                                       false))
                                              ## The text ends when it
                                              ## reaches the right-delimiter.
@@ -538,15 +512,14 @@
         (parse-deg where)
         (parse-symbol where)
         (parse-tag where)
-        (parse-char where)
         (parse-text where)
         )))
 
 (def: #export (parse [where code])
   (-> [Cursor Text] (R;Result [[Cursor Text] Code]))
-  (case (p;run code (parse-ast where))
+  (case (p;run [+0 code] (parse-ast where))  ## the parser input is now a pair; its Nat component starts at +0
     (#R;Error error)
     (#R;Error error)
 
-    (#R;Success [remaining [where' output]])
+    (#R;Success [[_ remaining] [where' output]])  ## the Nat component of the leftover input is ignored; only the remaining text is returned
     (#R;Success [[where' remaining] output])))
-- 
cgit v1.2.3