From f74614ad4173cc7df1dd22944fddebf5a03bab27 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 21 Aug 2018 22:48:17 -0400 Subject: - Added single-line comment parsing. - Added tag parsing. - Added tracking. --- stdlib/source/lux.lux | 2 +- .../source/lux/compiler/default/phase/analysis.lux | 20 +- stdlib/source/lux/compiler/default/syntax.lux | 393 ++++++++++++--------- 3 files changed, 236 insertions(+), 179 deletions(-) diff --git a/stdlib/source/lux.lux b/stdlib/source/lux.lux index d64b83bd3..1114b069c 100644 --- a/stdlib/source/lux.lux +++ b/stdlib/source/lux.lux @@ -5945,7 +5945,7 @@ (^ (list (~+ (list/map (|>> [""] identifier$) args)))) (#.Right [(~ g!compiler) (list (~+ (list/map (function (_ template) - (` (` (~ (replace-syntax rep-env template))))) + (` (`' (~ (replace-syntax rep-env template))))) input-templates)))]) (~ g!_) diff --git a/stdlib/source/lux/compiler/default/phase/analysis.lux b/stdlib/source/lux/compiler/default/phase/analysis.lux index 8ef8324ae..e26cd3516 100644 --- a/stdlib/source/lux/compiler/default/phase/analysis.lux +++ b/stdlib/source/lux/compiler/default/phase/analysis.lux @@ -81,20 +81,20 @@ [(template: #export ( content) ( content))] - [control/case #Case] + [control/case #..Case] ) (do-template [ ] [(def: #export (-> Analysis) - (|>> #Primitive))] - - [bit Bit #Bit] - [nat Nat #Nat] - [int Int #Int] - [rev Rev #Rev] - [frac Frac #Frac] - [text Text #Text] + (|>> #..Primitive))] + + [bit Bit #..Bit] + [nat Nat #..Nat] + [int Int #..Int] + [rev Rev #..Rev] + [frac Frac #..Frac] + [text Text #..Text] ) (type: #export Arity Nat) @@ -142,7 +142,7 @@ (do-template [ ] [(template: #export ( content) - (.<| #Complex + (.<| #..Complex content))] diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 4d778136f..1ae6a8620 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -463,93 +463,93 @@ ## encoded on the parser. (def: name-separator ".") -## A Lux name is a pair of chunks of text, where the first-part -## refers to the module that gives context to the name, and the -## second part corresponds to the short of the name itself. -## The module part may be absent (by being the empty text ""), but the -## name part must always be present. -## The rules for which characters you may use are specified in terms -## of which characters you must avoid (to keep things as open-ended as -## possible). -## In particular, no white-space can be used, and neither can other -## characters which are already used by Lux as delimiters for other -## Code nodes (thereby reducing ambiguity while parsing). -## Additionally, the first character in an name's part cannot be -## a digit, to avoid confusion with regards to numbers. -(def: name-part^ - (Lexer Text) - (let [delimiters (format ..open-form ..close-form - ..open-tuple ..close-tuple - ..open-record ..close-record - ..sigil - ..text-delimiter - ..name-separator) - space (format ..white-space ..new-line) - head (l.none-of! (format ..digits delimiters space)) - tail (l.some! (l.none-of! (format delimiters space)))] - (l.slice (l.and! head tail)))) - -(def: current-module-mark Text (format ..name-separator ..name-separator)) - -(def: (name^ current-module aliases) - (-> Text Aliases (Lexer [Name Nat])) - ($_ p.either - ## When an name starts with 2 marks, its module is - ## taken to be the current-module being compiled at the moment. - ## This can be useful when mentioning names and tags - ## inside quoted/templated code in macros. - (do p.Monad - [_ (l.this current-module-mark) - def-name name-part^] - (wrap [[current-module def-name] - ("lux i64 +" 2 (text.size def-name))])) - ## If the name is prefixed by the mark, but no module - ## part, the module is assumed to be "lux" (otherwise known as - ## the 'prelude'). - ## This makes it easy to refer to definitions in that module, - ## since it is the most fundamental module in the entire - ## standard library. - (do p.Monad - [_ (l.this name-separator) - def-name name-part^] - (wrap [["lux" def-name] - ("lux i64 +" 1 (text.size def-name))])) - ## Not all names must be specified with a module part. - ## If that part is not provided, the name will be created - ## with the empty "" text as the module. - ## During program analysis, such names tend to be treated - ## as if their context is the current-module, but this only - ## applies to names for tags and module definitions. - ## Function arguments and local-variables may not be referred-to - ## using names with module parts, so being able to specify - ## names with empty modules helps with those use-cases. - (do p.Monad - [first-part name-part^] - (p.either (do @ - [_ (l.this name-separator) - second-part name-part^] - (wrap [[(|> aliases (dictionary.get first-part) (maybe.default first-part)) - second-part] - ($_ "lux i64 +" - (text.size first-part) - 1 - (text.size second-part))])) - (wrap [["" first-part] - (text.size first-part)]))))) - -(do-template [
  ]
-  [(def: #export ( current-module aliases)
-     (-> Text Aliases Syntax)
-     (function (_ where)
-       (do p.Monad
-         [[value length] (<| 
-                             (name^ current-module aliases))]
-         (wrap [(update@ #.column (n/+ ) where)
-                [where ( value)]]))))]
-
-  [tag        (p.after (l.this ..sigil)) #.Tag        ("lux i64 +" 1 length)]
-  [identifier (|>)                       #.Identifier length]
-  )
+## ## A Lux name is a pair of chunks of text, where the first-part
+## ## refers to the module that gives context to the name, and the
+## ## second part corresponds to the short of the name itself.
+## ## The module part may be absent (by being the empty text ""), but the
+## ## name part must always be present.
+## ## The rules for which characters you may use are specified in terms
+## ## of which characters you must avoid (to keep things as open-ended as
+## ## possible).
+## ## In particular, no white-space can be used, and neither can other
+## ## characters which are already used by Lux as delimiters for other
+## ## Code nodes (thereby reducing ambiguity while parsing).
+## ## Additionally, the first character in an name's part cannot be
+## ## a digit, to avoid confusion with regards to numbers.
+## (def: name-part^
+##   (Lexer Text)
+##   (let [delimiters (format ..open-form ..close-form
+##                            ..open-tuple ..close-tuple
+##                            ..open-record ..close-record
+##                            ..sigil
+##                            ..text-delimiter
+##                            ..name-separator)
+##         space (format ..white-space ..new-line)
+##         head (l.none-of! (format ..digits delimiters space))
+##         tail (l.some! (l.none-of! (format delimiters space)))]
+##     (l.slice (l.and! head tail))))
+
+## (def: current-module-mark Text (format ..name-separator ..name-separator))
+
+## (def: (name^ current-module aliases)
+##   (-> Text Aliases (Lexer [Name Nat]))
+##   ($_ p.either
+##       ## When an name starts with 2 marks, its module is
+##       ## taken to be the current-module being compiled at the moment.
+##       ## This can be useful when mentioning names and tags
+##       ## inside quoted/templated code in macros.
+##       (do p.Monad
+##         [_ (l.this current-module-mark)
+##          def-name name-part^]
+##         (wrap [[current-module def-name]
+##                ("lux i64 +" 2 (text.size def-name))]))
+##       ## If the name is prefixed by the mark, but no module
+##       ## part, the module is assumed to be "lux" (otherwise known as
+##       ## the 'prelude').
+##       ## This makes it easy to refer to definitions in that module,
+##       ## since it is the most fundamental module in the entire
+##       ## standard library.
+##       (do p.Monad
+##         [_ (l.this name-separator)
+##          def-name name-part^]
+##         (wrap [["lux" def-name]
+##                ("lux i64 +" 1 (text.size def-name))]))
+##       ## Not all names must be specified with a module part.
+##       ## If that part is not provided, the name will be created
+##       ## with the empty "" text as the module.
+##       ## During program analysis, such names tend to be treated
+##       ## as if their context is the current-module, but this only
+##       ## applies to names for tags and module definitions.
+##       ## Function arguments and local-variables may not be referred-to
+##       ## using names with module parts, so being able to specify
+##       ## names with empty modules helps with those use-cases.
+##       (do p.Monad
+##         [first-part name-part^]
+##         (p.either (do @
+##                     [_ (l.this name-separator)
+##                      second-part name-part^]
+##                     (wrap [[(|> aliases (dictionary.get first-part) (maybe.default first-part))
+##                             second-part]
+##                            ($_ "lux i64 +"
+##                                (text.size first-part)
+##                                1
+##                                (text.size second-part))]))
+##                   (wrap [["" first-part]
+##                          (text.size first-part)])))))
+
+## (do-template [ 
  ]
+##   [(def: #export ( current-module aliases)
+##      (-> Text Aliases Syntax)
+##      (function (_ where)
+##        (do p.Monad
+##          [[value length] (<| 
+##                              (name^ current-module aliases))]
+##          (wrap [(update@ #.column (n/+ ) where)
+##                 [where ( value)]]))))]
+
+##   [tag        (p.after (l.this ..sigil)) #.Tag        ("lux i64 +" 1 length)]
+##   [identifier (|>)                       #.Identifier length]
+##   )
 
 ## (do-template [ ]
 ##   [(def: 
@@ -595,8 +595,8 @@
               (..text where))
           ## (<| (..timed "identifier")
           ##     (..identifier current-module aliases where))
-          (<| (..timed "tag")
-              (..tag current-module aliases where))
+          ## (<| (..timed "tag")
+          ##     (..tag current-module aliases where))
           ## (<| (..timed "form")
           ##     (..form ast' where))
           ## (<| (..timed "tuple")
@@ -610,28 +610,36 @@
               (p.fail (ex.construct unrecognized-input where))))
           ))))
 
-(type: Simple
-  (-> Source (Error [Source Code])))
+(type: Tracker
+  {#new-line Offset})
 
-(type: Reader
-  (-> Text Aliases Simple))
+(def: fresh-tracker
+  Tracker
+  {#new-line 0})
+
+(type: (Simple a)
+  (-> Tracker Source (Error [Tracker Source a])))
+
+(type: (Reader a)
+  (-> Text Aliases (Simple a)))
 
 (do-template [ ]
   [(template: ( value)
      ( value 1))]
 
-  [inc! "lux i64 +"]
-  [dec! "lux i64 -"]
+  [!inc "lux i64 +"]
+  [!dec "lux i64 -"]
   )
 
 (do-template [  ]
-  [(def: ( read-code source)
-     (-> Simple Simple)
-     (loop [source source
+  [(def: ( read-code tracker source)
+     (-> (Simple Code) (Simple Code))
+     (loop [tracker tracker
+            source source
             stack (: (List Code) #.Nil)]
-       (case (read-code source)
-         (#error.Success [source' top])
-         (recur source' (#.Cons top stack))
+       (case (read-code tracker source)
+         (#error.Success [tracker' source' top])
+         (recur tracker' source' (#.Cons top stack))
          
          (#error.Error error)
          (let [[where offset source-code] source]
@@ -639,8 +647,9 @@
              (#.Some char)
              (`` (case char
                    (^ (char (~~ (static ))))
-                   (#error.Success [[(update@ #.column inc where)
-                                     (inc! offset)
+                   (#error.Success [tracker
+                                    [(update@ #.column inc where)
+                                     (!inc offset)
                                      source-code]
                                     [where ( (list.reverse stack))]])
 
@@ -657,35 +666,36 @@
   [read-tuple ..close-tuple #.Tuple]
   )
 
-(template: (clip! from to text)
+(template: (!clip from to text)
   ## TODO: Optimize away "maybe.assume"
   (maybe.assume ("lux text clip" text from to)))
 
-(def: (read-text [where offset source-code])
-  Simple
+(def: (read-text tracker [where offset source-code])
+  (Simple Code)
   (case ("lux text index" source-code (static ..text-delimiter) offset)
     (#.Some end)
-    (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end offset)) where)
-                      (inc! end)
+    (#error.Success [tracker
+                     [(update@ #.column (n/+ ("lux i64 -" end offset)) where)
+                      (!inc end)
                       source-code]
                      [where
-                      (#.Text (clip! offset end source-code))]])
+                      (#.Text (!clip offset end source-code))]])
     
     _
     (ex.throw unrecognized-input where)))
 
-(def: digit-bottom Nat (dec! (char "0")))
-(def: digit-top Nat (inc! (char "9")))
+(def: digit-bottom Nat (!dec (char "0")))
+(def: digit-top Nat (!inc (char "9")))
 
-(template: (digit? char)
+(template: (!digit? char)
   (and ("lux int <" (:coerce Int (static ..digit-bottom)) (:coerce Int char))
        ("lux int <" (:coerce Int char) (:coerce Int (static ..digit-top)))))
 
-(`` (template: (digit?+ char)
-      (or (digit? char)
+(`` (template: (!digit?+ char)
+      (or (!digit? char)
           ("lux i64 =" (.char (~~ (static ..digit-separator))) char))))
 
-(`` (template: (name-char? char)
+(`` (template: (!strict-name-char? char)
       (not (or ("lux i64 =" (.char (~~ (static ..white-space))) char)
                ("lux i64 =" (.char (~~ (static ..new-line))) char)
 
@@ -703,42 +713,48 @@
                ("lux i64 =" (.char (~~ (static ..text-delimiter))) char)
                ("lux i64 =" (.char (~~ (static ..sigil))) char)))))
 
-(template: (name-char?+ char)
-  (or (name-char? char)
-      (digit? char)))
+(template: (!name-char?|head char)
+  (and (!strict-name-char? char)
+       (not (!digit? char))))
 
-(with-expansions [ (case (:: number.Codec decode (clip! start end source-code))
+(template: (!name-char? char)
+  (or (!strict-name-char? char)
+      (!digit? char)))
+
+(with-expansions [ (case (:: number.Codec decode (!clip start end source-code))
                              (#error.Success output)
-                             (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where)
+                             (#error.Success [tracker
+                                              [(update@ #.column (n/+ ("lux i64 -" end start)) where)
                                                end
                                                source-code]
                                               [where (#.Nat output)]])
 
                              (#error.Error error)
                              (#error.Error error))]
-  (def: (read-nat start [where offset source-code])
-    (-> Offset Simple)
+  (def: (read-nat start tracker [where offset source-code])
+    (-> Offset (Simple Code))
     (loop [end offset]
       (case ("lux text char" source-code end)
         (#.Some char)
-        (if (digit?+ char)
-          (recur (inc! end))
+        (if (!digit?+ char)
+          (recur (!inc end))
           )
         
         _
         ))))
 
-(with-expansions [ (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where)
+(with-expansions [ (#error.Success [tracker
+                                            [(update@ #.column (n/+ ("lux i64 -" end start)) where)
                                              end
                                              source-code]
-                                            [where (#.Identifier ["" (clip! start end source-code)])]])]
-  (def: (read-name start [where offset source-code])
-    (-> Offset Simple)
+                                            ["" (!clip start end source-code)]])]
+  (def: (read-name start tracker [where offset source-code])
+    (-> Offset (Simple Name))
     (loop [end offset]
       (case ("lux text char" source-code end)
         (#.Some char)
-        (cond (name-char?+ char)
-              (recur (inc! end))
+        (cond (!name-char? char)
+              (recur (!inc end))
 
               ## else
               )
@@ -746,65 +762,99 @@
         _
         ))))
 
-(template: (leap-bit! value)
+(template: (!leap-bit value)
   ("lux i64 +" value 2))
 
-(with-expansions [ (as-is [where (inc! offset) source-code])]
-  (def: (read-code current-module aliases source)
-    Reader
+(template: (!new-line where)
+  (let [[where::file where::line where::column] where]
+    [where::file (!inc where::line) 0]))
+
+(template: (!read-name @offset @tracker @source @where @tag)
+  (case (..read-name @offset @tracker @source)
+    (#error.Success [tracker' source' name])
+    (#error.Success [tracker' source' [@where (@tag name)]])
+    
+    (#error.Error error)
+    (#error.Error error)))
+
+(with-expansions [ (as-is [where (!inc offset) source-code])
+                   (as-is (recur tracker
+                                               [where ("lux text size" source-code) source-code]))]
+  (def: (read-code current-module aliases tracker source)
+    (Reader Code)
     (let [read-code' (read-code current-module aliases)]
-      (loop [[where offset source-code] source]
+      (loop [tracker tracker
+             [where offset source-code] source]
         (case ("lux text char" source-code offset)
-          (#.Some char)
-          (`` (case char
+          (#.Some current)
+          (`` (case current
+                ## White-space
                 (^template [ ]
                   (^ (char ))
-                  (recur [(update@  inc where)
-                          (inc! offset)
+                  (recur tracker
+                         [(update@  inc where)
+                          (!inc offset)
                           source-code]))
                 ([(~~ (static ..white-space))     #.column]
                  [(~~ (static ..carriage-return)) #.column])
 
                 (^ (char (~~ (static ..new-line))))
-                (let [[where::file where::line where::column] where]
-                  (recur [[where::file (inc! where::line) 0]
-                          (inc! offset)
-                          source-code]))
-                
+                (recur tracker [(!new-line where) (!inc offset) source-code])
+
+                ## Form
                 (^ (char (~~ (static ..open-form))))
-                (read-form read-code' )
+                (read-form read-code' tracker )
 
+                ## Tuple
                 (^ (char (~~ (static ..open-tuple))))
-                (read-tuple read-code' )
+                (read-tuple read-code' tracker )
 
+                ## Text
                 (^ (char (~~ (static ..text-delimiter))))
-                (read-text )
+                (read-text tracker )
 
+                ## Special code
                 (^ (char (~~ (static ..sigil))))
-                (case ("lux text char" source-code (inc! offset))
-                  (#.Some next)
-                  (case next
-                    (^template [ ]
-                      (^ (char ))
-                      (#error.Success [[(update@ #.column (|>> leap-bit!) where)
-                                        (leap-bit! offset)
-                                        source-code]
-                                       [where (#.Bit )]]))
-                    (["0" #0]
-                     ["1" #1])
-
+                (let [offset' (!inc offset)]
+                  (case ("lux text char" source-code offset')
+                    (#.Some next)
+                    (case next
+                      (^template [ ]
+                        (^ (char ))
+                        (#error.Success [tracker
+                                         [(update@ #.column (|>> !leap-bit) where)
+                                          (!leap-bit offset)
+                                          source-code]
+                                         [where (#.Bit )]]))
+                      (["0" #0]
+                       ["1" #1])
+
+                      ## Single-line comment
+                      (^ (char (~~ (static ..sigil))))
+                      (case ("lux text index" source-code (static ..new-line) offset')
+                        (#.Some end)
+                        (recur tracker [(!new-line where) (!inc end) source-code])
+                        
+                        _
+                        )
+
+                      _
+                      (cond (!name-char?|head next) ## Tag
+                            (!read-name offset tracker  where #.Tag)
+
+                            ## else
+                            (ex.throw unrecognized-input where)))
+                    
                     _
-                    (ex.throw unrecognized-input where))
-                  
-                  _
-                  (ex.throw end-of-file current-module))
+                    (ex.throw end-of-file current-module)))
 
                 _
-                (cond (digit? char)
-                      (read-nat offset )
+                (cond (!digit? current) ## Natural number
+                      (read-nat offset tracker )
 
-                      (name-char? char)
-                      (read-name offset )
+                      ## Identifier
+                      (!name-char?|head current)
+                      (!read-name offset tracker  where #.Identifier)
                       
                       ## else
                       (ex.throw unrecognized-input where))))
@@ -813,7 +863,14 @@
           (ex.throw end-of-file current-module))))))
 
 ## [where offset source-code]
-(def: #export read Reader read-code)
+(def: #export (read current-module aliases source)
+  (-> Text Aliases Source (Error [Source Code]))
+  (case (read-code current-module aliases fresh-tracker source)
+    (#error.Error error)
+    (#error.Error error)
+
+    (#error.Success [tracker' source' output])
+    (#error.Success [source' output])))
 
 ## (def: #export (read current-module aliases source)
 ##   (-> Text Aliases Source (Error [Source Code]))
-- 
cgit v1.2.3