From 81a04b5935cd755efb4096d5e3f347cd0bb6cdef Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 21 Aug 2018 00:35:28 -0400 Subject: Minor optimizations + timing. --- stdlib/source/lux/compiler/default/syntax.lux | 205 +++++++++++++++----------- stdlib/source/lux/data/text/lexer.lux | 22 +-- 2 files changed, 135 insertions(+), 92 deletions(-) diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 50c02c11d..21b142ec0 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -39,9 +39,30 @@ ["l" lexer (#+ Lexer)] format] [collection - ["." row (#+ Row)] ["." dictionary (#+ Dictionary)]]] - ["." function]]) + ["." function] + ["." io] + [time + ["." instant] + ["." duration]]]) + +(type: #export Syntax + (-> Cursor (Lexer [Cursor Code]))) + +(def: #export (timed description lexer) + (All [a] + (-> Text (Lexer [Cursor Code]) (Lexer [Cursor Code]))) + (do p.Monad + [_ (wrap []) + #let [pre (io.run instant.now)] + [where output] lexer + #let [_ (log! (|> instant.now + io.run + instant.relative + (duration.difference (instant.relative pre)) + %duration + (format (%code output) " [" description "]: ")))]] + (wrap [where output]))) (type: #export Aliases (Dictionary Text Text)) (def: #export no-aliases Aliases (dictionary.new text.Hash)) @@ -56,6 +77,15 @@ (def: text-delimiter "\"") (def: text-delimiter^ (l.this text-delimiter)) +(def: open-form "(") +(def: close-form ")") + +(def: open-tuple "[") +(def: close-tuple "]") + +(def: open-record "{") +(def: close-record "}") + (def: escape "\\") (def: sigil "#") @@ -94,8 +124,8 @@ ## This is just a helper parser to find text which doesn't run into ## any special character sequences for multi-line comments. -(def: multi-line-comment-start^ (l.this (format ..sigil "("))) -(def: multi-line-comment-end^ (l.this (format ")" ..sigil))) +(def: multi-line-comment-start^ (l.this (format ..sigil open-form))) +(def: multi-line-comment-end^ (l.this (format close-form ..sigil))) (def: multi-line-comment-bound^ (Lexer Any) @@ -220,7 +250,7 @@ (do-template [ ] [(def: #export ( where) - (-> Cursor (Lexer [Cursor Code])) + Syntax (do p.Monad [chunk ] (case (:: decode chunk) @@ -242,7 +272,7 @@ ) (def: #export (nat where) - (-> Cursor (Lexer [Cursor Code])) + Syntax (do p.Monad [chunk rich-digits^] (case (:: number.Codec decode chunk) @@ -254,7 +284,7 @@ [where (#.Nat value)]])))) (def: #export (frac where) - (-> Cursor (Lexer [Cursor Code])) + Syntax (do p.Monad [chunk ($_ l.and sign^ @@ -277,7 +307,7 @@ ## This parser looks so complex because text in Lux can be multi-line ## and there are rules regarding how this is handled. (def: #export (text where) - (-> Cursor (Lexer [Cursor Code])) + Syntax (do p.Monad [## Lux text "is delimited by double-quotes", as usual in most ## programming languages. @@ -289,7 +319,7 @@ ## as many spaces as necessary to be column-aligned. ## This helps ensure that the formatting on the text in the ## source-code matches the formatting of the Text value. - #let [offset-column (inc (get@ #.column where))] + #let [offset (inc (get@ #.column where))] [where' text-read] (: (Lexer [Cursor Text]) ## I must keep track of how much of the ## text body has been read, how far the @@ -308,9 +338,9 @@ ## the text's body's column, ## to ensure they are aligned. (do @ - [_ (l.exactly! offset-column (l.one-of! " "))] + [_ (p.exactly offset (l.this " "))] (recur text-read - (update@ #.column (n/+ offset-column) where) + (update@ #.column (n/+ offset) where) #0)) ($_ p.either ## Normal text characters. @@ -347,38 +377,43 @@ (wrap [where' [where (#.Text text-read)]]))) +(def: (composite open close element) + (All [a] + (-> Text Text + (-> Cursor (Lexer [Cursor a])) + (-> Cursor (Lexer [Cursor (List a)])))) + (let [open^ (l.this open) + close^ (l.this close)] + (function (_ where) + (do p.Monad + [_ open^] + (loop [where where] + (p.either (do @ + [## Must update the cursor as I + ## go along, to keep things accurate. + [where' head] (element where)] + (parser/map (product.both id (|>> (#.Cons head))) + (recur where'))) + (do @ + [## Must take into account any + ## padding present before the + ## end-delimiter. + where' (left-padding^ where) + _ close^] + (wrap [(update@ #.column inc where') + #.Nil])))))))) + ## Form and tuple syntax is mostly the same, differing only in the ## delimiters involved. ## They may have an arbitrary number of arbitrary Code nodes as elements. (do-template [ ] - [(def: ( where ast) - (-> Cursor - (-> Cursor (Lexer [Cursor Code])) - (Lexer [Cursor Code])) - (do p.Monad - [_ (l.this ) - [where' elems] (loop [elems (: (Row Code) - row.empty) - where where] - (p.either (do @ - [## Must update the cursor as I - ## go along, to keep things accurate. - [where' elem] (ast where)] - (recur (row.add elem elems) - where')) - (do @ - [## Must take into account any - ## padding present before the - ## end-delimiter. - where' (left-padding^ where) - _ (l.this )] - (wrap [(update@ #.column inc where') - (row.to-list elems)]))))] - (wrap [where' - [where ( elems)]])))] - - [form #.Form "(" ")"] - [tuple #.Tuple "[" "]"] + [(def: ( ast where) + (-> Syntax Syntax) + (<| (parser/map (product.both id (|>> [where]))) + (composite ast where)))] + + [form #.Form ..open-form ..close-form] + [tuple #.Tuple ..open-tuple ..close-tuple] ) ## Records are almost (syntactically) the same as forms and tuples, @@ -390,27 +425,16 @@ ## Code node, however, record Code nodes allow any Code node to occupy ## this position, since it may be useful when processing Code syntax in ## macros. -(def: (record where ast) - (-> Cursor - (-> Cursor (Lexer [Cursor Code])) - (Lexer [Cursor Code])) - (do p.Monad - [_ (l.this "{") - [where' elems] (loop [elems (: (Row [Code Code]) - row.empty) - where where] - (p.either (do @ - [[where' key] (ast where) - [where' val] (ast where')] - (recur (row.add [key val] elems) - where')) - (do @ - [where' (left-padding^ where) - _ (l.this "}")] - (wrap [(update@ #.column inc where') - (row.to-list elems)]))))] - (wrap [where' - [where (#.Record elems)]]))) +(def: (record ast where) + (-> Syntax Syntax) + (<| (parser/map (product.both id (|>> #.Record [where]))) + (composite ..open-record ..close-record + (function (_ where') + (do p.Monad + [[where' key] (ast where') + [where' val] (ast where')] + (wrap [where' [key val]]))) + where))) ## The parts of an name are separated by a single mark. ## E.g. module.short. @@ -436,8 +460,13 @@ ## a digit, to avoid confusion with regards to numbers. (def: name-part^ (Lexer Text) - (let [delimiters (format "()[]{}" ..sigil ..text-delimiter ..name-separator) - space (format white-space new-line) + (let [delimiters (format ..open-form ..close-form + ..open-tuple ..close-tuple + ..open-record ..close-record + ..sigil + ..text-delimiter + ..name-separator) + space (format ..white-space ..new-line) head (l.none-of! (format ..digits delimiters space)) tail (l.some! (l.none-of! (format delimiters space)))] (l.slice (l.and! head tail)))) @@ -491,13 +520,14 @@ (text.size first-part)]))))) (do-template [
  ]
-  [(def: #export ( current-module aliases where)
-     (-> Text Aliases Cursor (Lexer [Cursor Code]))
-     (do p.Monad
-       [[value length] (<| 
-                           (name^ current-module aliases))]
-       (wrap [(update@ #.column (n/+ ) where)
-              [where ( value)]])))]
+  [(def: #export ( current-module aliases)
+     (-> Text Aliases Syntax)
+     (function (_ where)
+       (do p.Monad
+         [[value length] (<| 
+                             (name^ current-module aliases))]
+         (wrap [(update@ #.column (n/+ ) where)
+                [where ( value)]]))))]
 
   [tag        (p.after (l.this ..sigil)) #.Tag        ("lux i64 +" 1 length)]
   [identifier (|>)                       #.Identifier length]
@@ -513,7 +543,7 @@
   )
 
 (def: #export (bit where)
-  (-> Cursor (Lexer [Cursor Code]))
+  Syntax
   (do p.Monad
     [value (p.either ..false ..true)]
     (wrap [(update@ #.column (n/+ 2) where)
@@ -528,22 +558,33 @@
              ["Column" (%n column)]))
 
 (def: (ast current-module aliases)
-  (-> Text Aliases Cursor (Lexer [Cursor Code]))
+  (-> Text Aliases Syntax)
   (function (ast' where)
     (do p.Monad
       [where (left-padding^ where)]
       ($_ p.either
-          (..bit where)
-          (..nat where)
-          (..frac where)
-          (..rev where)
-          (..int where)
-          (..text where)
-          (..identifier current-module aliases where)
-          (..tag current-module aliases where)
-          (..form where ast')
-          (..tuple where ast')
-          (..record where ast')
+          (<| (..timed "bit")
+              (..bit where))
+          (<| (..timed "nat")
+              (..nat where))
+          (<| (..timed "frac")
+              (..frac where))
+          (<| (..timed "rev")
+              (..rev where))
+          (<| (..timed "int")
+              (..int where))
+          (<| (..timed "text")
+              (..text where))
+          (<| (..timed "identifier")
+              (..identifier current-module aliases where))
+          (<| (..timed "tag")
+              (..tag current-module aliases where))
+          (<| (..timed "form")
+              (..form ast' where))
+          (<| (..timed "tuple")
+              (..tuple ast' where))
+          (<| (..timed "record")
+              (..record ast' where))
           (do @
             [end? l.end?]
             (if end?
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index 677810eb8..e6186aea8 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -58,7 +58,7 @@
      slices lexer]
     (wrap (list/fold (function (_ [slice::basis slice::distance]
                                   [total::basis total::distance])
-                       [total::basis (n/+ slice::distance total::distance)])
+                       [total::basis ("lux i64 +" slice::distance total::distance)])
                      {#basis offset
                       #distance 0}
                      slices))))
@@ -69,7 +69,7 @@
   (function (_ [offset tape])
     (case (//.nth offset tape)
       (#.Some output)
-      (#e.Success [[(inc offset) tape] (//.from-code output)])
+      (#e.Success [[("lux i64 +" 1 offset) tape] (//.from-code output)])
 
       _
       (#e.Error cannot-lex-error))))
@@ -78,7 +78,7 @@
   {#.doc "Just returns the next character without applying any logic."}
   (Lexer Slice)
   (function (_ [offset tape])
-    (#e.Success [[(inc offset) tape]
+    (#e.Success [[("lux i64 +" 1 offset) tape]
                  {#basis offset
                   #distance 1}])))
 
@@ -105,7 +105,8 @@
     (case (//.index-of' reference offset tape)
       (#.Some where)
       (if (n/= offset where)
-        (#e.Success [[(n/+ (//.size reference) offset) tape] []])
+        (#e.Success [[("lux i64 +" (//.size reference) offset) tape]
+                     []])
         (#e.Error ($_ text/compose "Could not match: " (//.encode reference) " @ " (maybe.assume (//.clip' offset tape)))))
 
       _
@@ -117,7 +118,8 @@
   (function (_ (^@ input [offset tape]))
     (case (//.index-of' reference offset tape)
       (^multi (#.Some where) (n/= offset where))
-      (#e.Success [[(n/+ (//.size reference) offset) tape] #1])
+      (#e.Success [[("lux i64 +" (//.size reference) offset) tape]
+                   #1])
 
       _
       (#e.Success [input #0]))))
@@ -203,7 +205,7 @@
          (#.Some output)
          (let [output (//.from-code output)]
            (if ( (//.contains? output options))
-             (#e.Success [[(inc offset) tape] output])
+             (#e.Success [[("lux i64 +" 1 offset) tape] output])
              (#e.Error ($_ text/compose "Character (" output
                            ") is should " 
                            "be one of: " options))))
@@ -224,7 +226,7 @@
          (#.Some output)
          (let [output (//.from-code output)]
            (if ( (//.contains? output options))
-             (#e.Success [[(inc offset) tape]
+             (#e.Success [[("lux i64 +" 1 offset) tape]
                           {#basis offset
                            #distance 1}])
              (#e.Error ($_ text/compose "Character (" output
@@ -245,7 +247,7 @@
     (case (//.nth offset tape)
       (#.Some output)
       (if (p output)
-        (#e.Success [[(inc offset) tape] (//.from-code output)])
+        (#e.Success [[("lux i64 +" 1 offset) tape] (//.from-code output)])
         (#e.Error ($_ text/compose "Character does not satisfy predicate: " (//.from-code output))))
 
       _
@@ -268,7 +270,7 @@
   (do p.Monad
     [[left::basis left::distance] left
      [right::basis right::distance] right]
-    (wrap [left::basis (n/+ left::distance right::distance)])))
+    (wrap [left::basis ("lux i64 +" left::distance right::distance)])))
 
 (do-template [  ]
   [(def: #export ( lexer)
@@ -344,7 +346,7 @@
   (do p.Monad
     [[basis distance] lexer]
     (function (_ (^@ input [offset tape]))
-      (case (//.clip basis (n/+ basis distance) tape)
+      (case (//.clip basis ("lux i64 +" basis distance) tape)
         (#.Some output)
         (#e.Success [input output])
 
-- 
cgit v1.2.3