Trying to minimize the amount of clips+concats that happen.

author: Eduardo Julian 2018-08-20 21:51:07 -0400
committer: Eduardo Julian 2018-08-20 21:51:07 -0400
commit: ec1f6caacb47ca6d06f829a95a9b692d004b48a2 (patch)
tree: bb1721daac34b2aab6f5bf71def6080bb45f16e0
parent: 58c299b90fbb3a20cf4e624fd20e4bb7f5846672 (diff)
2 files changed, 162 insertions, 72 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index 5f2d6d93b..322035fd8 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -55,20 +55,19 @@
 ## It operates recursively in order to produce the longest continuous
 ## chunk of white-space.
 (def: (space^ where)
-  (-> Cursor (Lexer [Cursor Text]))
+  (-> Cursor (Lexer [Cursor Any]))
   (p.either (do p.Monad<Parser>
-              [content (l.many (l.one-of white-space))]
-              (wrap [(update@ #.column (n/+ (text.size content)) where)
-                     content]))
+              [content (l.many! (l.one-of! white-space))]
+              (wrap [(update@ #.column (n/+ (get@ #l.distance content)) where)
+                     []]))
             ## New-lines must be handled as a separate case to ensure line
             ## information is handled properly.
             (do p.Monad<Parser>
-              [content (l.many (l.one-of new-line))]
+              [content (l.many! (l.one-of! new-line))]
               (wrap [(|> where
-                         (update@ #.line (n/+ (text.size content)))
+                         (update@ #.line (n/+ (get@ #l.distance content)))
                          (set@ #.column 0))
-                     content]))
-            ))
+                     []]))))
 
 ## Single-line comments can start anywhere, but only go up to the
 ## next new-line.
@@ -76,7 +75,7 @@
   (-> Cursor (Lexer [Cursor Text]))
   (do p.Monad<Parser>
     [_ (l.this "##")
-     comment (l.some (l.none-of new-line))
+     comment (l.slice (l.some! (l.none-of! new-line)))
      _ (l.this new-line)]
     (wrap [(|> where
                (update@ #.line inc)
@@ -157,7 +156,7 @@
         [[where comment] (comment^ where)]
         (left-padding^ where))
       (do p.Monad<Parser>
-        [[where white-space] (space^ where)]
+        [[where _] (space^ where)]
         (left-padding^ where))
       (:: p.Monad<Parser> wrap where)))
 
@@ -365,7 +364,7 @@
                                          ## the text's body's column,
                                          ## to ensure they are aligned.
                                          (do @
-                                           [offset (l.many (l.one-of " "))
+                                           [offset (l.slice (l.many! (l.one-of! " ")))
                                             #let [offset-size (text.size offset)]]
                                            (if (n/>= offset-column offset-size)
                                              ## Any extra offset
@@ -385,7 +384,7 @@
                                          ($_ p.either
                                              ## Normal text characters.
                                              (do @
-                                               [normal (l.many (l.none-of "\\\"\n"))]
+                                               [normal (l.slice (l.many! (l.none-of! "\\\"\n")))]
                                                (recur (format text-read normal)
                                                       (|> where
                                                           (update@ #.column (n/+ (text.size normal))))
@@ -512,8 +511,8 @@
     [#let [digits "0123456789"
            delimiters (format "()[]{}#\"" name-separator)
            space (format white-space new-line)
-           head-lexer (l.none-of (format digits delimiters space))
-           tail-lexer (l.some (l.none-of (format delimiters space)))]
+           head-lexer (l.slice (l.none-of! (format digits delimiters space)))
+           tail-lexer (l.slice (l.some! (l.none-of! (format delimiters space))))]
      head head-lexer
      tail tail-lexer]
     (wrap (format head tail))))
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index 481d17b0a..3b4b63a26 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -9,7 +9,7 @@
     ["." maybe]
     ["e" error]
     [collection
-     ["." list]]]
+     ["." list ("list/." Fold<List>)]]]
    [macro
     ["." code]]])
 
@@ -20,6 +20,10 @@
 (type: #export Lexer
   (p.Parser [Offset Text]))
 
+(type: #export Slice
+  {#basis Offset
+   #distance Offset})
+
 (def: (remaining offset tape)
   (-> Offset Text Text)
   (|> tape (text.split offset) maybe.assume product.right))
@@ -42,6 +46,23 @@
       (#e.Error (unconsumed-input-error end-offset input)))
     ))
 
+(def: #export offset
+  (Lexer Offset)
+  (function (_ (^@ input [offset tape]))
+    (#e.Success [input offset])))
+
+(def: (with-slices lexer)
+  (-> (Lexer (List Slice)) (Lexer Slice))
+  (do p.Monad<Parser>
+    [offset ..offset
+     slices lexer]
+    (wrap (list/fold (function (_ [slice::basis slice::distance]
+                                  [total::basis total::distance])
+                       [total::basis (n/+ slice::distance total::distance)])
+                     {#basis offset
+                      #distance 0}
+                     slices))))
+
 (def: #export any
   {#.doc "Just returns the next character without applying any logic."}
   (Lexer Text)
@@ -51,19 +72,31 @@
       (#e.Success [[(inc offset) tape] (text.from-code output)])
 
       _
-      (#e.Error cannot-lex-error))
-    ))
+      (#e.Error cannot-lex-error))))
 
-(def: #export (not p)
-  {#.doc "Produce a character if the lexer fails."}
-  (All [a] (-> (Lexer a) (Lexer Text)))
-  (function (_ input)
-    (case (p input)
-      (#e.Error msg)
-      (any input)
-      
-      _
-      (#e.Error "Expected to fail; yet succeeded."))))
+(def: #export any!
+  {#.doc "Just returns the next character without applying any logic."}
+  (Lexer Slice)
+  (function (_ [offset tape])
+    (#e.Success [[(inc offset) tape]
+                 {#basis offset
+                  #distance 1}])))
+
+(do-template [<name> <type> <any>]
+  [(def: #export (<name> p)
+     {#.doc "Produce a character if the lexer fails."}
+     (All [a] (-> (Lexer a) (Lexer <type>)))
+     (function (_ input)
+       (case (p input)
+         (#e.Error msg)
+         (<any> input)
+         
+         _
+         (#e.Error "Expected to fail; yet succeeded."))))]
+
+  [not  Text  ..any]
+  [not! Slice ..any!]
+  )
 
 (def: #export (this reference)
   {#.doc "Lex a text if it matches the given sample."}
@@ -112,8 +145,7 @@
       (#e.Success [input (text.from-code output)])
 
       _
-      (#e.Error cannot-lex-error))
-    ))
+      (#e.Error cannot-lex-error))))
 
 (def: #export get-input
   {#.doc "Get all of the remaining input (without consuming it)."}
@@ -162,33 +194,49 @@
       (range (char "a") (char "f"))
       (range (char "A") (char "F"))))
 
-(def: #export (one-of options)
-  {#.doc "Only lex characters that are part of a piece of text."}
-  (-> Text (Lexer Text))
-  (function (_ [offset tape])
-    (case (text.nth offset tape)
-      (#.Some output)
-      (let [output (text.from-code output)]
-        (if (text.contains? output options)
-          (#e.Success [[(inc offset) tape] output])
-          (#e.Error ($_ text/compose "Character (" output ") is not one of: " options))))
-
-      _
-      (#e.Error cannot-lex-error))))
-
-(def: #export (none-of options)
-  {#.doc "Only lex characters that are not part of a piece of text."}
-  (-> Text (Lexer Text))
-  (function (_ [offset tape])
-    (case (text.nth offset tape)
-      (#.Some output)
-      (let [output (text.from-code output)]
-        (if (.not (text.contains? output options))
-          (#e.Success [[(inc offset) tape] output])
-          (#e.Error ($_ text/compose "Character (" output ") is one of: " options))))
+(do-template [<name> <description-modifier> <modifier>]
+  [(def: #export (<name> options)
+     {#.doc (code.text ($_ text/compose "Only lex characters that are" <description-modifier> " part of a piece of text."))}
+     (-> Text (Lexer Text))
+     (function (_ [offset tape])
+       (case (text.nth offset tape)
+         (#.Some output)
+         (let [output (text.from-code output)]
+           (if (<modifier> (text.contains? output options))
+             (#e.Success [[(inc offset) tape] output])
+             (#e.Error ($_ text/compose "Character (" output
+                           ") is should " <description-modifier>
+                           "be one of: " options))))
+
+         _
+         (#e.Error cannot-lex-error))))]
+
+  [one-of  ""     |>]
+  [none-of " not" .not]
+  )
 
-      _
-      (#e.Error cannot-lex-error))))
+(do-template [<name> <description-modifier> <modifier>]
+  [(def: #export (<name> options)
+     {#.doc (code.text ($_ text/compose "Only lex characters that are" <description-modifier> " part of a piece of text."))}
+     (-> Text (Lexer Slice))
+     (function (_ [offset tape])
+       (case (text.nth offset tape)
+         (#.Some output)
+         (let [output (text.from-code output)]
+           (if (<modifier> (text.contains? output options))
+             (#e.Success [[(inc offset) tape]
+                          {#basis offset
+                           #distance 1}])
+             (#e.Error ($_ text/compose "Character (" output
+                           ") is should " <description-modifier>
+                           "be one of: " options))))
+
+         _
+         (#e.Error cannot-lex-error))))]
+
+  [one-of!  ""     |>]
+  [none-of! " not" .not]
+  )
 
 (def: #export (satisfies p)
   {#.doc "Only lex characters that satisfy a predicate."}
@@ -215,33 +263,64 @@
      =right right]
     (wrap ($_ text/compose =left =right))))
 
-(do-template [<name> <base> <doc>]
-  [(def: #export (<name> p)
-     {#.doc <doc>}
+(def: #export (and! left right)
+  (-> (Lexer Slice) (Lexer Slice) (Lexer Slice))
+  (do p.Monad<Parser>
+    [[left::basis left::distance] left
+     [right::basis right::distance] right]
+    (wrap [left::basis (n/+ right::distance right::distance)])))
+
+(do-template [<name> <base> <doc-modifier>]
+  [(def: #export (<name> lexer)
+     {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " characters as a single continuous text."))}
      (-> (Lexer Text) (Lexer Text))
-     (|> p <base> (:: p.Monad<Parser> map text.concat)))]
+     (|> lexer <base> (:: p.Monad<Parser> map text.concat)))]
 
-  [some p.some "Lex some characters as a single continuous text."]
-  [many p.many "Lex many characters as a single continuous text."]
+  [some p.some "some"]
+  [many p.many "many"]
   )
 
-(do-template [<name> <base> <doc>]
-  [(def: #export (<name> n p)
-     {#.doc <doc>}
+(do-template [<name> <base> <doc-modifier>]
+  [(def: #export (<name> lexer)
+     {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " characters as a single continuous text."))}
+     (-> (Lexer Slice) (Lexer Slice))
+     (with-slices (<base> lexer)))]
+
+  [some! p.some "some"]
+  [many! p.many "many"]
+  )
+
+(do-template [<name> <base> <doc-modifier>]
+  [(def: #export (<name> amount lexer)
+     {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " N characters."))}
      (-> Nat (Lexer Text) (Lexer Text))
-     (do p.Monad<Parser>
-       []
-       (|> p (<base> n) (:: @ map text.concat))))]
+     (|> lexer (<base> amount) (:: p.Monad<Parser> map text.concat)))]
+
+  [exactly  p.exactly  "exactly"]
+  [at-most  p.at-most  "at most"]
+  [at-least p.at-least "at least"]
+  )
 
-  [exactly  p.exactly  "Lex exactly N characters."]
-  [at-most  p.at-most  "Lex at most N characters."]
-  [at-least p.at-least "Lex at least N characters."]
+(do-template [<name> <base> <doc-modifier>]
+  [(def: #export (<name> amount lexer)
+     {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " N characters."))}
+     (-> Nat (Lexer Slice) (Lexer Slice))
+     (with-slices (<base> amount lexer)))]
+
+  [exactly!  p.exactly  "exactly"]
+  [at-most!  p.at-most  "at most"]
+  [at-least! p.at-least "at least"]
   )
 
-(def: #export (between from to p)
+(def: #export (between from to lexer)
   {#.doc "Lex between N and M characters."}
   (-> Nat Nat (Lexer Text) (Lexer Text))
-  (|> p (p.between from to) (:: p.Monad<Parser> map text.concat)))
+  (|> lexer (p.between from to) (:: p.Monad<Parser> map text.concat)))
+
+(def: #export (between! from to lexer)
+  {#.doc "Lex between N and M characters."}
+  (-> Nat Nat (Lexer Slice) (Lexer Slice))
+  (with-slices (p.between from to lexer)))
 
 (def: #export (enclosed [start end] lexer)
   (All [a] (-> [Text Text] (Lexer a) (Lexer a)))
@@ -259,3 +338,15 @@
 
       (#e.Success value)
       (#e.Success [real-input value]))))
+
+(def: #export (slice lexer)
+  (-> (Lexer Slice) (Lexer Text))
+  (do p.Monad<Parser>
+    [[basis distance] lexer]
+    (function (_ (^@ input [offset tape]))
+      (case (text.clip basis (n/+ basis distance) tape)
+        (#.Some output)
+        (#e.Success [input output])
+
+        #.None
+        (#e.Error "Cannot slice.")))))
author	Eduardo Julian	2018-08-20 21:51:07 -0400
committer	Eduardo Julian	2018-08-20 21:51:07 -0400
commit	ec1f6caacb47ca6d06f829a95a9b692d004b48a2 (patch)
tree	bb1721daac34b2aab6f5bf71def6080bb45f16e0
parent	58c299b90fbb3a20cf4e624fd20e4bb7f5846672 (diff)