From ec1f6caacb47ca6d06f829a95a9b692d004b48a2 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Mon, 20 Aug 2018 21:51:07 -0400 Subject: Trying to minimize the amount of clips+concats that happen. --- stdlib/source/lux/compiler/default/syntax.lux | 27 ++-- stdlib/source/lux/data/text/lexer.lux | 207 ++++++++++++++++++-------- 2 files changed, 162 insertions(+), 72 deletions(-) diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 5f2d6d93b..322035fd8 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -55,20 +55,19 @@ ## It operates recursively in order to produce the longest continuous ## chunk of white-space. (def: (space^ where) - (-> Cursor (Lexer [Cursor Text])) + (-> Cursor (Lexer [Cursor Any])) (p.either (do p.Monad - [content (l.many (l.one-of white-space))] - (wrap [(update@ #.column (n/+ (text.size content)) where) - content])) + [content (l.many! (l.one-of! white-space))] + (wrap [(update@ #.column (n/+ (get@ #l.distance content)) where) + []])) ## New-lines must be handled as a separate case to ensure line ## information is handled properly. (do p.Monad - [content (l.many (l.one-of new-line))] + [content (l.many! (l.one-of! new-line))] (wrap [(|> where - (update@ #.line (n/+ (text.size content))) + (update@ #.line (n/+ (get@ #l.distance content))) (set@ #.column 0)) - content])) - )) + []])))) ## Single-line comments can start anywhere, but only go up to the ## next new-line. @@ -76,7 +75,7 @@ (-> Cursor (Lexer [Cursor Text])) (do p.Monad [_ (l.this "##") - comment (l.some (l.none-of new-line)) + comment (l.slice (l.some! (l.none-of! new-line))) _ (l.this new-line)] (wrap [(|> where (update@ #.line inc) @@ -157,7 +156,7 @@ [[where comment] (comment^ where)] (left-padding^ where)) (do p.Monad - [[where white-space] (space^ where)] + [[where _] (space^ where)] (left-padding^ where)) (:: p.Monad wrap where))) @@ -365,7 +364,7 @@ ## the text's body's column, ## to ensure they are aligned. (do @ - [offset (l.many (l.one-of " ")) + [offset (l.slice (l.many! (l.one-of! " "))) #let [offset-size (text.size offset)]] (if (n/>= offset-column offset-size) ## Any extra offset @@ -385,7 +384,7 @@ ($_ p.either ## Normal text characters. (do @ - [normal (l.many (l.none-of "\\\"\n"))] + [normal (l.slice (l.many! (l.none-of! "\\\"\n")))] (recur (format text-read normal) (|> where (update@ #.column (n/+ (text.size normal)))) @@ -512,8 +511,8 @@ [#let [digits "0123456789" delimiters (format "()[]{}#\"" name-separator) space (format white-space new-line) - head-lexer (l.none-of (format digits delimiters space)) - tail-lexer (l.some (l.none-of (format delimiters space)))] + head-lexer (l.slice (l.none-of! (format digits delimiters space))) + tail-lexer (l.slice (l.some! (l.none-of! (format delimiters space))))] head head-lexer tail tail-lexer] (wrap (format head tail)))) diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux index 481d17b0a..3b4b63a26 100644 --- a/stdlib/source/lux/data/text/lexer.lux +++ b/stdlib/source/lux/data/text/lexer.lux @@ -9,7 +9,7 @@ ["." maybe] ["e" error] [collection - ["." list]]] + ["." list ("list/." Fold)]]] [macro ["." code]]]) @@ -20,6 +20,10 @@ (type: #export Lexer (p.Parser [Offset Text])) +(type: #export Slice + {#basis Offset + #distance Offset}) + (def: (remaining offset tape) (-> Offset Text Text) (|> tape (text.split offset) maybe.assume product.right)) @@ -42,6 +46,23 @@ (#e.Error (unconsumed-input-error end-offset input))) )) +(def: #export offset + (Lexer Offset) + (function (_ (^@ input [offset tape])) + (#e.Success [input offset]))) + +(def: (with-slices lexer) + (-> (Lexer (List Slice)) (Lexer Slice)) + (do p.Monad + [offset ..offset + slices lexer] + (wrap (list/fold (function (_ [slice::basis slice::distance] + [total::basis total::distance]) + [total::basis (n/+ slice::distance total::distance)]) + {#basis offset + #distance 0} + slices)))) + (def: #export any {#.doc "Just returns the next character without applying any logic."} (Lexer Text) @@ -51,19 +72,31 @@ (#e.Success [[(inc offset) tape] (text.from-code output)]) _ - (#e.Error cannot-lex-error)) - )) + (#e.Error cannot-lex-error)))) -(def: #export (not p) - {#.doc "Produce a character if the lexer fails."} - (All [a] (-> (Lexer a) (Lexer Text))) - (function (_ input) - (case (p input) - (#e.Error msg) - (any input) - - _ - (#e.Error "Expected to fail; yet succeeded.")))) +(def: #export any! + {#.doc "Just returns the next character without applying any logic."} + (Lexer Slice) + (function (_ [offset tape]) + (#e.Success [[(inc offset) tape] + {#basis offset + #distance 1}]))) + +(do-template [ ] + [(def: #export ( p) + {#.doc "Produce a character if the lexer fails."} + (All [a] (-> (Lexer a) (Lexer ))) + (function (_ input) + (case (p input) + (#e.Error msg) + ( input) + + _ + (#e.Error "Expected to fail; yet succeeded."))))] + + [not Text ..any] + [not! Slice ..any!] + ) (def: #export (this reference) {#.doc "Lex a text if it matches the given sample."} @@ -112,8 +145,7 @@ (#e.Success [input (text.from-code output)]) _ - (#e.Error cannot-lex-error)) - )) + (#e.Error cannot-lex-error)))) (def: #export get-input {#.doc "Get all of the remaining input (without consuming it)."} @@ -162,33 +194,49 @@ (range (char "a") (char "f")) (range (char "A") (char "F")))) -(def: #export (one-of options) - {#.doc "Only lex characters that are part of a piece of text."} - (-> Text (Lexer Text)) - (function (_ [offset tape]) - (case (text.nth offset tape) - (#.Some output) - (let [output (text.from-code output)] - (if (text.contains? output options) - (#e.Success [[(inc offset) tape] output]) - (#e.Error ($_ text/compose "Character (" output ") is not one of: " options)))) - - _ - (#e.Error cannot-lex-error)))) - -(def: #export (none-of options) - {#.doc "Only lex characters that are not part of a piece of text."} - (-> Text (Lexer Text)) - (function (_ [offset tape]) - (case (text.nth offset tape) - (#.Some output) - (let [output (text.from-code output)] - (if (.not (text.contains? output options)) - (#e.Success [[(inc offset) tape] output]) - (#e.Error ($_ text/compose "Character (" output ") is one of: " options)))) +(do-template [ ] + [(def: #export ( options) + {#.doc (code.text ($_ text/compose "Only lex characters that are" " part of a piece of text."))} + (-> Text (Lexer Text)) + (function (_ [offset tape]) + (case (text.nth offset tape) + (#.Some output) + (let [output (text.from-code output)] + (if ( (text.contains? output options)) + (#e.Success [[(inc offset) tape] output]) + (#e.Error ($_ text/compose "Character (" output + ") is should " + "be one of: " options)))) + + _ + (#e.Error cannot-lex-error))))] + + [one-of "" |>] + [none-of " not" .not] + ) - _ - (#e.Error cannot-lex-error)))) +(do-template [ ] + [(def: #export ( options) + {#.doc (code.text ($_ text/compose "Only lex characters that are" " part of a piece of text."))} + (-> Text (Lexer Slice)) + (function (_ [offset tape]) + (case (text.nth offset tape) + (#.Some output) + (let [output (text.from-code output)] + (if ( (text.contains? output options)) + (#e.Success [[(inc offset) tape] + {#basis offset + #distance 1}]) + (#e.Error ($_ text/compose "Character (" output + ") is should " + "be one of: " options)))) + + _ + (#e.Error cannot-lex-error))))] + + [one-of! "" |>] + [none-of! " not" .not] + ) (def: #export (satisfies p) {#.doc "Only lex characters that satisfy a predicate."} @@ -215,33 +263,64 @@ =right right] (wrap ($_ text/compose =left =right)))) -(do-template [ ] - [(def: #export ( p) - {#.doc } +(def: #export (and! left right) + (-> (Lexer Slice) (Lexer Slice) (Lexer Slice)) + (do p.Monad + [[left::basis left::distance] left + [right::basis right::distance] right] + (wrap [left::basis (n/+ right::distance right::distance)]))) + +(do-template [ ] + [(def: #export ( lexer) + {#.doc (code.text ($_ text/compose "Lex " " characters as a single continuous text."))} (-> (Lexer Text) (Lexer Text)) - (|> p (:: p.Monad map text.concat)))] + (|> lexer (:: p.Monad map text.concat)))] - [some p.some "Lex some characters as a single continuous text."] - [many p.many "Lex many characters as a single continuous text."] + [some p.some "some"] + [many p.many "many"] ) -(do-template [ ] - [(def: #export ( n p) - {#.doc } +(do-template [ ] + [(def: #export ( lexer) + {#.doc (code.text ($_ text/compose "Lex " " characters as a single continuous text."))} + (-> (Lexer Slice) (Lexer Slice)) + (with-slices ( lexer)))] + + [some! p.some "some"] + [many! p.many "many"] + ) + +(do-template [ ] + [(def: #export ( amount lexer) + {#.doc (code.text ($_ text/compose "Lex " " N characters."))} (-> Nat (Lexer Text) (Lexer Text)) - (do p.Monad - [] - (|> p ( n) (:: @ map text.concat))))] + (|> lexer ( amount) (:: p.Monad map text.concat)))] + + [exactly p.exactly "exactly"] + [at-most p.at-most "at most"] + [at-least p.at-least "at least"] + ) - [exactly p.exactly "Lex exactly N characters."] - [at-most p.at-most "Lex at most N characters."] - [at-least p.at-least "Lex at least N characters."] +(do-template [ ] + [(def: #export ( amount lexer) + {#.doc (code.text ($_ text/compose "Lex " " N characters."))} + (-> Nat (Lexer Slice) (Lexer Slice)) + (with-slices ( amount lexer)))] + + [exactly! p.exactly "exactly"] + [at-most! p.at-most "at most"] + [at-least! p.at-least "at least"] ) -(def: #export (between from to p) +(def: #export (between from to lexer) {#.doc "Lex between N and M characters."} (-> Nat Nat (Lexer Text) (Lexer Text)) - (|> p (p.between from to) (:: p.Monad map text.concat))) + (|> lexer (p.between from to) (:: p.Monad map text.concat))) + +(def: #export (between! from to lexer) + {#.doc "Lex between N and M characters."} + (-> Nat Nat (Lexer Slice) (Lexer Slice)) + (with-slices (p.between from to lexer))) (def: #export (enclosed [start end] lexer) (All [a] (-> [Text Text] (Lexer a) (Lexer a))) @@ -259,3 +338,15 @@ (#e.Success value) (#e.Success [real-input value])))) + +(def: #export (slice lexer) + (-> (Lexer Slice) (Lexer Text)) + (do p.Monad + [[basis distance] lexer] + (function (_ (^@ input [offset tape])) + (case (text.clip basis (n/+ basis distance) tape) + (#.Some output) + (#e.Success [input output]) + + #.None + (#e.Error "Cannot slice."))))) -- cgit v1.2.3