Diffstat:
 stdlib/source/lux/compiler/default/syntax.lux |  27
 stdlib/source/lux/data/text/lexer.lux         | 207
 2 files changed, 162 insertions, 72 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index 5f2d6d93b..322035fd8 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -55,20 +55,19 @@
## It operates recursively in order to produce the longest continuous
## chunk of white-space.
(def: (space^ where)
- (-> Cursor (Lexer [Cursor Text]))
+ (-> Cursor (Lexer [Cursor Any]))
(p.either (do p.Monad<Parser>
- [content (l.many (l.one-of white-space))]
- (wrap [(update@ #.column (n/+ (text.size content)) where)
- content]))
+ [content (l.many! (l.one-of! white-space))]
+ (wrap [(update@ #.column (n/+ (get@ #l.distance content)) where)
+ []]))
## New-lines must be handled as a separate case to ensure line
## information is handled properly.
(do p.Monad<Parser>
- [content (l.many (l.one-of new-line))]
+ [content (l.many! (l.one-of! new-line))]
(wrap [(|> where
- (update@ #.line (n/+ (text.size content)))
+ (update@ #.line (n/+ (get@ #l.distance content)))
(set@ #.column 0))
- content]))
- ))
+ []]))))
## Single-line comments can start anywhere, but only go up to the
## next new-line.
@@ -76,7 +75,7 @@
(-> Cursor (Lexer [Cursor Text]))
(do p.Monad<Parser>
[_ (l.this "##")
- comment (l.some (l.none-of new-line))
+ comment (l.slice (l.some! (l.none-of! new-line)))
_ (l.this new-line)]
(wrap [(|> where
(update@ #.line inc)
@@ -157,7 +156,7 @@
[[where comment] (comment^ where)]
(left-padding^ where))
(do p.Monad<Parser>
- [[where white-space] (space^ where)]
+ [[where _] (space^ where)]
(left-padding^ where))
(:: p.Monad<Parser> wrap where)))
@@ -365,7 +364,7 @@
## the text's body's column,
## to ensure they are aligned.
(do @
- [offset (l.many (l.one-of " "))
+ [offset (l.slice (l.many! (l.one-of! " ")))
#let [offset-size (text.size offset)]]
(if (n/>= offset-column offset-size)
## Any extra offset
@@ -385,7 +384,7 @@
($_ p.either
## Normal text characters.
(do @
- [normal (l.many (l.none-of "\\\"\n"))]
+ [normal (l.slice (l.many! (l.none-of! "\\\"\n")))]
(recur (format text-read normal)
(|> where
(update@ #.column (n/+ (text.size normal))))
@@ -512,8 +511,8 @@
[#let [digits "0123456789"
delimiters (format "()[]{}#\"" name-separator)
space (format white-space new-line)
- head-lexer (l.none-of (format digits delimiters space))
- tail-lexer (l.some (l.none-of (format delimiters space)))]
+ head-lexer (l.slice (l.none-of! (format digits delimiters space)))
+ tail-lexer (l.slice (l.some! (l.none-of! (format delimiters space))))]
head head-lexer
tail tail-lexer]
(wrap (format head tail))))
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index 481d17b0a..3b4b63a26 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -9,7 +9,7 @@
["." maybe]
["e" error]
[collection
- ["." list]]]
+ ["." list ("list/." Fold<List>)]]]
[macro
["." code]]])
@@ -20,6 +20,10 @@
(type: #export Lexer
(p.Parser [Offset Text]))
+(type: #export Slice
+ {#basis Offset
+ #distance Offset})
+
(def: (remaining offset tape)
(-> Offset Text Text)
(|> tape (text.split offset) maybe.assume product.right))
@@ -42,6 +46,23 @@
(#e.Error (unconsumed-input-error end-offset input)))
))
+(def: #export offset
+ (Lexer Offset)
+ (function (_ (^@ input [offset tape]))
+ (#e.Success [input offset])))
+
+(def: (with-slices lexer)
+ (-> (Lexer (List Slice)) (Lexer Slice))
+ (do p.Monad<Parser>
+ [offset ..offset
+ slices lexer]
+ (wrap (list/fold (function (_ [slice::basis slice::distance]
+ [total::basis total::distance])
+ [total::basis (n/+ slice::distance total::distance)])
+ {#basis offset
+ #distance 0}
+ slices))))
+
(def: #export any
{#.doc "Just returns the next character without applying any logic."}
(Lexer Text)
@@ -51,19 +72,31 @@
(#e.Success [[(inc offset) tape] (text.from-code output)])
_
- (#e.Error cannot-lex-error))
- ))
+ (#e.Error cannot-lex-error))))
-(def: #export (not p)
- {#.doc "Produce a character if the lexer fails."}
- (All [a] (-> (Lexer a) (Lexer Text)))
- (function (_ input)
- (case (p input)
- (#e.Error msg)
- (any input)
-
- _
- (#e.Error "Expected to fail; yet succeeded."))))
+(def: #export any!
+ {#.doc "Just consumes the next character without applying any logic."}
+ (Lexer Slice)
+ (function (_ [offset tape])
+ (#e.Success [[(inc offset) tape]
+ {#basis offset
+ #distance 1}])))
+
+(do-template [<name> <type> <any>]
+ [(def: #export (<name> p)
+ {#.doc "Consume the next character if, and only if, the given lexer fails."}
+ (All [a] (-> (Lexer a) (Lexer <type>)))
+ (function (_ input)
+ (case (p input)
+ (#e.Error msg)
+ (<any> input)
+
+ _
+ (#e.Error "Expected to fail; yet succeeded."))))]
+
+ [not Text ..any]
+ [not! Slice ..any!]
+ )
(def: #export (this reference)
{#.doc "Lex a text if it matches the given sample."}
@@ -112,8 +145,7 @@
(#e.Success [input (text.from-code output)])
_
- (#e.Error cannot-lex-error))
- ))
+ (#e.Error cannot-lex-error))))
(def: #export get-input
{#.doc "Get all of the remaining input (without consuming it)."}
@@ -162,33 +194,49 @@
(range (char "a") (char "f"))
(range (char "A") (char "F"))))
-(def: #export (one-of options)
- {#.doc "Only lex characters that are part of a piece of text."}
- (-> Text (Lexer Text))
- (function (_ [offset tape])
- (case (text.nth offset tape)
- (#.Some output)
- (let [output (text.from-code output)]
- (if (text.contains? output options)
- (#e.Success [[(inc offset) tape] output])
- (#e.Error ($_ text/compose "Character (" output ") is not one of: " options))))
-
- _
- (#e.Error cannot-lex-error))))
-
-(def: #export (none-of options)
- {#.doc "Only lex characters that are not part of a piece of text."}
- (-> Text (Lexer Text))
- (function (_ [offset tape])
- (case (text.nth offset tape)
- (#.Some output)
- (let [output (text.from-code output)]
- (if (.not (text.contains? output options))
- (#e.Success [[(inc offset) tape] output])
- (#e.Error ($_ text/compose "Character (" output ") is one of: " options))))
+(do-template [<name> <description-modifier> <modifier>]
+ [(def: #export (<name> options)
+ {#.doc (code.text ($_ text/compose "Only lex characters that are" <description-modifier> " part of a piece of text."))}
+ (-> Text (Lexer Text))
+ (function (_ [offset tape])
+ (case (text.nth offset tape)
+ (#.Some output)
+ (let [output (text.from-code output)]
+ (if (<modifier> (text.contains? output options))
+ (#e.Success [[(inc offset) tape] output])
+ (#e.Error ($_ text/compose "Character (" output
+ ") should" <description-modifier>
+ " be one of: " options))))
+
+ _
+ (#e.Error cannot-lex-error))))]
+
+ [one-of "" |>]
+ [none-of " not" .not]
+ )
- _
- (#e.Error cannot-lex-error))))
+(do-template [<name> <description-modifier> <modifier>]
+ [(def: #export (<name> options)
+ {#.doc (code.text ($_ text/compose "Only lex characters that are" <description-modifier> " part of a piece of text."))}
+ (-> Text (Lexer Slice))
+ (function (_ [offset tape])
+ (case (text.nth offset tape)
+ (#.Some output)
+ (let [output (text.from-code output)]
+ (if (<modifier> (text.contains? output options))
+ (#e.Success [[(inc offset) tape]
+ {#basis offset
+ #distance 1}])
+ (#e.Error ($_ text/compose "Character (" output
+ ") should" <description-modifier>
+ " be one of: " options))))
+
+ _
+ (#e.Error cannot-lex-error))))]
+
+ [one-of! "" |>]
+ [none-of! " not" .not]
+ )
(def: #export (satisfies p)
{#.doc "Only lex characters that satisfy a predicate."}
@@ -215,33 +263,64 @@
=right right]
(wrap ($_ text/compose =left =right))))
-(do-template [<name> <base> <doc>]
- [(def: #export (<name> p)
- {#.doc <doc>}
+(def: #export (and! left right)
+ (-> (Lexer Slice) (Lexer Slice) (Lexer Slice))
+ (do p.Monad<Parser>
+ [[left::basis left::distance] left
+ [right::basis right::distance] right]
+ (wrap [left::basis (n/+ left::distance right::distance)])))
+
+(do-template [<name> <base> <doc-modifier>]
+ [(def: #export (<name> lexer)
+ {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " characters as a single continuous text."))}
(-> (Lexer Text) (Lexer Text))
- (|> p <base> (:: p.Monad<Parser> map text.concat)))]
+ (|> lexer <base> (:: p.Monad<Parser> map text.concat)))]
- [some p.some "Lex some characters as a single continuous text."]
- [many p.many "Lex many characters as a single continuous text."]
+ [some p.some "some"]
+ [many p.many "many"]
)
-(do-template [<name> <base> <doc>]
- [(def: #export (<name> n p)
- {#.doc <doc>}
+(do-template [<name> <base> <doc-modifier>]
+ [(def: #export (<name> lexer)
+ {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " characters as a single continuous text."))}
+ (-> (Lexer Slice) (Lexer Slice))
+ (with-slices (<base> lexer)))]
+
+ [some! p.some "some"]
+ [many! p.many "many"]
+ )
+
+(do-template [<name> <base> <doc-modifier>]
+ [(def: #export (<name> amount lexer)
+ {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " N characters."))}
(-> Nat (Lexer Text) (Lexer Text))
- (do p.Monad<Parser>
- []
- (|> p (<base> n) (:: @ map text.concat))))]
+ (|> lexer (<base> amount) (:: p.Monad<Parser> map text.concat)))]
+
+ [exactly p.exactly "exactly"]
+ [at-most p.at-most "at most"]
+ [at-least p.at-least "at least"]
+ )
- [exactly p.exactly "Lex exactly N characters."]
- [at-most p.at-most "Lex at most N characters."]
- [at-least p.at-least "Lex at least N characters."]
+(do-template [<name> <base> <doc-modifier>]
+ [(def: #export (<name> amount lexer)
+ {#.doc (code.text ($_ text/compose "Lex " <doc-modifier> " N characters."))}
+ (-> Nat (Lexer Slice) (Lexer Slice))
+ (with-slices (<base> amount lexer)))]
+
+ [exactly! p.exactly "exactly"]
+ [at-most! p.at-most "at most"]
+ [at-least! p.at-least "at least"]
)
-(def: #export (between from to p)
+(def: #export (between from to lexer)
{#.doc "Lex between N and M characters."}
(-> Nat Nat (Lexer Text) (Lexer Text))
- (|> p (p.between from to) (:: p.Monad<Parser> map text.concat)))
+ (|> lexer (p.between from to) (:: p.Monad<Parser> map text.concat)))
+
+(def: #export (between! from to lexer)
+ {#.doc "Lex between N and M characters."}
+ (-> Nat Nat (Lexer Slice) (Lexer Slice))
+ (with-slices (p.between from to lexer)))
(def: #export (enclosed [start end] lexer)
(All [a] (-> [Text Text] (Lexer a) (Lexer a)))
@@ -259,3 +338,15 @@
(#e.Success value)
(#e.Success [real-input value]))))
+
+(def: #export (slice lexer)
+ (-> (Lexer Slice) (Lexer Text))
+ (do p.Monad<Parser>
+ [[basis distance] lexer]
+ (function (_ (^@ input [offset tape]))
+ (case (text.clip basis (n/+ basis distance) tape)
+ (#.Some output)
+ (#e.Success [input output])
+
+ #.None
+ (#e.Error "Cannot slice.")))))