aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEduardo Julian2018-08-21 21:42:49 -0400
committerEduardo Julian2018-08-21 21:42:49 -0400
commit1bcf5f7a124a1f8b3aa8c994edf2ec824799ab2f (patch)
treef9941d741176713fb522cb55531e05c01fef624a
parent2d430f16e801b2589f7bfdfae943ccbd8ea90b5c (diff)
Low-level re-implementation of the parser.
-rw-r--r--stdlib/source/lux/compiler/default/syntax.lux320
-rw-r--r--stdlib/source/lux/data/text/lexer.lux2
2 files changed, 264 insertions, 58 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index b7b2d06d8..4d778136f 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -31,14 +31,15 @@
["p" parser ("parser/." Monad<Parser>)]
["ex" exception (#+ exception:)]]
[data
- ["e" error]
+ ["." error (#+ Error)]
["." number]
["." product]
["." maybe]
["." text
- ["l" lexer (#+ Lexer)]
+ ["l" lexer (#+ Offset Lexer)]
format]
[collection
+ ["." list]
["." dictionary (#+ Dictionary)]]]
["." function]
["." io]
@@ -84,7 +85,8 @@
(def: digits "0123456789")
(def: digits+ (format "_" ..digits))
-(def: white-space Text "\t\v \r\f")
+(def: white-space " ")
+(def: carriage-return "\r")
(def: new-line "\n")
(def: new-line^ (l.this new-line))
@@ -104,6 +106,8 @@
(def: sigil "#")
+(def: digit-separator "_")
+
(def: single-line-comment-marker (format ..sigil ..sigil))
## This is the parser for white-space.
@@ -285,17 +289,17 @@
number.Codec<Text,Rev>]
)
-(def: #export (nat where)
- Syntax
- (do p.Monad<Parser>
- [chunk rich-digits^]
- (case (:: number.Codec<Text,Nat> decode chunk)
- (#.Left error)
- (p.fail error)
+## (def: #export (nat where)
+## Syntax
+## (do p.Monad<Parser>
+## [chunk rich-digits^]
+## (case (:: number.Codec<Text,Nat> decode chunk)
+## (#.Left error)
+## (p.fail error)
- (#.Right value)
- (wrap [(update@ #.column (n/+ (text.size chunk)) where)
- [where (#.Nat value)]]))))
+## (#.Right value)
+## (wrap [(update@ #.column (n/+ (text.size chunk)) where)
+## [where (#.Nat value)]]))))
(def: #export (frac where)
Syntax
@@ -352,7 +356,7 @@
## the text's body's column,
## to ensure they are aligned.
(do @
- [_ (p.exactly offset (l.this " "))]
+ [_ (p.exactly offset (l.this ..white-space))]
(recur text-read
(update@ #.column (n/+ offset) where)
#0))
@@ -420,18 +424,15 @@
(wrap [(update@ #.column inc where')
#.Nil])))))))))
-## Form and tuple syntax is mostly the same, differing only in the
-## delimiters involved.
-## They may have an arbitrary number of arbitrary Code nodes as elements.
-(do-template [<name> <tag> <open> <close>]
- [(def: (<name> ast where)
- (-> Syntax Syntax)
- (<| (parser/map (product.both id (|>> <tag> [where])))
- (composite <open> <close> ast where)))]
-
- [form #.Form ..open-form ..close-form]
- [tuple #.Tuple ..open-tuple ..close-tuple]
- )
+## (do-template [<name> <tag> <open> <close>]
+## [(def: (<name> ast where)
+## (-> Syntax Syntax)
+## (<| (parser/map (product.both id (|>> <tag> [where])))
+## (composite <open> <close> ast where)))]
+
+## [form #.Form ..open-form ..close-form]
+## [tuple #.Tuple ..open-tuple ..close-tuple]
+## )
## Records are almost (syntactically) the same as forms and tuples,
## with the exception that their elements must come in pairs (as in
@@ -460,7 +461,7 @@
## mark], and the short [after the mark]).
## There are also some extra rules regarding name syntax,
## encoded on the parser.
-(def: name-separator Text ".")
+(def: name-separator ".")
## A Lux name is a pair of chunks of text, where the first-part
## refers to the module that gives context to the name, and the
@@ -550,21 +551,21 @@
[identifier (|>) #.Identifier length]
)
-(do-template [<name> <value>]
- [(def: <name>
- (Lexer Bit)
- (parser/map (function.constant <value>) (l.this (%b <value>))))]
+## (do-template [<name> <value>]
+## [(def: <name>
+## (Lexer Bit)
+## (parser/map (function.constant <value>) (l.this (%b <value>))))]
- [false #0]
- [true #1]
- )
+## [false #0]
+## [true #1]
+## )
-(def: #export (bit where)
- Syntax
- (do p.Monad<Parser>
- [value (p.either ..false ..true)]
- (wrap [(update@ #.column (n/+ 2) where)
- [where (#.Bit value)]])))
+## (def: #export (bit where)
+## Syntax
+## (do p.Monad<Parser>
+## [value (p.either ..false ..true)]
+## (wrap [(update@ #.column (n/+ 2) where)
+## [where (#.Bit value)]])))
(exception: #export (end-of-file {module Text})
(ex.report ["Module" (%t module)]))
@@ -580,10 +581,10 @@
(do p.Monad<Parser>
[where (left-padding^ where)]
($_ p.either
- (<| (..timed "bit")
- (..bit where))
- (<| (..timed "nat")
- (..nat where))
+ ## (<| (..timed "bit")
+ ## (..bit where))
+ ## (<| (..timed "nat")
+ ## (..nat where))
(<| (..timed "frac")
(..frac where))
(<| (..timed "rev")
@@ -592,14 +593,14 @@
(..int where))
(<| (..timed "text")
(..text where))
- (<| (..timed "identifier")
- (..identifier current-module aliases where))
+ ## (<| (..timed "identifier")
+ ## (..identifier current-module aliases where))
(<| (..timed "tag")
(..tag current-module aliases where))
- (<| (..timed "form")
- (..form ast' where))
- (<| (..timed "tuple")
- (..tuple ast' where))
+ ## (<| (..timed "form")
+ ## (..form ast' where))
+ ## (<| (..timed "tuple")
+ ## (..tuple ast' where))
(<| (..timed "record")
(..record ast' where))
(do @
@@ -609,11 +610,216 @@
(p.fail (ex.construct unrecognized-input where))))
))))
-(def: #export (read current-module aliases [where offset source-code])
- (-> Text Aliases Source (e.Error [Source Code]))
- (case (p.run [offset source-code] (ast current-module aliases where))
- (#e.Error error)
- (#e.Error error)
+(type: Simple
+ (-> Source (Error [Source Code])))
+
+(type: Reader
+ (-> Text Aliases Simple))
+
+(do-template [<name> <extension>]
+ [(template: (<name> value)
+ (<extension> value 1))]
+
+ [inc! "lux i64 +"]
+ [dec! "lux i64 -"]
+ )
+
+(do-template [<name> <close> <tag>]
+ [(def: (<name> read-code source)
+ (-> Simple Simple)
+ (loop [source source
+ stack (: (List Code) #.Nil)]
+ (case (read-code source)
+ (#error.Success [source' top])
+ (recur source' (#.Cons top stack))
+
+ (#error.Error error)
+ (let [[where offset source-code] source]
+ (case ("lux text char" source-code offset)
+ (#.Some char)
+ (`` (case char
+ (^ (char (~~ (static <close>))))
+ (#error.Success [[(update@ #.column inc where)
+ (inc! offset)
+ source-code]
+ [where (<tag> (list.reverse stack))]])
+
+ _
+ (ex.throw unrecognized-input where)))
+
+ _
+ (#error.Error error))))))]
+
+ ## Form and tuple syntax is mostly the same, differing only in the
+ ## delimiters involved.
+ ## They may have an arbitrary number of arbitrary Code nodes as elements.
+ [read-form ..close-form #.Form]
+ [read-tuple ..close-tuple #.Tuple]
+ )
- (#e.Success [[offset' remaining] [where' output]])
- (#e.Success [[where' offset' remaining] output])))
+(template: (clip! from to text)
+ ## TODO: Optimize away "maybe.assume"
+ (maybe.assume ("lux text clip" text from to)))
+
+(def: (read-text [where offset source-code])
+ Simple
+ (case ("lux text index" source-code (static ..text-delimiter) offset)
+ (#.Some end)
+ (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end offset)) where)
+ (inc! end)
+ source-code]
+ [where
+ (#.Text (clip! offset end source-code))]])
+
+ _
+ (ex.throw unrecognized-input where)))
+
+(def: digit-bottom Nat (dec! (char "0")))
+(def: digit-top Nat (inc! (char "9")))
+
+(template: (digit? char)
+ (and ("lux int <" (:coerce Int (static ..digit-bottom)) (:coerce Int char))
+ ("lux int <" (:coerce Int char) (:coerce Int (static ..digit-top)))))
+
+(`` (template: (digit?+ char)
+ (or (digit? char)
+ ("lux i64 =" (.char (~~ (static ..digit-separator))) char))))
+
+(`` (template: (name-char? char)
+ (not (or ("lux i64 =" (.char (~~ (static ..white-space))) char)
+ ("lux i64 =" (.char (~~ (static ..new-line))) char)
+
+ ("lux i64 =" (.char (~~ (static ..name-separator))) char)
+
+ ("lux i64 =" (.char (~~ (static ..open-form))) char)
+ ("lux i64 =" (.char (~~ (static ..close-form))) char)
+
+ ("lux i64 =" (.char (~~ (static ..open-tuple))) char)
+ ("lux i64 =" (.char (~~ (static ..close-tuple))) char)
+
+ ("lux i64 =" (.char (~~ (static ..open-record))) char)
+ ("lux i64 =" (.char (~~ (static ..close-record))) char)
+
+ ("lux i64 =" (.char (~~ (static ..text-delimiter))) char)
+ ("lux i64 =" (.char (~~ (static ..sigil))) char)))))
+
+(template: (name-char?+ char)
+ (or (name-char? char)
+ (digit? char)))
+
+(with-expansions [<output> (case (:: number.Codec<Text,Nat> decode (clip! start end source-code))
+ (#error.Success output)
+ (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where)
+ end
+ source-code]
+ [where (#.Nat output)]])
+
+ (#error.Error error)
+ (#error.Error error))]
+ (def: (read-nat start [where offset source-code])
+ (-> Offset Simple)
+ (loop [end offset]
+ (case ("lux text char" source-code end)
+ (#.Some char)
+ (if (digit?+ char)
+ (recur (inc! end))
+ <output>)
+
+ _
+ <output>))))
+
+(with-expansions [<output> (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where)
+ end
+ source-code]
+ [where (#.Identifier ["" (clip! start end source-code)])]])]
+ (def: (read-name start [where offset source-code])
+ (-> Offset Simple)
+ (loop [end offset]
+ (case ("lux text char" source-code end)
+ (#.Some char)
+ (cond (name-char?+ char)
+ (recur (inc! end))
+
+ ## else
+ <output>)
+
+ _
+ <output>))))
+
+(template: (leap-bit! value)
+ ("lux i64 +" value 2))
+
+(with-expansions [<consume-1> (as-is [where (inc! offset) source-code])]
+ (def: (read-code current-module aliases source)
+ Reader
+ (let [read-code' (read-code current-module aliases)]
+ (loop [[where offset source-code] source]
+ (case ("lux text char" source-code offset)
+ (#.Some char)
+ (`` (case char
+ (^template [<char> <direction>]
+ (^ (char <char>))
+ (recur [(update@ <direction> inc where)
+ (inc! offset)
+ source-code]))
+ ([(~~ (static ..white-space)) #.column]
+ [(~~ (static ..carriage-return)) #.column])
+
+ (^ (char (~~ (static ..new-line))))
+ (let [[where::file where::line where::column] where]
+ (recur [[where::file (inc! where::line) 0]
+ (inc! offset)
+ source-code]))
+
+ (^ (char (~~ (static ..open-form))))
+ (read-form read-code' <consume-1>)
+
+ (^ (char (~~ (static ..open-tuple))))
+ (read-tuple read-code' <consume-1>)
+
+ (^ (char (~~ (static ..text-delimiter))))
+ (read-text <consume-1>)
+
+ (^ (char (~~ (static ..sigil))))
+ (case ("lux text char" source-code (inc! offset))
+ (#.Some next)
+ (case next
+ (^template [<char> <bit>]
+ (^ (char <char>))
+ (#error.Success [[(update@ #.column (|>> leap-bit!) where)
+ (leap-bit! offset)
+ source-code]
+ [where (#.Bit <bit>)]]))
+ (["0" #0]
+ ["1" #1])
+
+ _
+ (ex.throw unrecognized-input where))
+
+ _
+ (ex.throw end-of-file current-module))
+
+ _
+ (cond (digit? char)
+ (read-nat offset <consume-1>)
+
+ (name-char? char)
+ (read-name offset <consume-1>)
+
+ ## else
+ (ex.throw unrecognized-input where))))
+
+ _
+ (ex.throw end-of-file current-module))))))
+
+## [where offset source-code]
+(def: #export read Reader read-code)
+
+## (def: #export (read current-module aliases source)
+## (-> Text Aliases Source (Error [Source Code]))
+## (case (p.run [offset source-code] (ast current-module aliases where))
+## (#error.Error error)
+## (#error.Error error)
+
+## (#error.Success [[offset' remaining] [where' output]])
+## (#error.Success [[where' offset' remaining] output])))
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
index e6186aea8..21aba8360 100644
--- a/stdlib/source/lux/data/text/lexer.lux
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -13,7 +13,7 @@
["." code]]]
["." // ("text/." Monoid<Text>)])
-(type: Offset Nat)
+(type: #export Offset Nat)
(def: start-offset Offset 0)