aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/lux/tool
diff options
context:
space:
mode:
authorEduardo Julian2020-12-17 22:03:54 -0400
committerEduardo Julian2020-12-17 22:03:54 -0400
commit68b1dd82f23d6648ac3d9075a8f84b0174392945 (patch)
tree2db148a005c21552947d96dfd4e788ba21705037 /stdlib/source/lux/tool
parentabc5c5293603229b447b8b5dfa7f3275571ad982 (diff)
More optimizations to the Lux syntax parser.
Diffstat (limited to '')
-rw-r--r--stdlib/source/lux/tool/compiler/language/lux/syntax.lux469
1 files changed, 246 insertions, 223 deletions
diff --git a/stdlib/source/lux/tool/compiler/language/lux/syntax.lux b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux
index 2b5cfd4a8..1916cfe15 100644
--- a/stdlib/source/lux/tool/compiler/language/lux/syntax.lux
+++ b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux
@@ -90,39 +90,50 @@
[!n/- "lux i64 -"]
)
-(type: #export Aliases (Dictionary Text Text))
-(def: #export no-aliases Aliases (dictionary.new text.hash))
+(type: #export Aliases
+ (Dictionary Text Text))
+
+(def: #export no-aliases
+ Aliases
+ (dictionary.new text.hash))
(def: #export prelude "lux")
(def: #export text-delimiter text.double-quote)
-(def: #export open-form "(")
-(def: #export close-form ")")
+(template [<char> <definition>]
+ [(def: #export <definition> <char>)]
+
+ ## Form delimiters
+ ["(" open-form]
+ [")" close-form]
-(def: #export open-tuple "[")
-(def: #export close-tuple "]")
+ ## Tuple delimiters
+ ["[" open-tuple]
+ ["]" close-tuple]
-(def: #export open-record "{")
-(def: #export close-record "}")
+ ## Record delimiters
+ ["{" open-record]
+ ["}" close-record]
-(def: #export sigil "#")
+ ["#" sigil]
-(def: #export digit-separator ",")
+ ["," digit-separator]
-(def: #export positive-sign "+")
-(def: #export negative-sign "-")
+ ["+" positive-sign]
+ ["-" negative-sign]
-(def: #export frac-separator ".")
+ ["." frac-separator]
-## The parts of a name are separated by a single mark.
-## E.g. module.short.
-## Only one such mark may be used in an name, since there
-## can only be 2 parts to a name (the module [before the
-## mark], and the short [after the mark]).
-## There are also some extra rules regarding name syntax,
-## encoded in the parser.
-(def: #export name-separator ".")
+ ## The parts of a name are separated by a single mark.
+ ## E.g. module.short.
+ ## Only one such mark may be used in an name, since there
+ ## can only be 2 parts to a name (the module [before the
+ ## mark], and the short [after the mark]).
+ ## There are also some extra rules regarding name syntax,
+ ## encoded in the parser.
+ ["." name-separator]
+ )
(exception: #export (end-of-file {module Text})
(exception.report
@@ -130,8 +141,8 @@
(def: amount-of-input-shown 64)
-(def: (input-at start input)
- (-> Offset Text Text)
+(template: (input-at start input)
+ ## (-> Offset Text Text)
(let [end (|> start (!n/+ amount-of-input-shown) (n.min ("lux text size" input)))]
(!clip start end input)))
@@ -197,48 +208,42 @@
(!inc offset)
source-code])
-(def: close-signal
- (template.with-locals [g!close-signal]
- (template.text [g!close-signal])))
-
-(template [<name> <close> <tag> <context>]
- [(def: (<name> parse source)
- (-> (Parser Code) (Parser Code))
- (let [[where offset source-code] source]
- (loop [source (: Source [(!forward 1 where) offset source-code])
- stack (: (List Code) #.Nil)]
- (case (parse source)
- (#.Right [source' top])
- (recur source' (#.Cons top stack))
-
- (#.Left [source' error])
- (if (is? <close> error)
- (#.Right [source'
- [where (<tag> (list.reverse stack))]])
- (#.Left [source' error]))))))]
+(template [<name> <close> <tag>]
+ [(template: (<name> parse where offset source-code)
+ ## (-> (Parser Code) (Parser Code))
+ (loop [source (: Source [(!forward 1 where) offset source-code])
+ stack (: (List Code) #.Nil)]
+ (case (parse source)
+ (#.Right [source' top])
+ (recur source' (#.Cons top stack))
+
+ (#.Left [source' error])
+ (if (is? <close> error)
+ (#.Right [source'
+ [where (<tag> (list.reverse stack))]])
+ (#.Left [source' error])))))]
## Form and tuple syntax is mostly the same, differing only in the
## delimiters involved.
## They may have an arbitrary number of arbitrary Code nodes as elements.
- [parse-form ..close-form #.Form "Form"]
- [parse-tuple ..close-tuple #.Tuple "Tuple"]
+ [parse-form ..close-form #.Form]
+ [parse-tuple ..close-tuple #.Tuple]
)
-(def: (parse-record parse source)
- (-> (Parser Code) (Parser Code))
- (let [[where offset source-code] source]
- (loop [source (: Source [(!forward 1 where) offset source-code])
- stack (: (List [Code Code]) #.Nil)]
- (case (parse source)
- (#.Right [sourceF field])
- (!letE [sourceFV value] (parse sourceF)
- (recur sourceFV (#.Cons [field value] stack)))
-
- (#.Left [source' error])
- (if (is? ..close-record error)
- (#.Right [source'
- [where (#.Record (list.reverse stack))]])
- (#.Left [source' error]))))))
+(template: (parse-record parse where offset source-code)
+ ## (-> (Parser Code) (Parser Code))
+ (loop [source (: Source [(!forward 1 where) offset source-code])
+ stack (: (List [Code Code]) #.Nil)]
+ (case (parse source)
+ (#.Right [sourceF field])
+ (!letE [sourceFV value] (parse sourceF)
+ (recur sourceFV (#.Cons [field value] stack)))
+
+ (#.Left [source' error])
+ (if (is? ..close-record error)
+ (#.Right [source'
+ [where (#.Record (list.reverse stack))]])
+ (#.Left [source' error])))))
(template: (!guarantee-no-new-lines where offset source-code content body)
(case ("lux text index" 0 (static text.new-line) content)
@@ -253,185 +258,202 @@
(-> Location Nat Text (Either [Source Text] [Source Code]))
(case ("lux text index" offset (static ..text-delimiter) source-code)
(#.Some g!end)
- (let [g!content (!clip offset g!end source-code)]
- (<| (!guarantee-no-new-lines where offset source-code g!content)
- (#.Right [[(let [size (!n/- offset g!end)]
- (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where))
- (!inc g!end)
- source-code]
- [where
- (#.Text g!content)]])))
+ (<| (let [g!content (!clip offset g!end source-code)])
+ (!guarantee-no-new-lines where offset source-code g!content)
+ (#.Right [[(let [size (!n/- offset g!end)]
+ (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where))
+ (!inc g!end)
+ source-code]
+ [where
+ (#.Text g!content)]]))
_
(!failure ..parse-text where offset source-code)))
-(def: digit-bottom Nat (!dec (char "0")))
-(def: digit-top Nat (!inc (char "9")))
-
-(template: (!digit? char)
- (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom)))
- (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char))))
-
-(`` (template: (!digit?+ char)
- (or (!digit? char)
- ("lux i64 =" (.char (~~ (static ..digit-separator))) char))))
-
-(with-expansions [<non-name-chars> (template [<char>]
+(with-expansions [<digits> (as-is "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")
+ <non-name-chars> (template [<char>]
[(~~ (static <char>))]
[text.space]
- [text.new-line]
+ [text.new-line] [text.carriage-return]
[..name-separator]
[..open-form] [..close-form]
[..open-tuple] [..close-tuple]
[..open-record] [..close-record]
[..text-delimiter]
- [..sigil])]
- (`` (template: (!strict-name-char? char)
- ("lux syntax char case!" char
+ [..sigil])
+ <digit-separator> (static ..digit-separator)]
+ (template: (!if-digit? @char @then @else)
+ ("lux syntax char case!" @char
+ [[<digits>]
+ @then]
+
+ ## else
+ @else))
+
+ (template: (!if-digit?+ @char @then @else-options @else)
+ (`` ("lux syntax char case!" @char
+ [[<digits> <digit-separator>]
+ @then
+
+ (~~ (template.splice @else-options))]
+
+ ## else
+ @else)))
+
+ (`` (template: (!if-name-char?|tail @char @then @else)
+ ("lux syntax char case!" @char
[[<non-name-chars>]
- #0]
+ @else]
## else
- #1))))
+ @then)))
-(template: (!name-char?|head char)
- (and (!strict-name-char? char)
- (not (!digit? char))))
+ (`` (template: (!if-name-char?|head @char @then @else)
+ ("lux syntax char case!" @char
+ [[<non-name-chars> <digits>]
+ @else]
-(template: (!name-char? char)
- (!strict-name-char? char))
+ ## else
+ @then)))
+ )
-(template: (!number-output <start> <end> <codec> <tag>)
- (case (|> source-code
+(template: (!number-output <source-code> <start> <end> <codec> <tag>)
+ (case (|> <source-code>
(!clip <start> <end>)
(text.replace-all ..digit-separator "")
(\ <codec> decode))
(#.Right output)
- (#.Right [[(update@ #.column (|>> (!n/+ (!n/- <start> <end>))) where)
+ (#.Right [[(let [[where::file where::line where::column] where]
+ [where::file where::line (!n/+ (!n/- <start> <end>) where::column)])
<end>
- source-code]
+ <source-code>]
[where (<tag> output)]])
(#.Left error)
- (#.Left [[where <start> source-code]
+ (#.Left [[where <start> <source-code>]
error])))
(def: no-exponent Offset 0)
-(with-expansions [<int-output> (as-is (!number-output start end int.decimal #.Int))
- <frac-output> (as-is (!number-output start end frac.decimal #.Frac))
- <failure> (!failure ..parse-frac where offset source-code)]
- (def: (parse-frac source-code//size start [where offset source-code])
- (-> Nat Offset (Parser Code))
+(with-expansions [<int-output> (as-is (!number-output source-code start end int.decimal #.Int))
+ <frac-output> (as-is (!number-output source-code start end frac.decimal #.Frac))
+ <failure> (!failure ..parse-frac where offset source-code)
+ <frac-separator> (static ..frac-separator)
+ <signs> (template [<sign>]
+ [(~~ (static <sign>))]
+
+ [..positive-sign]
+ [..negative-sign])]
+ (template: (parse-frac source-code//size start where offset source-code)
+ ## (-> Nat Offset (Parser Code))
(loop [end offset
- exponent ..no-exponent]
+ exponent (static ..no-exponent)]
(<| (!with-char+ source-code//size source-code end char/0 <frac-output>)
- (cond (!digit?+ char/0)
- (recur (!inc end) exponent)
-
- (and (or (!n/= (char "e") char/0)
- (!n/= (char "E") char/0))
- (is? ..no-exponent exponent))
- (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>)
- (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1)
- (!n/= (`` (char (~~ (static ..negative-sign)))) char/1))
- (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>)
- (if (!digit?+ char/2)
- (recur (!n/+ 3 end) char/0)
- <failure>))
- <failure>))
-
- ## else
- <frac-output>))))
-
- (def: (parse-signed start [where offset source-code])
- (-> Offset (Parser Code))
- (let [source-code//size ("lux text size" source-code)]
- (loop [end offset]
- (<| (!with-char+ source-code//size source-code end char <int-output>)
- (cond (!digit?+ char)
- (recur (!inc end))
-
- (!n/= (`` (.char (~~ (static ..frac-separator))))
- char)
- (parse-frac source-code//size start [where (!inc end) source-code])
-
- ## else
- <int-output>))))))
+ (!if-digit?+ char/0
+ (recur (!inc end) exponent)
+
+ [["e" "E"]
+ (if (is? (static ..no-exponent) exponent)
+ (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>)
+ (`` ("lux syntax char case!" char/1
+ [[<signs>]
+ (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>)
+ (!if-digit?+ char/2
+ (recur (!n/+ 3 end) char/0)
+ []
+ <failure>))]
+ ## else
+ <failure>)))
+ <frac-output>)]
+
+ <frac-output>))))
+
+ (template: (parse-signed source-code//size start where offset source-code)
+ ## (-> Nat Offset (Parser Code))
+ (loop [end offset]
+ (<| (!with-char+ source-code//size source-code end char <int-output>)
+ (!if-digit?+ char
+ (recur (!inc end))
+
+ [[<frac-separator>]
+ (parse-frac source-code//size start where (!inc end) source-code)]
+
+ <int-output>))))
+ )
(template [<parser> <codec> <tag>]
- [(def: (<parser> source-code//size start where offset source-code)
- (-> Nat Nat Location Nat Text (Either [Source Text] [Source Code]))
+ [(template: (<parser> source-code//size start where offset source-code)
+ ## (-> Nat Nat Location Nat Text (Either [Source Text] [Source Code]))
(loop [g!end offset]
- (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end <codec> <tag>))
- (if (!digit?+ g!char)
- (recur (!inc g!end))
- (!number-output start g!end <codec> <tag>)))))]
+ (<| (!with-char+ source-code//size source-code g!end g!char (!number-output source-code start g!end <codec> <tag>))
+ (!if-digit?+ g!char
+ (recur (!inc g!end))
+ []
+ (!number-output source-code start g!end <codec> <tag>)))))]
[parse-nat n.decimal #.Nat]
[parse-rev rev.decimal #.Rev]
)
(template: (!parse-signed source-code//size offset where source-code @aliases @end)
- (let [g!offset/1 (!inc offset)]
- (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end)
- (if (!digit? g!char/1)
- (parse-signed offset [where (!inc/2 offset) source-code])
- (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier)))))
+ (<| (let [g!offset/1 (!inc offset)])
+ (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end)
+ (!if-digit? g!char/1
+ (parse-signed source-code//size offset where (!inc/2 offset) source-code)
+ (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier))))
(with-expansions [<output> (#.Right [[(update@ #.column (|>> (!n/+ (!n/- start end))) where)
end
source-code]
(!clip start end source-code)])]
- (def: (parse-name-part start [where offset source-code])
- (-> Offset (Parser Text))
+ (template: (parse-name-part start where offset source-code)
+ ## (-> Offset (Parser Text))
(let [source-code//size ("lux text size" source-code)]
(loop [end offset]
(<| (!with-char+ source-code//size source-code end char <output>)
- (if (!name-char? char)
- (recur (!inc end))
- <output>))))))
+ (!if-name-char?|tail char
+ (recur (!inc end))
+ <output>))))))
(template: (!parse-half-name @offset @char @module)
- (cond (!name-char?|head @char)
- (!letE [source' name] (..parse-name-part @offset [where (!inc @offset) source-code])
- (#.Right [source' [@module name]]))
-
- ## else
- (!failure ..!parse-half-name where @offset source-code)))
-
-(`` (def: (parse-short-name current-module [where offset/0 source-code])
- (-> Text (Parser Name))
- (<| (!with-char source-code offset/0 char/0
- (!end-of-file where offset/0 source-code current-module))
+ (!if-name-char?|head @char
+ (!letE [source' name] (..parse-name-part @offset where (!inc @offset) source-code)
+ (#.Right [source' [@module name]]))
+ (!failure ..!parse-half-name where @offset source-code)))
+
+(`` (def: (parse-short-name source-code//size current-module [where offset/0 source-code])
+ (-> Nat Text (Parser Name))
+ (<| (!with-char+ source-code//size source-code offset/0 char/0
+ (!end-of-file where offset/0 source-code current-module))
(if (!n/= (char (~~ (static ..name-separator))) char/0)
- (let [offset/1 (!inc offset/0)]
- (<| (!with-char source-code offset/1 char/1
- (!end-of-file where offset/1 source-code current-module))
- (!parse-half-name offset/1 char/1 current-module)))
- (!parse-half-name offset/0 char/0 ..prelude)))))
-
-(template: (!parse-short-name @current-module @source @where @tag)
- (!letE [source' name] (..parse-short-name @current-module @source)
+ (<| (let [offset/1 (!inc offset/0)])
+ (!with-char+ source-code//size source-code offset/1 char/1
+ (!end-of-file where offset/1 source-code current-module))
+ (!parse-half-name offset/1 char/1 current-module))
+ (!parse-half-name offset/0 char/0 (static ..prelude))))))
+
+(template: (!parse-short-name source-code//size @current-module @source @where @tag)
+ (!letE [source' name] (..parse-short-name source-code//size @current-module @source)
(#.Right [source' [@where (@tag name)]])))
(with-expansions [<simple> (as-is (#.Right [source' ["" simple]]))]
(`` (def: (parse-full-name aliases start source)
(-> Aliases Offset (Parser Name))
- (<| (!letE [source' simple] (..parse-name-part start source))
+ (<| (!letE [source' simple] (let [[where offset source-code] source]
+ (..parse-name-part start where offset source-code)))
(let [[where' offset' source-code'] source'])
(!with-char source-code' offset' char/separator <simple>)
(if (!n/= (char (~~ (static ..name-separator))) char/separator)
- (let [offset'' (!inc offset')]
- (!letE [source'' complex] (..parse-name-part offset'' [(!forward 1 where') offset'' source-code'])
- (if ("lux text =" "" complex)
- (let [[where offset source-code] source]
- (!failure ..parse-full-name where offset source-code))
- (#.Right [source'' [(|> aliases
- (dictionary.get simple)
- (maybe.default simple))
- complex]]))))
+ (<| (let [offset'' (!inc offset')])
+ (!letE [source'' complex] (..parse-name-part offset'' (!forward 1 where') offset'' source-code'))
+ (if ("lux text =" "" complex)
+ (let [[where offset source-code] source]
+ (!failure ..parse-full-name where offset source-code))
+ (#.Right [source'' [(|> aliases
+ (dictionary.get simple)
+ (maybe.default simple))
+ complex]])))
<simple>)))))
(template: (!parse-full-name @offset @source @where @aliases @tag)
@@ -443,7 +465,7 @@
## [expression ...]
## [form "(" [#* expression] ")"])
-(with-expansions [<consume-1> (as-is [where (!inc offset/0) source-code])
+(with-expansions [<consume-1> (as-is where (!inc offset/0) source-code)
<move-1> (as-is [(!forward 1 where) (!inc offset/0) source-code])
<move-2> (as-is [(!forward 1 where) (!inc/2 offset/0) source-code])
<recur> (as-is (parse current-module aliases source-code//size))
@@ -488,47 +510,48 @@
## Special code
[(~~ (static ..sigil))]
- (let [offset/1 (!inc offset/0)]
- (<| (!with-char+ source-code//size source-code offset/1 char/1
- (!end-of-file where offset/1 source-code current-module))
- ("lux syntax char case!" char/1
- [[(~~ (static ..name-separator))]
- (!parse-short-name current-module <move-2> where #.Tag)
-
- ## Single-line comment
- [(~~ (static ..sigil))]
- (case ("lux text index" (!inc offset/1) (static text.new-line) source-code)
- (#.Some end)
- (recur (!vertical where end source-code))
-
- _
- (!end-of-file where offset/1 source-code current-module))
-
- (~~ (template [<char> <bit>]
- [[<char>]
- (#.Right [[(update@ #.column (|>> !inc/2) where)
- (!inc offset/1)
- source-code]
- [where (#.Bit <bit>)]])]
-
- ["0" #0]
- ["1" #1]))]
-
- ## else
- (cond (!name-char?|head char/1) ## Tag
- (!parse-full-name offset/1 <move-2> where aliases #.Tag)
-
- ## else
- (!failure ..parse where offset/0 source-code)))))
+ (<| (let [offset/1 (!inc offset/0)])
+ (!with-char+ source-code//size source-code offset/1 char/1
+ (!end-of-file where offset/1 source-code current-module))
+ ("lux syntax char case!" char/1
+ [[(~~ (static ..name-separator))]
+ (!parse-short-name source-code//size current-module <move-2> where #.Tag)
+
+ ## Single-line comment
+ [(~~ (static ..sigil))]
+ (case ("lux text index" (!inc offset/1) (static text.new-line) source-code)
+ (#.Some end)
+ (recur (!vertical where end source-code))
+
+ _
+ (!end-of-file where offset/1 source-code current-module))
+
+ (~~ (template [<char> <bit>]
+ [[<char>]
+ (#.Right [[(update@ #.column (|>> !inc/2) where)
+ (!inc offset/1)
+ source-code]
+ [where (#.Bit <bit>)]])]
+
+ ["0" #0]
+ ["1" #1]))]
+
+ ## else
+ (!if-name-char?|head char/1
+ ## Tag
+ (!parse-full-name offset/1 <move-2> where aliases #.Tag)
+ (!failure ..parse where offset/0 source-code))))
## Coincidentally (= ..name-separator ..frac-separator)
- [(~~ (static ..name-separator))]
- (let [offset/1 (!inc offset/0)]
- (<| (!with-char+ source-code//size source-code offset/1 char/1
- (!end-of-file where offset/1 source-code current-module))
- (if (!digit? char/1)
- (parse-rev source-code//size offset/0 where (!inc offset/1) source-code)
- (!parse-short-name current-module [where offset/1 source-code] where #.Identifier))))
+ [(~~ (static ..name-separator))
+ ## (~~ (static ..frac-separator))
+ ]
+ (<| (let [offset/1 (!inc offset/0)])
+ (!with-char+ source-code//size source-code offset/1 char/1
+ (!end-of-file where offset/1 source-code current-module))
+ (!if-digit? char/1
+ (parse-rev source-code//size offset/0 where (!inc offset/1) source-code)
+ (!parse-short-name source-code//size current-module [where offset/1 source-code] where #.Identifier)))
[(~~ (static ..positive-sign))
(~~ (static ..negative-sign))]
@@ -536,11 +559,11 @@
(!end-of-file where offset/0 source-code current-module))]
## else
- (if (!digit? char/0)
- ## Natural number
- (parse-nat source-code//size offset/0 where (!inc offset/0) source-code)
- ## Identifier
- (!parse-full-name offset/0 <consume-1> where aliases #.Identifier))
+ (!if-digit? char/0
+ ## Natural number
+ (parse-nat source-code//size offset/0 where (!inc offset/0) source-code)
+ ## Identifier
+ (!parse-full-name offset/0 [<consume-1>] where aliases #.Identifier))
)))
)))
))