diff options
author | Eduardo Julian | 2020-12-17 22:03:54 -0400 |
---|---|---|
committer | Eduardo Julian | 2020-12-17 22:03:54 -0400 |
commit | 68b1dd82f23d6648ac3d9075a8f84b0174392945 (patch) | |
tree | 2db148a005c21552947d96dfd4e788ba21705037 /stdlib/source/lux/tool | |
parent | abc5c5293603229b447b8b5dfa7f3275571ad982 (diff) |
More optimizations to the Lux syntax parser.
Diffstat (limited to 'stdlib/source/lux/tool')
-rw-r--r-- | stdlib/source/lux/tool/compiler/language/lux/syntax.lux | 469 |
1 files changed, 246 insertions, 223 deletions
diff --git a/stdlib/source/lux/tool/compiler/language/lux/syntax.lux b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux index 2b5cfd4a8..1916cfe15 100644 --- a/stdlib/source/lux/tool/compiler/language/lux/syntax.lux +++ b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux @@ -90,39 +90,50 @@ [!n/- "lux i64 -"] ) -(type: #export Aliases (Dictionary Text Text)) -(def: #export no-aliases Aliases (dictionary.new text.hash)) +(type: #export Aliases + (Dictionary Text Text)) + +(def: #export no-aliases + Aliases + (dictionary.new text.hash)) (def: #export prelude "lux") (def: #export text-delimiter text.double-quote) -(def: #export open-form "(") -(def: #export close-form ")") +(template [<char> <definition>] + [(def: #export <definition> <char>)] + + ## Form delimiters + ["(" open-form] + [")" close-form] -(def: #export open-tuple "[") -(def: #export close-tuple "]") + ## Tuple delimiters + ["[" open-tuple] + ["]" close-tuple] -(def: #export open-record "{") -(def: #export close-record "}") + ## Record delimiters + ["{" open-record] + ["}" close-record] -(def: #export sigil "#") + ["#" sigil] -(def: #export digit-separator ",") + ["," digit-separator] -(def: #export positive-sign "+") -(def: #export negative-sign "-") + ["+" positive-sign] + ["-" negative-sign] -(def: #export frac-separator ".") + ["." frac-separator] -## The parts of a name are separated by a single mark. -## E.g. module.short. -## Only one such mark may be used in an name, since there -## can only be 2 parts to a name (the module [before the -## mark], and the short [after the mark]). -## There are also some extra rules regarding name syntax, -## encoded in the parser. -(def: #export name-separator ".") + ## The parts of a name are separated by a single mark. + ## E.g. module.short. + ## Only one such mark may be used in an name, since there + ## can only be 2 parts to a name (the module [before the + ## mark], and the short [after the mark]). + ## There are also some extra rules regarding name syntax, + ## encoded in the parser. + ["." name-separator] + ) (exception: #export (end-of-file {module Text}) (exception.report @@ -130,8 +141,8 @@ (def: amount-of-input-shown 64) -(def: (input-at start input) - (-> Offset Text Text) +(template: (input-at start input) + ## (-> Offset Text Text) (let [end (|> start (!n/+ amount-of-input-shown) (n.min ("lux text size" input)))] (!clip start end input))) @@ -197,48 +208,42 @@ (!inc offset) source-code]) -(def: close-signal - (template.with-locals [g!close-signal] - (template.text [g!close-signal]))) - -(template [<name> <close> <tag> <context>] - [(def: (<name> parse source) - (-> (Parser Code) (Parser Code)) - (let [[where offset source-code] source] - (loop [source (: Source [(!forward 1 where) offset source-code]) - stack (: (List Code) #.Nil)] - (case (parse source) - (#.Right [source' top]) - (recur source' (#.Cons top stack)) - - (#.Left [source' error]) - (if (is? <close> error) - (#.Right [source' - [where (<tag> (list.reverse stack))]]) - (#.Left [source' error]))))))] +(template [<name> <close> <tag>] + [(template: (<name> parse where offset source-code) + ## (-> (Parser Code) (Parser Code)) + (loop [source (: Source [(!forward 1 where) offset source-code]) + stack (: (List Code) #.Nil)] + (case (parse source) + (#.Right [source' top]) + (recur source' (#.Cons top stack)) + + (#.Left [source' error]) + (if (is? <close> error) + (#.Right [source' + [where (<tag> (list.reverse stack))]]) + (#.Left [source' error])))))] ## Form and tuple syntax is mostly the same, differing only in the ## delimiters involved. ## They may have an arbitrary number of arbitrary Code nodes as elements. - [parse-form ..close-form #.Form "Form"] - [parse-tuple ..close-tuple #.Tuple "Tuple"] + [parse-form ..close-form #.Form] + [parse-tuple ..close-tuple #.Tuple] ) -(def: (parse-record parse source) - (-> (Parser Code) (Parser Code)) - (let [[where offset source-code] source] - (loop [source (: Source [(!forward 1 where) offset source-code]) - stack (: (List [Code Code]) #.Nil)] - (case (parse source) - (#.Right [sourceF field]) - (!letE [sourceFV value] (parse sourceF) - (recur sourceFV (#.Cons [field value] stack))) - - (#.Left [source' error]) - (if (is? ..close-record error) - (#.Right [source' - [where (#.Record (list.reverse stack))]]) - (#.Left [source' error])))))) +(template: (parse-record parse where offset source-code) + ## (-> (Parser Code) (Parser Code)) + (loop [source (: Source [(!forward 1 where) offset source-code]) + stack (: (List [Code Code]) #.Nil)] + (case (parse source) + (#.Right [sourceF field]) + (!letE [sourceFV value] (parse sourceF) + (recur sourceFV (#.Cons [field value] stack))) + + (#.Left [source' error]) + (if (is? ..close-record error) + (#.Right [source' + [where (#.Record (list.reverse stack))]]) + (#.Left [source' error]))))) (template: (!guarantee-no-new-lines where offset source-code content body) (case ("lux text index" 0 (static text.new-line) content) @@ -253,185 +258,202 @@ (-> Location Nat Text (Either [Source Text] [Source Code])) (case ("lux text index" offset (static ..text-delimiter) source-code) (#.Some g!end) - (let [g!content (!clip offset g!end source-code)] - (<| (!guarantee-no-new-lines where offset source-code g!content) - (#.Right [[(let [size (!n/- offset g!end)] - (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where)) - (!inc g!end) - source-code] - [where - (#.Text g!content)]]))) + (<| (let [g!content (!clip offset g!end source-code)]) + (!guarantee-no-new-lines where offset source-code g!content) + (#.Right [[(let [size (!n/- offset g!end)] + (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where)) + (!inc g!end) + source-code] + [where + (#.Text g!content)]])) _ (!failure ..parse-text where offset source-code))) -(def: digit-bottom Nat (!dec (char "0"))) -(def: digit-top Nat (!inc (char "9"))) - -(template: (!digit? char) - (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom))) - (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char)))) - -(`` (template: (!digit?+ char) - (or (!digit? char) - ("lux i64 =" (.char (~~ (static ..digit-separator))) char)))) - -(with-expansions [<non-name-chars> (template [<char>] +(with-expansions [<digits> (as-is "0" "1" "2" "3" "4" "5" "6" "7" "8" "9") + <non-name-chars> (template [<char>] [(~~ (static <char>))] [text.space] - [text.new-line] + [text.new-line] [text.carriage-return] [..name-separator] [..open-form] [..close-form] [..open-tuple] [..close-tuple] [..open-record] [..close-record] [..text-delimiter] - [..sigil])] - (`` (template: (!strict-name-char? char) - ("lux syntax char case!" char + [..sigil]) + <digit-separator> (static ..digit-separator)] + (template: (!if-digit? @char @then @else) + ("lux syntax char case!" @char + [[<digits>] + @then] + + ## else + @else)) + + (template: (!if-digit?+ @char @then @else-options @else) + (`` ("lux syntax char case!" @char + [[<digits> <digit-separator>] + @then + + (~~ (template.splice @else-options))] + + ## else + @else))) + + (`` (template: (!if-name-char?|tail @char @then @else) + ("lux syntax char case!" @char [[<non-name-chars>] - #0] + @else] ## else - #1)))) + @then))) -(template: (!name-char?|head char) - (and (!strict-name-char? char) - (not (!digit? char)))) + (`` (template: (!if-name-char?|head @char @then @else) + ("lux syntax char case!" @char + [[<non-name-chars> <digits>] + @else] -(template: (!name-char? char) - (!strict-name-char? char)) + ## else + @then))) + ) -(template: (!number-output <start> <end> <codec> <tag>) - (case (|> source-code +(template: (!number-output <source-code> <start> <end> <codec> <tag>) + (case (|> <source-code> (!clip <start> <end>) (text.replace-all ..digit-separator "") (\ <codec> decode)) (#.Right output) - (#.Right [[(update@ #.column (|>> (!n/+ (!n/- <start> <end>))) where) + (#.Right [[(let [[where::file where::line where::column] where] + [where::file where::line (!n/+ (!n/- <start> <end>) where::column)]) <end> - source-code] + <source-code>] [where (<tag> output)]]) (#.Left error) - (#.Left [[where <start> source-code] + (#.Left [[where <start> <source-code>] error]))) (def: no-exponent Offset 0) -(with-expansions [<int-output> (as-is (!number-output start end int.decimal #.Int)) - <frac-output> (as-is (!number-output start end frac.decimal #.Frac)) - <failure> (!failure ..parse-frac where offset source-code)] - (def: (parse-frac source-code//size start [where offset source-code]) - (-> Nat Offset (Parser Code)) +(with-expansions [<int-output> (as-is (!number-output source-code start end int.decimal #.Int)) + <frac-output> (as-is (!number-output source-code start end frac.decimal #.Frac)) + <failure> (!failure ..parse-frac where offset source-code) + <frac-separator> (static ..frac-separator) + <signs> (template [<sign>] + [(~~ (static <sign>))] + + [..positive-sign] + [..negative-sign])] + (template: (parse-frac source-code//size start where offset source-code) + ## (-> Nat Offset (Parser Code)) (loop [end offset - exponent ..no-exponent] + exponent (static ..no-exponent)] (<| (!with-char+ source-code//size source-code end char/0 <frac-output>) - (cond (!digit?+ char/0) - (recur (!inc end) exponent) - - (and (or (!n/= (char "e") char/0) - (!n/= (char "E") char/0)) - (is? ..no-exponent exponent)) - (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>) - (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1) - (!n/= (`` (char (~~ (static ..negative-sign)))) char/1)) - (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>) - (if (!digit?+ char/2) - (recur (!n/+ 3 end) char/0) - <failure>)) - <failure>)) - - ## else - <frac-output>)))) - - (def: (parse-signed start [where offset source-code]) - (-> Offset (Parser Code)) - (let [source-code//size ("lux text size" source-code)] - (loop [end offset] - (<| (!with-char+ source-code//size source-code end char <int-output>) - (cond (!digit?+ char) - (recur (!inc end)) - - (!n/= (`` (.char (~~ (static ..frac-separator)))) - char) - (parse-frac source-code//size start [where (!inc end) source-code]) - - ## else - <int-output>)))))) + (!if-digit?+ char/0 + (recur (!inc end) exponent) + + [["e" "E"] + (if (is? (static ..no-exponent) exponent) + (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>) + (`` ("lux syntax char case!" char/1 + [[<signs>] + (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>) + (!if-digit?+ char/2 + (recur (!n/+ 3 end) char/0) + [] + <failure>))] + ## else + <failure>))) + <frac-output>)] + + <frac-output>)))) + + (template: (parse-signed source-code//size start where offset source-code) + ## (-> Nat Offset (Parser Code)) + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char <int-output>) + (!if-digit?+ char + (recur (!inc end)) + + [[<frac-separator>] + (parse-frac source-code//size start where (!inc end) source-code)] + + <int-output>)))) + ) (template [<parser> <codec> <tag>] - [(def: (<parser> source-code//size start where offset source-code) - (-> Nat Nat Location Nat Text (Either [Source Text] [Source Code])) + [(template: (<parser> source-code//size start where offset source-code) + ## (-> Nat Nat Location Nat Text (Either [Source Text] [Source Code])) (loop [g!end offset] - (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end <codec> <tag>)) - (if (!digit?+ g!char) - (recur (!inc g!end)) - (!number-output start g!end <codec> <tag>)))))] + (<| (!with-char+ source-code//size source-code g!end g!char (!number-output source-code start g!end <codec> <tag>)) + (!if-digit?+ g!char + (recur (!inc g!end)) + [] + (!number-output source-code start g!end <codec> <tag>)))))] [parse-nat n.decimal #.Nat] [parse-rev rev.decimal #.Rev] ) (template: (!parse-signed source-code//size offset where source-code @aliases @end) - (let [g!offset/1 (!inc offset)] - (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) - (if (!digit? g!char/1) - (parse-signed offset [where (!inc/2 offset) source-code]) - (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier))))) + (<| (let [g!offset/1 (!inc offset)]) + (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) + (!if-digit? g!char/1 + (parse-signed source-code//size offset where (!inc/2 offset) source-code) + (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier)))) (with-expansions [<output> (#.Right [[(update@ #.column (|>> (!n/+ (!n/- start end))) where) end source-code] (!clip start end source-code)])] - (def: (parse-name-part start [where offset source-code]) - (-> Offset (Parser Text)) + (template: (parse-name-part start where offset source-code) + ## (-> Offset (Parser Text)) (let [source-code//size ("lux text size" source-code)] (loop [end offset] (<| (!with-char+ source-code//size source-code end char <output>) - (if (!name-char? char) - (recur (!inc end)) - <output>)))))) + (!if-name-char?|tail char + (recur (!inc end)) + <output>)))))) (template: (!parse-half-name @offset @char @module) - (cond (!name-char?|head @char) - (!letE [source' name] (..parse-name-part @offset [where (!inc @offset) source-code]) - (#.Right [source' [@module name]])) - - ## else - (!failure ..!parse-half-name where @offset source-code))) - -(`` (def: (parse-short-name current-module [where offset/0 source-code]) - (-> Text (Parser Name)) - (<| (!with-char source-code offset/0 char/0 - (!end-of-file where offset/0 source-code current-module)) + (!if-name-char?|head @char + (!letE [source' name] (..parse-name-part @offset where (!inc @offset) source-code) + (#.Right [source' [@module name]])) + (!failure ..!parse-half-name where @offset source-code))) + +(`` (def: (parse-short-name source-code//size current-module [where offset/0 source-code]) + (-> Nat Text (Parser Name)) + (<| (!with-char+ source-code//size source-code offset/0 char/0 + (!end-of-file where offset/0 source-code current-module)) (if (!n/= (char (~~ (static ..name-separator))) char/0) - (let [offset/1 (!inc offset/0)] - (<| (!with-char source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - (!parse-half-name offset/1 char/1 current-module))) - (!parse-half-name offset/0 char/0 ..prelude))))) - -(template: (!parse-short-name @current-module @source @where @tag) - (!letE [source' name] (..parse-short-name @current-module @source) + (<| (let [offset/1 (!inc offset/0)]) + (!with-char+ source-code//size source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + (!parse-half-name offset/1 char/1 current-module)) + (!parse-half-name offset/0 char/0 (static ..prelude)))))) + +(template: (!parse-short-name source-code//size @current-module @source @where @tag) + (!letE [source' name] (..parse-short-name source-code//size @current-module @source) (#.Right [source' [@where (@tag name)]]))) (with-expansions [<simple> (as-is (#.Right [source' ["" simple]]))] (`` (def: (parse-full-name aliases start source) (-> Aliases Offset (Parser Name)) - (<| (!letE [source' simple] (..parse-name-part start source)) + (<| (!letE [source' simple] (let [[where offset source-code] source] + (..parse-name-part start where offset source-code))) (let [[where' offset' source-code'] source']) (!with-char source-code' offset' char/separator <simple>) (if (!n/= (char (~~ (static ..name-separator))) char/separator) - (let [offset'' (!inc offset')] - (!letE [source'' complex] (..parse-name-part offset'' [(!forward 1 where') offset'' source-code']) - (if ("lux text =" "" complex) - (let [[where offset source-code] source] - (!failure ..parse-full-name where offset source-code)) - (#.Right [source'' [(|> aliases - (dictionary.get simple) - (maybe.default simple)) - complex]])))) + (<| (let [offset'' (!inc offset')]) + (!letE [source'' complex] (..parse-name-part offset'' (!forward 1 where') offset'' source-code')) + (if ("lux text =" "" complex) + (let [[where offset source-code] source] + (!failure ..parse-full-name where offset source-code)) + (#.Right [source'' [(|> aliases + (dictionary.get simple) + (maybe.default simple)) + complex]]))) <simple>))))) (template: (!parse-full-name @offset @source @where @aliases @tag) @@ -443,7 +465,7 @@ ## [expression ...] ## [form "(" [#* expression] ")"]) -(with-expansions [<consume-1> (as-is [where (!inc offset/0) source-code]) +(with-expansions [<consume-1> (as-is where (!inc offset/0) source-code) <move-1> (as-is [(!forward 1 where) (!inc offset/0) source-code]) <move-2> (as-is [(!forward 1 where) (!inc/2 offset/0) source-code]) <recur> (as-is (parse current-module aliases source-code//size)) @@ -488,47 +510,48 @@ ## Special code [(~~ (static ..sigil))] - (let [offset/1 (!inc offset/0)] - (<| (!with-char+ source-code//size source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - ("lux syntax char case!" char/1 - [[(~~ (static ..name-separator))] - (!parse-short-name current-module <move-2> where #.Tag) - - ## Single-line comment - [(~~ (static ..sigil))] - (case ("lux text index" (!inc offset/1) (static text.new-line) source-code) - (#.Some end) - (recur (!vertical where end source-code)) - - _ - (!end-of-file where offset/1 source-code current-module)) - - (~~ (template [<char> <bit>] - [[<char>] - (#.Right [[(update@ #.column (|>> !inc/2) where) - (!inc offset/1) - source-code] - [where (#.Bit <bit>)]])] - - ["0" #0] - ["1" #1]))] - - ## else - (cond (!name-char?|head char/1) ## Tag - (!parse-full-name offset/1 <move-2> where aliases #.Tag) - - ## else - (!failure ..parse where offset/0 source-code))))) + (<| (let [offset/1 (!inc offset/0)]) + (!with-char+ source-code//size source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + ("lux syntax char case!" char/1 + [[(~~ (static ..name-separator))] + (!parse-short-name source-code//size current-module <move-2> where #.Tag) + + ## Single-line comment + [(~~ (static ..sigil))] + (case ("lux text index" (!inc offset/1) (static text.new-line) source-code) + (#.Some end) + (recur (!vertical where end source-code)) + + _ + (!end-of-file where offset/1 source-code current-module)) + + (~~ (template [<char> <bit>] + [[<char>] + (#.Right [[(update@ #.column (|>> !inc/2) where) + (!inc offset/1) + source-code] + [where (#.Bit <bit>)]])] + + ["0" #0] + ["1" #1]))] + + ## else + (!if-name-char?|head char/1 + ## Tag + (!parse-full-name offset/1 <move-2> where aliases #.Tag) + (!failure ..parse where offset/0 source-code)))) ## Coincidentally (= ..name-separator ..frac-separator) - [(~~ (static ..name-separator))] - (let [offset/1 (!inc offset/0)] - (<| (!with-char+ source-code//size source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - (if (!digit? char/1) - (parse-rev source-code//size offset/0 where (!inc offset/1) source-code) - (!parse-short-name current-module [where offset/1 source-code] where #.Identifier)))) + [(~~ (static ..name-separator)) + ## (~~ (static ..frac-separator)) + ] + (<| (let [offset/1 (!inc offset/0)]) + (!with-char+ source-code//size source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + (!if-digit? char/1 + (parse-rev source-code//size offset/0 where (!inc offset/1) source-code) + (!parse-short-name source-code//size current-module [where offset/1 source-code] where #.Identifier))) [(~~ (static ..positive-sign)) (~~ (static ..negative-sign))] @@ -536,11 +559,11 @@ (!end-of-file where offset/0 source-code current-module))] ## else - (if (!digit? char/0) - ## Natural number - (parse-nat source-code//size offset/0 where (!inc offset/0) source-code) - ## Identifier - (!parse-full-name offset/0 <consume-1> where aliases #.Identifier)) + (!if-digit? char/0 + ## Natural number + (parse-nat source-code//size offset/0 where (!inc offset/0) source-code) + ## Identifier + (!parse-full-name offset/0 [<consume-1>] where aliases #.Identifier)) ))) ))) )) |