diff options
Diffstat (limited to '')
-rw-r--r-- | stdlib/source/lux/compiler/default/syntax.lux | 393 |
1 files changed, 225 insertions, 168 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 4d778136f..1ae6a8620 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -463,93 +463,93 @@ ## encoded on the parser. (def: name-separator ".") -## A Lux name is a pair of chunks of text, where the first-part -## refers to the module that gives context to the name, and the -## second part corresponds to the short of the name itself. -## The module part may be absent (by being the empty text ""), but the -## name part must always be present. -## The rules for which characters you may use are specified in terms -## of which characters you must avoid (to keep things as open-ended as -## possible). -## In particular, no white-space can be used, and neither can other -## characters which are already used by Lux as delimiters for other -## Code nodes (thereby reducing ambiguity while parsing). -## Additionally, the first character in an name's part cannot be -## a digit, to avoid confusion with regards to numbers. -(def: name-part^ - (Lexer Text) - (let [delimiters (format ..open-form ..close-form - ..open-tuple ..close-tuple - ..open-record ..close-record - ..sigil - ..text-delimiter - ..name-separator) - space (format ..white-space ..new-line) - head (l.none-of! (format ..digits delimiters space)) - tail (l.some! (l.none-of! (format delimiters space)))] - (l.slice (l.and! head tail)))) - -(def: current-module-mark Text (format ..name-separator ..name-separator)) - -(def: (name^ current-module aliases) - (-> Text Aliases (Lexer [Name Nat])) - ($_ p.either - ## When an name starts with 2 marks, its module is - ## taken to be the current-module being compiled at the moment. - ## This can be useful when mentioning names and tags - ## inside quoted/templated code in macros. - (do p.Monad<Parser> - [_ (l.this current-module-mark) - def-name name-part^] - (wrap [[current-module def-name] - ("lux i64 +" 2 (text.size def-name))])) - ## If the name is prefixed by the mark, but no module - ## part, the module is assumed to be "lux" (otherwise known as - ## the 'prelude'). - ## This makes it easy to refer to definitions in that module, - ## since it is the most fundamental module in the entire - ## standard library. - (do p.Monad<Parser> - [_ (l.this name-separator) - def-name name-part^] - (wrap [["lux" def-name] - ("lux i64 +" 1 (text.size def-name))])) - ## Not all names must be specified with a module part. - ## If that part is not provided, the name will be created - ## with the empty "" text as the module. - ## During program analysis, such names tend to be treated - ## as if their context is the current-module, but this only - ## applies to names for tags and module definitions. - ## Function arguments and local-variables may not be referred-to - ## using names with module parts, so being able to specify - ## names with empty modules helps with those use-cases. - (do p.Monad<Parser> - [first-part name-part^] - (p.either (do @ - [_ (l.this name-separator) - second-part name-part^] - (wrap [[(|> aliases (dictionary.get first-part) (maybe.default first-part)) - second-part] - ($_ "lux i64 +" - (text.size first-part) - 1 - (text.size second-part))])) - (wrap [["" first-part] - (text.size first-part)]))))) - -(do-template [<name> <pre> <tag> <length>] - [(def: #export (<name> current-module aliases) - (-> Text Aliases Syntax) - (function (_ where) - (do p.Monad<Parser> - [[value length] (<| <pre> - (name^ current-module aliases))] - (wrap [(update@ #.column (n/+ <length>) where) - [where (<tag> value)]]))))] - - [tag (p.after (l.this ..sigil)) #.Tag ("lux i64 +" 1 length)] - [identifier (|>) #.Identifier length] - ) +## ## A Lux name is a pair of chunks of text, where the first-part +## ## refers to the module that gives context to the name, and the +## ## second part corresponds to the short of the name itself. +## ## The module part may be absent (by being the empty text ""), but the +## ## name part must always be present. +## ## The rules for which characters you may use are specified in terms +## ## of which characters you must avoid (to keep things as open-ended as +## ## possible). +## ## In particular, no white-space can be used, and neither can other +## ## characters which are already used by Lux as delimiters for other +## ## Code nodes (thereby reducing ambiguity while parsing). +## ## Additionally, the first character in an name's part cannot be +## ## a digit, to avoid confusion with regards to numbers. +## (def: name-part^ +## (Lexer Text) +## (let [delimiters (format ..open-form ..close-form +## ..open-tuple ..close-tuple +## ..open-record ..close-record +## ..sigil +## ..text-delimiter +## ..name-separator) +## space (format ..white-space ..new-line) +## head (l.none-of! (format ..digits delimiters space)) +## tail (l.some! (l.none-of! (format delimiters space)))] +## (l.slice (l.and! head tail)))) + +## (def: current-module-mark Text (format ..name-separator ..name-separator)) + +## (def: (name^ current-module aliases) +## (-> Text Aliases (Lexer [Name Nat])) +## ($_ p.either +## ## When an name starts with 2 marks, its module is +## ## taken to be the current-module being compiled at the moment. +## ## This can be useful when mentioning names and tags +## ## inside quoted/templated code in macros. +## (do p.Monad<Parser> +## [_ (l.this current-module-mark) +## def-name name-part^] +## (wrap [[current-module def-name] +## ("lux i64 +" 2 (text.size def-name))])) +## ## If the name is prefixed by the mark, but no module +## ## part, the module is assumed to be "lux" (otherwise known as +## ## the 'prelude'). +## ## This makes it easy to refer to definitions in that module, +## ## since it is the most fundamental module in the entire +## ## standard library. +## (do p.Monad<Parser> +## [_ (l.this name-separator) +## def-name name-part^] +## (wrap [["lux" def-name] +## ("lux i64 +" 1 (text.size def-name))])) +## ## Not all names must be specified with a module part. +## ## If that part is not provided, the name will be created +## ## with the empty "" text as the module. +## ## During program analysis, such names tend to be treated +## ## as if their context is the current-module, but this only +## ## applies to names for tags and module definitions. +## ## Function arguments and local-variables may not be referred-to +## ## using names with module parts, so being able to specify +## ## names with empty modules helps with those use-cases. +## (do p.Monad<Parser> +## [first-part name-part^] +## (p.either (do @ +## [_ (l.this name-separator) +## second-part name-part^] +## (wrap [[(|> aliases (dictionary.get first-part) (maybe.default first-part)) +## second-part] +## ($_ "lux i64 +" +## (text.size first-part) +## 1 +## (text.size second-part))])) +## (wrap [["" first-part] +## (text.size first-part)]))))) + +## (do-template [<name> <pre> <tag> <length>] +## [(def: #export (<name> current-module aliases) +## (-> Text Aliases Syntax) +## (function (_ where) +## (do p.Monad<Parser> +## [[value length] (<| <pre> +## (name^ current-module aliases))] +## (wrap [(update@ #.column (n/+ <length>) where) +## [where (<tag> value)]]))))] + +## [tag (p.after (l.this ..sigil)) #.Tag ("lux i64 +" 1 length)] +## [identifier (|>) #.Identifier length] +## ) ## (do-template [<name> <value>] ## [(def: <name> @@ -595,8 +595,8 @@ (..text where)) ## (<| (..timed "identifier") ## (..identifier current-module aliases where)) - (<| (..timed "tag") - (..tag current-module aliases where)) + ## (<| (..timed "tag") + ## (..tag current-module aliases where)) ## (<| (..timed "form") ## (..form ast' where)) ## (<| (..timed "tuple") @@ -610,28 +610,36 @@ (p.fail (ex.construct unrecognized-input where)))) )))) -(type: Simple - (-> Source (Error [Source Code]))) +(type: Tracker + {#new-line Offset}) -(type: Reader - (-> Text Aliases Simple)) +(def: fresh-tracker + Tracker + {#new-line 0}) + +(type: (Simple a) + (-> Tracker Source (Error [Tracker Source a]))) + +(type: (Reader a) + (-> Text Aliases (Simple a))) (do-template [<name> <extension>] [(template: (<name> value) (<extension> value 1))] - [inc! "lux i64 +"] - [dec! "lux i64 -"] + [!inc "lux i64 +"] + [!dec "lux i64 -"] ) (do-template [<name> <close> <tag>] - [(def: (<name> read-code source) - (-> Simple Simple) - (loop [source source + [(def: (<name> read-code tracker source) + (-> (Simple Code) (Simple Code)) + (loop [tracker tracker + source source stack (: (List Code) #.Nil)] - (case (read-code source) - (#error.Success [source' top]) - (recur source' (#.Cons top stack)) + (case (read-code tracker source) + (#error.Success [tracker' source' top]) + (recur tracker' source' (#.Cons top stack)) (#error.Error error) (let [[where offset source-code] source] @@ -639,8 +647,9 @@ (#.Some char) (`` (case char (^ (char (~~ (static <close>)))) - (#error.Success [[(update@ #.column inc where) - (inc! offset) + (#error.Success [tracker + [(update@ #.column inc where) + (!inc offset) source-code] [where (<tag> (list.reverse stack))]]) @@ -657,35 +666,36 @@ [read-tuple ..close-tuple #.Tuple] ) -(template: (clip! from to text) +(template: (!clip from to text) ## TODO: Optimize away "maybe.assume" (maybe.assume ("lux text clip" text from to))) -(def: (read-text [where offset source-code]) - Simple +(def: (read-text tracker [where offset source-code]) + (Simple Code) (case ("lux text index" source-code (static ..text-delimiter) offset) (#.Some end) - (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end offset)) where) - (inc! end) + (#error.Success [tracker + [(update@ #.column (n/+ ("lux i64 -" end offset)) where) + (!inc end) source-code] [where - (#.Text (clip! offset end source-code))]]) + (#.Text (!clip offset end source-code))]]) _ (ex.throw unrecognized-input where))) -(def: digit-bottom Nat (dec! (char "0"))) -(def: digit-top Nat (inc! (char "9"))) +(def: digit-bottom Nat (!dec (char "0"))) +(def: digit-top Nat (!inc (char "9"))) -(template: (digit? char) +(template: (!digit? char) (and ("lux int <" (:coerce Int (static ..digit-bottom)) (:coerce Int char)) ("lux int <" (:coerce Int char) (:coerce Int (static ..digit-top))))) -(`` (template: (digit?+ char) - (or (digit? char) +(`` (template: (!digit?+ char) + (or (!digit? char) ("lux i64 =" (.char (~~ (static ..digit-separator))) char)))) -(`` (template: (name-char? char) +(`` (template: (!strict-name-char? char) (not (or ("lux i64 =" (.char (~~ (static ..white-space))) char) ("lux i64 =" (.char (~~ (static ..new-line))) char) @@ -703,42 +713,48 @@ ("lux i64 =" (.char (~~ (static ..text-delimiter))) char) ("lux i64 =" (.char (~~ (static ..sigil))) char))))) -(template: (name-char?+ char) - (or (name-char? char) - (digit? char))) +(template: (!name-char?|head char) + (and (!strict-name-char? char) + (not (!digit? char)))) -(with-expansions [<output> (case (:: number.Codec<Text,Nat> decode (clip! start end source-code)) +(template: (!name-char? char) + (or (!strict-name-char? char) + (!digit? char))) + +(with-expansions [<output> (case (:: number.Codec<Text,Nat> decode (!clip start end source-code)) (#error.Success output) - (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where) + (#error.Success [tracker + [(update@ #.column (n/+ ("lux i64 -" end start)) where) end source-code] [where (#.Nat output)]]) (#error.Error error) (#error.Error error))] - (def: (read-nat start [where offset source-code]) - (-> Offset Simple) + (def: (read-nat start tracker [where offset source-code]) + (-> Offset (Simple Code)) (loop [end offset] (case ("lux text char" source-code end) (#.Some char) - (if (digit?+ char) - (recur (inc! end)) + (if (!digit?+ char) + (recur (!inc end)) <output>) _ <output>)))) -(with-expansions [<output> (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where) +(with-expansions [<output> (#error.Success [tracker + [(update@ #.column (n/+ ("lux i64 -" end start)) where) end source-code] - [where (#.Identifier ["" (clip! start end source-code)])]])] - (def: (read-name start [where offset source-code]) - (-> Offset Simple) + ["" (!clip start end source-code)]])] + (def: (read-name start tracker [where offset source-code]) + (-> Offset (Simple Name)) (loop [end offset] (case ("lux text char" source-code end) (#.Some char) - (cond (name-char?+ char) - (recur (inc! end)) + (cond (!name-char? char) + (recur (!inc end)) ## else <output>) @@ -746,65 +762,99 @@ _ <output>)))) -(template: (leap-bit! value) +(template: (!leap-bit value) ("lux i64 +" value 2)) -(with-expansions [<consume-1> (as-is [where (inc! offset) source-code])] - (def: (read-code current-module aliases source) - Reader +(template: (!new-line where) + (let [[where::file where::line where::column] where] + [where::file (!inc where::line) 0])) + +(template: (!read-name @offset @tracker @source @where @tag) + (case (..read-name @offset @tracker @source) + (#error.Success [tracker' source' name]) + (#error.Success [tracker' source' [@where (@tag name)]]) + + (#error.Error error) + (#error.Error error))) + +(with-expansions [<consume-1> (as-is [where (!inc offset) source-code]) + <forceful-end> (as-is (recur tracker + [where ("lux text size" source-code) source-code]))] + (def: (read-code current-module aliases tracker source) + (Reader Code) (let [read-code' (read-code current-module aliases)] - (loop [[where offset source-code] source] + (loop [tracker tracker + [where offset source-code] source] (case ("lux text char" source-code offset) - (#.Some char) - (`` (case char + (#.Some current) + (`` (case current + ## White-space (^template [<char> <direction>] (^ (char <char>)) - (recur [(update@ <direction> inc where) - (inc! offset) + (recur tracker + [(update@ <direction> inc where) + (!inc offset) source-code])) ([(~~ (static ..white-space)) #.column] [(~~ (static ..carriage-return)) #.column]) (^ (char (~~ (static ..new-line)))) - (let [[where::file where::line where::column] where] - (recur [[where::file (inc! where::line) 0] - (inc! offset) - source-code])) - + (recur tracker [(!new-line where) (!inc offset) source-code]) + + ## Form (^ (char (~~ (static ..open-form)))) - (read-form read-code' <consume-1>) + (read-form read-code' tracker <consume-1>) + ## Tuple (^ (char (~~ (static ..open-tuple)))) - (read-tuple read-code' <consume-1>) + (read-tuple read-code' tracker <consume-1>) + ## Text (^ (char (~~ (static ..text-delimiter)))) - (read-text <consume-1>) + (read-text tracker <consume-1>) + ## Special code (^ (char (~~ (static ..sigil)))) - (case ("lux text char" source-code (inc! offset)) - (#.Some next) - (case next - (^template [<char> <bit>] - (^ (char <char>)) - (#error.Success [[(update@ #.column (|>> leap-bit!) where) - (leap-bit! offset) - source-code] - [where (#.Bit <bit>)]])) - (["0" #0] - ["1" #1]) - + (let [offset' (!inc offset)] + (case ("lux text char" source-code offset') + (#.Some next) + (case next + (^template [<char> <bit>] + (^ (char <char>)) + (#error.Success [tracker + [(update@ #.column (|>> !leap-bit) where) + (!leap-bit offset) + source-code] + [where (#.Bit <bit>)]])) + (["0" #0] + ["1" #1]) + + ## Single-line comment + (^ (char (~~ (static ..sigil)))) + (case ("lux text index" source-code (static ..new-line) offset') + (#.Some end) + (recur tracker [(!new-line where) (!inc end) source-code]) + + _ + <forceful-end>) + + _ + (cond (!name-char?|head next) ## Tag + (!read-name offset tracker <consume-1> where #.Tag) + + ## else + (ex.throw unrecognized-input where))) + _ - (ex.throw unrecognized-input where)) - - _ - (ex.throw end-of-file current-module)) + (ex.throw end-of-file current-module))) _ - (cond (digit? char) - (read-nat offset <consume-1>) + (cond (!digit? current) ## Natural number + (read-nat offset tracker <consume-1>) - (name-char? char) - (read-name offset <consume-1>) + ## Identifier + (!name-char?|head current) + (!read-name offset tracker <consume-1> where #.Identifier) ## else (ex.throw unrecognized-input where)))) @@ -813,7 +863,14 @@ (ex.throw end-of-file current-module)))))) ## [where offset source-code] -(def: #export read Reader read-code) +(def: #export (read current-module aliases source) + (-> Text Aliases Source (Error [Source Code])) + (case (read-code current-module aliases fresh-tracker source) + (#error.Error error) + (#error.Error error) + + (#error.Success [tracker' source' output]) + (#error.Success [source' output]))) ## (def: #export (read current-module aliases source) ## (-> Text Aliases Source (Error [Source Code])) |