diff options
Diffstat (limited to 'stdlib/source/lux/compiler/default/syntax.lux')
-rw-r--r-- | stdlib/source/lux/compiler/default/syntax.lux | 1064 |
1 files changed, 509 insertions, 555 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 5f2d6d93b..52ac38720 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -22,179 +22,104 @@ ## updated cursor pointing to the end position, after the parser was run. ## Lux Code nodes/tokens are annotated with cursor meta-data -## (file-name, line, column) to keep track of their provenance and +## [file-name, line, column] to keep track of their provenance and ## location, which is helpful for documentation and debugging. (.module: - [lux (#- nat int rev true false) + [lux (#- int rev) [control monad ["p" parser ("parser/." Monad<Parser>)] ["ex" exception (#+ exception:)]] [data - ["e" error] + ["." error (#+ Error)] ["." number] - ["." product] - ["." maybe] ["." text - ["l" lexer (#+ Lexer)] + ["l" lexer (#+ Offset Lexer)] format] [collection - ["." row (#+ Row)] - ["." dictionary (#+ Dictionary)]]] - ["." function]]) + ["." list] + ["." dictionary (#+ Dictionary)]]]]) + +## TODO: Optimize how forms, tuples & records are parsed in the end. +## There is repeated-work going on when parsing the white-space before the +## closing parenthesis/bracket/brace. +## That repeated-work should be avoided. + +## TODO: Implement "lux syntax char case!" as a custom extension. +## That way, it should be possible to obtain the char without wrapping +## it into a java.lang.Long, thereby improving performance. + +## TODO: Make an extension to take advantage of java/lang/String::indexOf<int,int> +## to get better performance than the current "lux text index" extension. + +(type: Char Nat) + +(do-template [<name> <extension> <diff>] + [(template: (<name> value) + (<extension> value <diff>))] + + [!inc "lux i64 +" 1] + [!inc/2 "lux i64 +" 2] + [!dec "lux i64 -" 1] + ) + +(template: (!clip from to text) + ("lux text clip" text from to)) + +(do-template [<name> <extension>] + [(template: (<name> reference subject) + (<extension> subject reference))] + + [!n/= "lux i64 ="] + [!i/< "lux int <"] + ) + +(do-template [<name> <extension>] + [(template: (<name> param subject) + (<extension> subject param))] + + [!n/+ "lux i64 +"] + [!n/- "lux i64 -"] + ) + +(type: #export Syntax + (-> Cursor (Lexer [Cursor Code]))) (type: #export Aliases (Dictionary Text Text)) (def: #export no-aliases Aliases (dictionary.new text.Hash<Text>)) -(def: white-space Text "\t\v \r\f") -(def: new-line Text "\n") - -## This is the parser for white-space. -## Whenever a new-line is encountered, the column gets reset to 0, and -## the line gets incremented. -## It operates recursively in order to produce the longest continuous -## chunk of white-space. -(def: (space^ where) - (-> Cursor (Lexer [Cursor Text])) - (p.either (do p.Monad<Parser> - [content (l.many (l.one-of white-space))] - (wrap [(update@ #.column (n/+ (text.size content)) where) - content])) - ## New-lines must be handled as a separate case to ensure line - ## information is handled properly. - (do p.Monad<Parser> - [content (l.many (l.one-of new-line))] - (wrap [(|> where - (update@ #.line (n/+ (text.size content))) - (set@ #.column 0)) - content])) - )) - -## Single-line comments can start anywhere, but only go up to the -## next new-line. -(def: (single-line-comment^ where) - (-> Cursor (Lexer [Cursor Text])) - (do p.Monad<Parser> - [_ (l.this "##") - comment (l.some (l.none-of new-line)) - _ (l.this new-line)] - (wrap [(|> where - (update@ #.line inc) - (set@ #.column 0)) - comment]))) - -## This is just a helper parser to find text which doesn't run into -## any special character sequences for multi-line comments. -(def: comment-bound^ - (Lexer Any) - ($_ p.either - (l.this new-line) - (l.this ")#") - (l.this "#("))) - -## Multi-line comments are bounded by #( these delimiters, #(and, they may -## also be nested)# )#. -## Multi-line comment syntax must be balanced. -## That is, any nested comment must have matched delimiters. -## Unbalanced comments ought to be rejected as invalid code. -(def: (multi-line-comment^ where) - (-> Cursor (Lexer [Cursor Text])) - (do p.Monad<Parser> - [_ (l.this "#(")] - (loop [comment "" - where (update@ #.column (n/+ 2) where)] - ($_ p.either - ## These are normal chunks of commented text. - (do @ - [chunk (l.many (l.not comment-bound^))] - (recur (format comment chunk) - (|> where - (update@ #.column (n/+ (text.size chunk)))))) - ## This is a special rule to handle new-lines within - ## comments properly. - (do @ - [_ (l.this new-line)] - (recur (format comment new-line) - (|> where - (update@ #.line inc) - (set@ #.column 0)))) - ## This is the rule for handling nested sub-comments. - ## Ultimately, the whole comment is just treated as text - ## (the comment must respect the syntax structure, but the - ## output produced is just a block of text). - ## That is why the sub-comment is covered in delimiters - ## and then appended to the rest of the comment text. - (do @ - [[sub-where sub-comment] (multi-line-comment^ where)] - (recur (format comment "#(" sub-comment ")#") - sub-where)) - ## Finally, this is the rule for closing the comment. - (do @ - [_ (l.this ")#")] - (wrap [(update@ #.column (n/+ 2) where) - comment])) - )))) - -## This is the only parser that should be used directly by other -## parsers, since all comments must be treated as either being -## single-line or multi-line. -## That is, there is no syntactic rule prohibiting one type of comment -## from being used in any situation (alternatively, forcing one type -## of comment to be the only usable one). -(def: (comment^ where) - (-> Cursor (Lexer [Cursor Text])) - (p.either (single-line-comment^ where) - (multi-line-comment^ where))) - -## To simplify parsing, I remove any left-padding that an Code token -## may have prior to parsing the token itself. -## Left-padding is assumed to be either white-space or a comment. -## The cursor gets updated, but the padding gets ignored. -(def: (left-padding^ where) - (-> Cursor (Lexer Cursor)) - ($_ p.either - (do p.Monad<Parser> - [[where comment] (comment^ where)] - (left-padding^ where)) - (do p.Monad<Parser> - [[where white-space] (space^ where)] - (left-padding^ where)) - (:: p.Monad<Parser> wrap where))) - -## Escaped character sequences follow the usual syntax of -## back-slash followed by a letter (e.g. \n). -## Unicode escapes are possible, with hexadecimal sequences between 1 -## and 4 characters long (e.g. \u12aB). -## Escaped characters may show up in Char and Text literals. -(def: escaped-char^ - (Lexer [Nat Text]) - (p.after (l.this "\\") - (do p.Monad<Parser> - [code l.any] - (case code - ## Handle special cases. - "t" (wrap [2 "\t"]) - "v" (wrap [2 "\v"]) - "b" (wrap [2 "\b"]) - "n" (wrap [2 "\n"]) - "r" (wrap [2 "\r"]) - "f" (wrap [2 "\f"]) - "\"" (wrap [2 "\""]) - "\\" (wrap [2 "\\"]) - - ## Handle unicode escapes. - "u" - (do p.Monad<Parser> - [code (l.between 1 4 l.hexadecimal)] - (wrap (case (:: number.Hex@Codec<Text,Nat> decode code) - (#.Right value) - [(n/+ 2 (text.size code)) (text.from-code value)] - - _ - (undefined)))) - - _ - (p.fail (format "Invalid escaping syntax: " (%t code))))))) +(def: #export prelude "lux") + +(def: #export space " ") + +(def: #export text-delimiter text.double-quote) + +(def: #export open-form "(") +(def: #export close-form ")") + +(def: #export open-tuple "[") +(def: #export close-tuple "]") + +(def: #export open-record "{") +(def: #export close-record "}") + +(def: #export sigil "#") + +(def: #export digit-separator "_") + +(def: #export positive-sign "+") +(def: #export negative-sign "-") + +(def: #export frac-separator ".") + +## The parts of an name are separated by a single mark. +## E.g. module.short. +## Only one such mark may be used in an name, since there +## can only be 2 parts to an name (the module [before the +## mark], and the short [after the mark]). +## There are also some extra rules regarding name syntax, +## encoded on the parser. +(def: #export name-separator ".") ## These are very simple parsers that just cut chunks of text in ## specific shapes and then use decoders already present in the @@ -211,73 +136,8 @@ (def: sign^ (l.one-of "+-")) -(do-template [<name> <tag> <lexer> <codec>] - [(def: #export (<name> where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [chunk <lexer>] - (case (:: <codec> decode chunk) - (#.Left error) - (p.fail error) - - (#.Right value) - (wrap [(update@ #.column (n/+ (text.size chunk)) where) - [where (<tag> value)]]))))] - - [int #.Int - (l.and sign^ rich-digits^) - number.Codec<Text,Int>] - - [rev #.Rev - (l.and (l.one-of ".") - rich-digits^) - number.Codec<Text,Rev>] - ) - -(def: (nat-char where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [_ (l.this "#\"") - [where' char] (: (Lexer [Cursor Text]) - ($_ p.either - ## Normal text characters. - (do @ - [normal (l.none-of "\\\"\n")] - (wrap [(|> where - (update@ #.column inc)) - normal])) - ## Must handle escaped - ## chars separately. - (do @ - [[chars-consumed char] escaped-char^] - (wrap [(|> where - (update@ #.column (n/+ chars-consumed))) - char])))) - _ (l.this "\"") - #let [char (maybe.assume (text.nth 0 char))]] - (wrap [(|> where' - (update@ #.column inc)) - [where (#.Nat char)]]))) - -(def: (normal-nat where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [chunk rich-digits^] - (case (:: number.Codec<Text,Nat> decode chunk) - (#.Left error) - (p.fail error) - - (#.Right value) - (wrap [(update@ #.column (n/+ (text.size chunk)) where) - [where (#.Nat value)]])))) - -(def: #export (nat where) - (-> Cursor (Lexer [Cursor Code])) - (p.either (normal-nat where) - (nat-char where))) - -(def: (normal-frac where) - (-> Cursor (Lexer [Cursor Code])) +(def: #export (frac where) + Syntax (do p.Monad<Parser> [chunk ($_ l.and sign^ @@ -297,341 +157,435 @@ (wrap [(update@ #.column (n/+ (text.size chunk)) where) [where (#.Frac value)]])))) -(def: frac-ratio-fragment - (Lexer Frac) - (<| (p.codec number.Codec<Text,Frac>) - (:: p.Monad<Parser> map (function (_ digits) - (format digits ".0"))) - rich-digits^)) - -(def: (ratio-frac where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [chunk ($_ l.and - (p.default "" (l.one-of "-")) - rich-digits^ - (l.one-of "/") - rich-digits^) - value (l.local chunk - (do @ - [signed? (l.this? "-") - numerator frac-ratio-fragment - _ (l.this? "/") - denominator frac-ratio-fragment - _ (p.assert "Denominator cannot be 0." - (not (f/= +0.0 denominator)))] - (wrap (|> numerator - (f/* (if signed? -1.0 +1.0)) - (f// denominator)))))] - (wrap [(update@ #.column (n/+ (text.size chunk)) where) - [where (#.Frac value)]]))) - -(def: #export (frac where) - (-> Cursor (Lexer [Cursor Code])) - (p.either (normal-frac where) - (ratio-frac where))) - -## This parser looks so complex because text in Lux can be multi-line -## and there are rules regarding how this is handled. -(def: #export (text where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [## Lux text "is delimited by double-quotes", as usual in most - ## programming languages. - _ (l.this "\"") - ## I must know what column the text body starts at (which is - ## always 1 column after the left-delimiting quote). - ## This is important because, when procesing subsequent lines, - ## they must all start at the same column, being left-padded with - ## as many spaces as necessary to be column-aligned. - ## This helps ensure that the formatting on the text in the - ## source-code matches the formatting of the Text value. - #let [offset-column (inc (get@ #.column where))] - [where' text-read] (: (Lexer [Cursor Text]) - ## I must keep track of how much of the - ## text body has been read, how far the - ## cursor has progressed, and whether I'm - ## processing a subsequent line, or just - ## processing normal text body. - (loop [text-read "" - where (|> where - (update@ #.column inc)) - must-have-offset? #0] - (p.either (if must-have-offset? - ## If I'm at the start of a - ## new line, I must ensure the - ## space-offset is at least - ## as great as the column of - ## the text's body's column, - ## to ensure they are aligned. - (do @ - [offset (l.many (l.one-of " ")) - #let [offset-size (text.size offset)]] - (if (n/>= offset-column offset-size) - ## Any extra offset - ## becomes part of the - ## text's body. - (recur (|> offset - (text.split offset-column) - (maybe.default (undefined)) - product.right - (format text-read)) - (|> where - (update@ #.column (n/+ offset-size))) - #0) - (p.fail (format "Each line of a multi-line text must have an appropriate offset!\n" - "Expected: " (%i (.int offset-column)) " columns.\n" - " Actual: " (%i (.int offset-size)) " columns.\n")))) - ($_ p.either - ## Normal text characters. - (do @ - [normal (l.many (l.none-of "\\\"\n"))] - (recur (format text-read normal) - (|> where - (update@ #.column (n/+ (text.size normal)))) - #0)) - ## Must handle escaped - ## chars separately. - (do @ - [[chars-consumed char] escaped-char^] - (recur (format text-read char) - (|> where - (update@ #.column (n/+ chars-consumed))) - #0)) - ## The text ends when it - ## reaches the right-delimiter. - (do @ - [_ (l.this "\"")] - (wrap [(update@ #.column inc where) - text-read])))) - ## If a new-line is - ## encountered, it gets - ## appended to the value and - ## the loop is alerted that the - ## next line must have an offset. - (do @ - [_ (l.this new-line)] - (recur (format text-read new-line) - (|> where - (update@ #.line inc) - (set@ #.column 0)) - #1)))))] - (wrap [where' - [where (#.Text text-read)]]))) - -## Form and tuple syntax is mostly the same, differing only in the -## delimiters involved. -## They may have an arbitrary number of arbitrary Code nodes as elements. -(do-template [<name> <tag> <open> <close>] - [(def: (<name> where ast) - (-> Cursor - (-> Cursor (Lexer [Cursor Code])) - (Lexer [Cursor Code])) - (do p.Monad<Parser> - [_ (l.this <open>) - [where' elems] (loop [elems (: (Row Code) - row.empty) - where where] - (p.either (do @ - [## Must update the cursor as I - ## go along, to keep things accurate. - [where' elem] (ast where)] - (recur (row.add elem elems) - where')) - (do @ - [## Must take into account any - ## padding present before the - ## end-delimiter. - where' (left-padding^ where) - _ (l.this <close>)] - (wrap [(update@ #.column inc where') - (row.to-list elems)]))))] - (wrap [where' - [where (<tag> elems)]])))] - - [form #.Form "(" ")"] - [tuple #.Tuple "[" "]"] - ) - -## Records are almost (syntactically) the same as forms and tuples, -## with the exception that their elements must come in pairs (as in -## key-value pairs). -## Semantically, though, records and tuples are just 2 different -## representations for the same thing (a tuple). -## In normal Lux syntax, the key position in the pair will be a tag -## Code node, however, record Code nodes allow any Code node to occupy -## this position, since it may be useful when processing Code syntax in -## macros. -(def: (record where ast) - (-> Cursor - (-> Cursor (Lexer [Cursor Code])) - (Lexer [Cursor Code])) - (do p.Monad<Parser> - [_ (l.this "{") - [where' elems] (loop [elems (: (Row [Code Code]) - row.empty) - where where] - (p.either (do @ - [[where' key] (ast where) - [where' val] (ast where')] - (recur (row.add [key val] elems) - where')) - (do @ - [where' (left-padding^ where) - _ (l.this "}")] - (wrap [(update@ #.column inc where') - (row.to-list elems)]))))] - (wrap [where' - [where (#.Record elems)]]))) - -## The parts of an name are separated by a single mark. -## E.g. module.short. -## Only one such mark may be used in an name, since there -## can only be 2 parts to an name (the module [before the -## mark], and the short [after the mark]). -## There are also some extra rules regarding name syntax, -## encoded on the parser. -(def: name-separator Text ".") - -## A Lux name is a pair of chunks of text, where the first-part -## refers to the module that gives context to the name, and the -## second part corresponds to the short of the name itself. -## The module part may be absent (by being the empty text ""), but the -## name part must always be present. -## The rules for which characters you may use are specified in terms -## of which characters you must avoid (to keep things as open-ended as -## possible). -## In particular, no white-space can be used, and neither can other -## characters which are already used by Lux as delimiters for other -## Code nodes (thereby reducing ambiguity while parsing). -## Additionally, the first character in an name's part cannot be -## a digit, to avoid confusion with regards to numbers. -(def: name-part^ - (Lexer Text) - (do p.Monad<Parser> - [#let [digits "0123456789" - delimiters (format "()[]{}#\"" name-separator) - space (format white-space new-line) - head-lexer (l.none-of (format digits delimiters space)) - tail-lexer (l.some (l.none-of (format delimiters space)))] - head head-lexer - tail tail-lexer] - (wrap (format head tail)))) - -(def: current-module-mark Text (format name-separator name-separator)) - -(def: (name^ current-module aliases) - (-> Text Aliases (Lexer [Name Nat])) - ($_ p.either - ## When an name starts with 2 marks, its module is - ## taken to be the current-module being compiled at the moment. - ## This can be useful when mentioning names and tags - ## inside quoted/templated code in macros. - (do p.Monad<Parser> - [_ (l.this current-module-mark) - def-name name-part^] - (wrap [[current-module def-name] - (n/+ 2 (text.size def-name))])) - ## If the name is prefixed by the mark, but no module - ## part, the module is assumed to be "lux" (otherwise known as - ## the 'prelude'). - ## This makes it easy to refer to definitions in that module, - ## since it is the most fundamental module in the entire - ## standard library. - (do p.Monad<Parser> - [_ (l.this name-separator) - def-name name-part^] - (wrap [["lux" def-name] - (inc (text.size def-name))])) - ## Not all names must be specified with a module part. - ## If that part is not provided, the name will be created - ## with the empty "" text as the module. - ## During program analysis, such names tend to be treated - ## as if their context is the current-module, but this only - ## applies to names for tags and module definitions. - ## Function arguments and local-variables may not be referred-to - ## using names with module parts, so being able to specify - ## names with empty modules helps with those use-cases. - (do p.Monad<Parser> - [first-part name-part^] - (p.either (do @ - [_ (l.this name-separator) - second-part name-part^] - (wrap [[(|> aliases (dictionary.get first-part) (maybe.default first-part)) - second-part] - ($_ n/+ - (text.size first-part) - 1 - (text.size second-part))])) - (wrap [["" first-part] - (text.size first-part)]))))) - -(do-template [<name> <pre> <tag> <length>] - [(def: #export (<name> current-module aliases where) - (-> Text Aliases Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [[value length] (<| <pre> - (name^ current-module aliases))] - (wrap [(update@ #.column (|>> (n/+ <length>)) where) - [where (<tag> value)]])))] - - [tag (p.after (l.this "#")) #.Tag (n/+ 1 length)] - [identifier (|>) #.Identifier length] - ) +(exception: #export (end-of-file {module Text}) + (ex.report ["Module" (%t module)])) -(do-template [<name> <value>] - [(def: <name> - (Lexer Bit) - (:: p.Monad<Parser> map (function.constant <value>) (l.this (%b <value>))))] +(def: amount-of-input-shown 64) - [false #0] - [true #1] - ) +(exception: #export (unrecognized-input {[file line column] Cursor} {context Text} {input Text} {offset Offset}) + (let [end-offset (|> offset (n/+ amount-of-input-shown) (n/min ("lux text size" input)))] + (ex.report ["File" file] + ["Line" (%n line)] + ["Column" (%n column)] + ["Context" (%t context)] + ["Input" (!clip offset end-offset input)]))) -(def: #export (bit where) - (-> Cursor (Lexer [Cursor Code])) - (do p.Monad<Parser> - [value (p.either ..false ..true)] - (wrap [(update@ #.column (|>> (n/+ 2)) where) - [where (#.Bit value)]]))) +(exception: #export (text-cannot-contain-new-lines {text Text}) + (ex.report ["Text" (%t text)])) -(exception: #export (end-of-file {module Text}) - (ex.report ["Module" (%t module)])) +(exception: #export (invalid-escape-syntax) + "") -(exception: #export (unrecognized-input {[file line column] Cursor}) - (ex.report ["File" (%t file)] - ["Line" (%n line)] - ["Column" (%n column)])) +(exception: #export (cannot-close-composite-expression {closing-char Char}) + (ex.report ["Closing Character" (text.from-code closing-char)])) (def: (ast current-module aliases) - (-> Text Aliases Cursor (Lexer [Cursor Code])) + (-> Text Aliases Syntax) (function (ast' where) - (do p.Monad<Parser> - [where (left-padding^ where)] - ($_ p.either - (..form where ast') - (..tuple where ast') - (..record where ast') - (..text where) - (..nat where) - (..frac where) - (..int where) - (..rev where) - (..bit where) - (..identifier current-module aliases where) - (..tag current-module aliases where) - (do @ - [end? l.end?] - (if end? - (p.fail (ex.construct end-of-file current-module)) - (p.fail (ex.construct unrecognized-input where)))) - )))) - -(def: #export (read current-module aliases [where offset source-code]) - (-> Text Aliases Source (e.Error [Source Code])) - (case (p.run [offset source-code] (ast current-module aliases where)) - (#e.Error error) - (#e.Error error) - - (#e.Success [[offset' remaining] [where' output]]) - (#e.Success [[where' offset' remaining] output]))) + ($_ p.either + (..frac where) + ))) + +(type: Parser + (-> Source (Error [Source Code]))) + +(template: (!with-char+ @source-code-size @source-code @offset @char @else @body) + (if (!i/< (:coerce Int @source-code-size) + (:coerce Int @offset)) + (let [@char ("lux text char" @source-code @offset)] + @body) + @else)) + +(template: (!with-char @source-code @offset @char @else @body) + (!with-char+ ("lux text size" @source-code) @source-code @offset @char @else @body)) + +(def: close-signal "CLOSE") + +(def: (read-close closing-char source-code//size source-code offset) + (-> Char Nat Text Offset (Error Offset)) + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char (ex.throw cannot-close-composite-expression closing-char) + (if (!n/= closing-char char) + (#error.Success (!inc end)) + (`` ("lux syntax char case!" char + [[(~~ (static ..space)) + (~~ (static text.carriage-return)) + (~~ (static text.new-line))] + (recur (!inc end))] + + ## else + (ex.throw cannot-close-composite-expression closing-char)))))))) + +(`` (do-template [<name> <close> <tag> <context>] + [(def: (<name> parse source) + (-> Parser Parser) + (let [[_ _ source-code] source + source-code//size ("lux text size" source-code)] + (loop [source source + stack (: (List Code) #.Nil)] + (case (parse source) + (#error.Success [source' top]) + (recur source' (#.Cons top stack)) + + (#error.Error error) + (let [[where offset _] source] + (case (read-close (char <close>) source-code//size source-code offset) + (#error.Success offset') + (#error.Success [[(update@ #.column inc where) offset' source-code] + [where (<tag> (list.reverse stack))]]) + + (#error.Error error) + (#error.Error error)))))))] + + ## Form and tuple syntax is mostly the same, differing only in the + ## delimiters involved. + ## They may have an arbitrary number of arbitrary Code nodes as elements. + [parse-form (~~ (static ..close-form)) #.Form "Form"] + [parse-tuple (~~ (static ..close-tuple)) #.Tuple "Tuple"] + )) + +(def: (parse-record parse source) + (-> Parser Parser) + (let [[_ _ source-code] source + source-code//size ("lux text size" source-code)] + (loop [source source + stack (: (List [Code Code]) #.Nil)] + (case (parse source) + (#error.Success [sourceF field]) + (case (parse sourceF) + (#error.Success [sourceFV value]) + (recur sourceFV (#.Cons [field value] stack)) + + (#error.Error error) + (#error.Error error)) + + (#error.Error error) + (let [[where offset _] source] + (<| (!with-char+ source-code//size source-code offset closing-char (#error.Error error)) + (case (read-close (`` (char (~~ (static ..close-record)))) source-code//size source-code offset) + (#error.Success offset') + (#error.Success [[(update@ #.column inc where) offset' source-code] + [where (#.Record (list.reverse stack))]]) + + (#error.Error error) + (#error.Error error)))))))) + +(template: (!guarantee-no-new-lines content body) + (case ("lux text index" content (static text.new-line) 0) + (#.Some g!_) + (ex.throw ..text-cannot-contain-new-lines content) + + g!_ + body)) + +(template: (!read-text where offset source-code) + (case ("lux text index" source-code (static ..text-delimiter) offset) + (#.Some g!end) + (let [g!content (!clip offset g!end source-code)] + (<| (!guarantee-no-new-lines g!content) + (#error.Success [[(update@ #.column (n/+ (!n/- offset g!end)) where) + (!inc g!end) + source-code] + [where + (#.Text g!content)]]))) + + _ + (ex.throw unrecognized-input [where "Text" source-code offset]))) + +(def: digit-bottom Nat (!dec (char "0"))) +(def: digit-top Nat (!inc (char "9"))) + +(template: (!digit? char) + (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom))) + (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char)))) + +(`` (template: (!digit?+ char) + (or (!digit? char) + ("lux i64 =" (.char (~~ (static ..digit-separator))) char)))) + +(`` (template: (!strict-name-char? char) + (not (or ("lux i64 =" (.char (~~ (static ..space))) char) + ("lux i64 =" (.char (~~ (static text.new-line))) char) + + ("lux i64 =" (.char (~~ (static ..name-separator))) char) + + ("lux i64 =" (.char (~~ (static ..open-form))) char) + ("lux i64 =" (.char (~~ (static ..close-form))) char) + + ("lux i64 =" (.char (~~ (static ..open-tuple))) char) + ("lux i64 =" (.char (~~ (static ..close-tuple))) char) + + ("lux i64 =" (.char (~~ (static ..open-record))) char) + ("lux i64 =" (.char (~~ (static ..close-record))) char) + + ("lux i64 =" (.char (~~ (static ..text-delimiter))) char) + ("lux i64 =" (.char (~~ (static ..sigil))) char))))) + +(template: (!name-char?|head char) + (and (!strict-name-char? char) + (not (!digit? char)))) + +(template: (!name-char? char) + (or (!strict-name-char? char) + (!digit? char))) + +(template: (!number-output <start> <end> <codec> <tag>) + (case (:: <codec> decode (!clip <start> <end> source-code)) + (#error.Success output) + (#error.Success [[(update@ #.column (n/+ (!n/- <start> <end>)) where) + <end> + source-code] + [where (<tag> output)]]) + + (#error.Error error) + (#error.Error error))) + +(def: no-exponent Offset 0) + +(with-expansions [<int-output> (as-is (!number-output start end number.Codec<Text,Int> #.Int)) + <frac-output> (as-is (!number-output start end number.Codec<Text,Frac> #.Frac)) + <failure> (ex.throw unrecognized-input [where "Frac" source-code offset])] + (def: (parse-frac source-code//size start [where offset source-code]) + (-> Nat Offset Parser) + (loop [end offset + exponent ..no-exponent] + (<| (!with-char+ source-code//size source-code end char/0 <frac-output>) + (cond (!digit?+ char/0) + (recur (!inc end) exponent) + + (and (or (!n/= (char "e") char/0) + (!n/= (char "E") char/0)) + (not (is? ..no-exponent exponent))) + (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>) + (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1) + (!n/= (`` (char (~~ (static ..negative-sign)))) char/1)) + (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>) + (if (!digit?+ char/2) + (recur (!n/+ 3 end) char/0) + <failure>)) + <failure>)) + + ## else + <frac-output>)))) + + (def: (parse-signed start [where offset source-code]) + (-> Offset Parser) + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char <int-output>) + (cond (!digit?+ char) + (recur (!inc end)) + + (!n/= (`` (.char (~~ (static ..frac-separator)))) + char) + (parse-frac source-code//size start [where (!inc end) source-code]) + + ## else + <int-output>)))))) + +(do-template [<name> <codec> <tag>] + [(template: (<name> source-code//size start where offset source-code) + (loop [g!end offset] + (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end <codec> <tag>)) + (if (!digit?+ g!char) + (recur (!inc g!end)) + (!number-output start g!end <codec> <tag>)))))] + + [!parse-nat number.Codec<Text,Nat> #.Nat] + [!parse-rev number.Codec<Text,Rev> #.Rev] + ) + +(template: (!parse-signed source-code//size offset where source-code @end) + (let [g!offset/1 (!inc offset)] + (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) + (if (!digit? g!char/1) + (parse-signed offset [where (!inc/2 offset) source-code]) + (!parse-full-name offset [where (!inc offset) source-code] where #.Identifier))))) + +(with-expansions [<output> (#error.Success [[(update@ #.column (n/+ (!n/- start end)) where) + end + source-code] + (!clip start end source-code)])] + (def: (parse-name-part start [where offset source-code]) + (-> Offset Source (Error [Source Text])) + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char <output>) + (if (!name-char? char) + (recur (!inc end)) + <output>)))))) + +(template: (!new-line where) + (let [[where::file where::line where::column] where] + [where::file (!inc where::line) 0])) + +(with-expansions [<end> (ex.throw end-of-file current-module) + <failure> (ex.throw unrecognized-input [where "General" source-code offset/0]) + <close!> (#error.Error (`` (~~ (static close-signal)))) + <consume-1> (as-is [where (!inc offset/0) source-code]) + <consume-2> (as-is [where (!inc/2 offset/0) source-code])] + + (template: (!parse-half-name @offset @char @module) + (cond (!name-char?|head @char) + (case (..parse-name-part @offset [where (!inc @offset) source-code]) + (#error.Success [source' name]) + (#error.Success [source' [@module name]]) + + (#error.Error error) + (#error.Error error)) + + ## else + <failure>)) + + (`` (def: (parse-short-name current-module [where offset/0 source-code]) + (-> Text Source (Error [Source Name])) + (<| (!with-char source-code offset/0 char/0 <end>) + (if (!n/= (char (~~ (static ..name-separator))) char/0) + (let [offset/1 (!inc offset/0)] + (<| (!with-char source-code offset/1 char/1 <end>) + (!parse-half-name offset/1 char/1 current-module))) + (!parse-half-name offset/0 char/0 ..prelude))))) + + (template: (!parse-short-name @current-module @source @where @tag) + (case (..parse-short-name @current-module @source) + (#error.Success [source' name]) + (#error.Success [source' [@where (@tag name)]]) + + (#error.Error error) + (#error.Error error))) + + (with-expansions [<simple> (as-is (#error.Success [source' ["" simple]]))] + (`` (def: (parse-full-name start source) + (-> Offset Source (Error [Source Name])) + (case (..parse-name-part start source) + (#error.Success [source' simple]) + (let [[where' offset' source-code'] source'] + (<| (!with-char source-code' offset' char/separator <simple>) + (if (!n/= (char (~~ (static ..name-separator))) char/separator) + (let [offset'' (!inc offset')] + (case (..parse-name-part offset'' [where' offset'' source-code']) + (#error.Success [source'' complex]) + (#error.Success [source'' [simple complex]]) + + (#error.Error error) + (#error.Error error))) + <simple>))) + + (#error.Error error) + (#error.Error error))))) + + (template: (!parse-full-name @offset @source @where @tag) + (case (..parse-full-name @offset @source) + (#error.Success [source' full-name]) + (#error.Success [source' [@where (@tag full-name)]]) + + (#error.Error error) + (#error.Error error))) + + (`` (template: (<<closers>>) + [(~~ (static ..close-form)) + (~~ (static ..close-tuple)) + (~~ (static ..close-record))])) + + (with-expansions [<parse> (as-is (parse current-module aliases source-code//size)) + <horizontal-move> (as-is (recur [(update@ #.column inc where) + (!inc offset/0) + source-code]))] + (def: #export (parse current-module aliases source-code//size) + (-> Text Aliases Nat (-> Source (Error [Source Code]))) + ## The "exec []" is only there to avoid function fusion. + ## This is to preserve the loop as much as possible and keep it tight. + (exec [] + (function (recur [where offset/0 source-code]) + (<| (!with-char+ source-code//size source-code offset/0 char/0 <end>) + ## The space was singled-out for special treatment + ## because of how common it is. + (`` (if (!n/= (char (~~ (static ..space))) char/0) + <horizontal-move> + ("lux syntax char case!" char/0 + [## New line + [(~~ (static text.carriage-return))] + <horizontal-move> + + [(~~ (static text.new-line))] + (recur [(!new-line where) (!inc offset/0) source-code]) + + ## Form + [(~~ (static ..open-form))] + (parse-form <parse> <consume-1>) + + ## Tuple + [(~~ (static ..open-tuple))] + (parse-tuple <parse> <consume-1>) + + ## Record + [(~~ (static ..open-record))] + (parse-record <parse> <consume-1>) + + ## Text + [(~~ (static ..text-delimiter))] + (let [offset/1 (!inc offset/0)] + (!read-text where offset/1 source-code)) + + ## Special code + [(~~ (static ..sigil))] + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 <end>) + ("lux syntax char case!" char/1 + [(~~ (do-template [<char> <bit>] + [[<char>] + (#error.Success [[(update@ #.column (|>> !inc/2) where) + (!inc offset/1) + source-code] + [where (#.Bit <bit>)]])] + + ["0" #0] + ["1" #1])) + + ## Single-line comment + [(~~ (static ..sigil))] + (case ("lux text index" source-code (static text.new-line) offset/1) + (#.Some end) + (recur [(!new-line where) (!inc end) source-code]) + + _ + <end>) + + [(~~ (static ..name-separator))] + (!parse-short-name current-module <consume-2> where #.Tag)] + + ## else + (cond (!name-char?|head char/1) ## Tag + (!parse-full-name offset/1 <consume-2> where #.Tag) + + ## else + <failure>)))) + + ## Coincidentally (= name-separator frac-separator) + [(~~ (static ..name-separator))] + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 <end>) + (if (!digit? char/1) + (let [offset/2 (!inc offset/1)] + (!parse-rev source-code//size offset/0 where offset/2 source-code)) + (!parse-short-name current-module [where offset/1 source-code] where #.Identifier)))) + + [(~~ (static ..positive-sign)) + (~~ (static ..negative-sign))] + (!parse-signed source-code//size offset/0 where source-code <end>) + + ## Invalid characters at this point... + (~~ (<<closers>>)) + <close!>] + + ## else + (if (!digit? char/0) + ## Natural number + (let [offset/1 (!inc offset/0)] + (!parse-nat source-code//size offset/0 where offset/1 source-code)) + ## Identifier + (!parse-full-name offset/0 <consume-1> where #.Identifier)) + ))) + ))) + )) + ) |