From cf9e65352cb477982754c98fafe0a3a98a42670d Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Fri, 10 Jan 2020 20:56:36 -0400 Subject: Re-located Lux's parser. --- README.md | 2 +- stdlib/source/lux/tool/compiler/default/init.lux | 10 +- .../source/lux/tool/compiler/default/platform.lux | 6 +- stdlib/source/lux/tool/compiler/default/syntax.lux | 548 --------------------- .../lux/tool/compiler/language/lux/syntax.lux | 548 +++++++++++++++++++++ stdlib/source/program/compositor.lux | 6 +- 6 files changed, 563 insertions(+), 557 deletions(-) delete mode 100644 stdlib/source/lux/tool/compiler/default/syntax.lux create mode 100644 stdlib/source/lux/tool/compiler/language/lux/syntax.lux diff --git a/README.md b/README.md index 5050b487e..0a5e9c159 100644 --- a/README.md +++ b/README.md @@ -178,4 +178,4 @@ I'll be putting there tasks that people can contribute to; both in the compiler Writing libraries in Lux will also help a lot in making this a more practical language for day to day use. -##### Copyright (c) 2014-2018 Eduardo Julian. All rights reserved. +##### Copyright (c) 2014-2020 Eduardo Julian. All rights reserved. diff --git a/stdlib/source/lux/tool/compiler/default/init.lux b/stdlib/source/lux/tool/compiler/default/init.lux index dc94f5507..370894d26 100644 --- a/stdlib/source/lux/tool/compiler/default/init.lux +++ b/stdlib/source/lux/tool/compiler/default/init.lux @@ -16,8 +16,10 @@ [world ["." file]]] ["." // - ["#." syntax (#+ Aliases)] ["/#" // (#+ Instancer) + [language + [lux + ["." syntax (#+ Aliases)]]] ["#." analysis [macro (#+ Expander)] ["#/." evaluation]] @@ -80,7 +82,7 @@ (-> Module Aliases Source (///analysis.Operation Reader)) (function (_ [bundle state]) (#try.Success [[bundle state] - (//syntax.parse current-module aliases ("lux text size" source-code))]))) + (syntax.parse current-module aliases ("lux text size" source-code))]))) (def: (read source reader) (-> Source Reader (///analysis.Operation [Source Code])) @@ -175,7 +177,7 @@ (#try.Success [state (#.Some source&requirements&buffer)]) (#try.Failure error) - (if (ex.match? //syntax.end-of-file error) + (if (ex.match? syntax.end-of-file error) (#try.Success [state #.None]) (ex.with-stack ///.cannot-compile module (#try.Failure error))))))) @@ -204,7 +206,7 @@ (..begin dependencies hash input)) #let [module (get@ #///.module input)]] (loop [iteration (<| (///phase.run' state) - (..iterate expander module source buffer //syntax.no-aliases))] + (..iterate expander module source buffer syntax.no-aliases))] (do @ [[state ?source&requirements&buffer] iteration] (case ?source&requirements&buffer diff --git a/stdlib/source/lux/tool/compiler/default/platform.lux b/stdlib/source/lux/tool/compiler/default/platform.lux index 3c26cc232..c6df2955e 100644 --- a/stdlib/source/lux/tool/compiler/default/platform.lux +++ b/stdlib/source/lux/tool/compiler/default/platform.lux @@ -18,8 +18,10 @@ ["." file (#+ File)]]] ["." // #_ ["#." init] - ["#." syntax] ["/#" // + [language + [lux + ["." syntax]]] ["#." analysis [macro (#+ Expander)]] ["#." generation (#+ Buffer)] @@ -151,7 +153,7 @@ { state} {(///.Compiler .Module Any) - ((//init.compiler expander //syntax.prelude) //init.key (list))})] + ((//init.compiler expander syntax.prelude) //init.key (list))})] (loop [module source-module [archive state] [archive state]] (if (archive.archived? archive module) diff --git a/stdlib/source/lux/tool/compiler/default/syntax.lux b/stdlib/source/lux/tool/compiler/default/syntax.lux deleted file mode 100644 index 0b938b49a..000000000 --- a/stdlib/source/lux/tool/compiler/default/syntax.lux +++ /dev/null @@ -1,548 +0,0 @@ -## This is LuxC's parser. -## It takes the source code of a Lux file in raw text form and -## extracts the syntactic structure of the code from it. -## It only produces Lux Code nodes, and thus removes any white-space -## and comments while processing its inputs. - -## Another important aspect of the parser is that it keeps track of -## its position within the input data. -## That is, the parser takes into account the line and column -## information in the input text (it doesn't really touch the -## file-name aspect of the cursor, leaving it intact in whatever -## base-line cursor it is given). - -## This particular piece of functionality is not located in one -## function, but it is instead scattered throughout several parsers, -## since the logic for how to update the cursor varies, depending on -## what is being parsed, and the rules involved. - -## You will notice that several parsers have a "where" parameter, that -## tells them the cursor position prior to the parser being run. -## They are supposed to produce some parsed output, alongside an -## updated cursor pointing to the end position, after the parser was run. - -## Lux Code nodes/tokens are annotated with cursor meta-data -## [file-name, line, column] to keep track of their provenance and -## location, which is helpful for documentation and debugging. -(.module: - [lux #* - [abstract - monad] - [control - ["." exception (#+ exception:)] - [parser - [text (#+ Offset)]]] - [data - ["." maybe] - [number - ["n" nat] - ["." int] - ["." rev] - ["." frac]] - ["." text - ["%" format (#+ format)]] - [collection - ["." list] - ["." dictionary (#+ Dictionary)]]] - [macro - ["." template]]]) - -## TODO: Implement "lux syntax char case!" as a custom extension. -## That way, it should be possible to obtain the char without wrapping -## it into a java.lang.Long, thereby improving performance. - -## TODO: Make an extension to take advantage of java/lang/String::indexOf -## to get better performance than the current "lux text index" extension. - -## TODO: Instead of always keeping a "where" cursor variable, keep the -## individual components (i.e. file, line and column) separate, so -## that updated the "where" only involved updating the components, and -## producing the cursors only involved building them, without any need -## for pattern-matching and de-structuring. - -(type: Char Nat) - -(template [ ] - [(template: ( value) - ( value))] - - [!inc "lux i64 +" 1] - [!inc/2 "lux i64 +" 2] - [!dec "lux i64 -" 1] - ) - -(template: (!clip from to text) - ("lux text clip" from to text)) - -(template [ ] - [(template: ( reference subject) - ( reference subject))] - - [!n/= "lux i64 ="] - [!i/< "lux i64 <"] - ) - -(template [ ] - [(template: ( param subject) - ( param subject))] - - [!n/+ "lux i64 +"] - [!n/- "lux i64 -"] - ) - -(type: #export Aliases (Dictionary Text Text)) -(def: #export no-aliases Aliases (dictionary.new text.hash)) - -(def: #export prelude "lux") - -(def: #export space " ") - -(def: #export text-delimiter text.double-quote) - -(def: #export open-form "(") -(def: #export close-form ")") - -(def: #export open-tuple "[") -(def: #export close-tuple "]") - -(def: #export open-record "{") -(def: #export close-record "}") - -(def: #export sigil "#") - -(def: #export digit-separator ",") - -(def: #export positive-sign "+") -(def: #export negative-sign "-") - -(def: #export frac-separator ".") - -## The parts of an name are separated by a single mark. -## E.g. module.short. -## Only one such mark may be used in an name, since there -## can only be 2 parts to an name (the module [before the -## mark], and the short [after the mark]). -## There are also some extra rules regarding name syntax, -## encoded on the parser. -(def: #export name-separator ".") - -(exception: #export (end-of-file {module Text}) - (exception.report - ["Module" (%.text module)])) - -(def: amount-of-input-shown 64) - -(def: (input-at start input) - (-> Offset Text Text) - (let [end (|> start (!n/+ amount-of-input-shown) (n.min ("lux text size" input)))] - (!clip start end input))) - -(exception: #export (unrecognized-input {[file line column] Cursor} {context Text} {input Text} {offset Offset}) - (exception.report - ["File" file] - ["Line" (%.nat line)] - ["Column" (%.nat column)] - ["Context" (%.text context)] - ["Input" (input-at offset input)])) - -(exception: #export (text-cannot-contain-new-lines {text Text}) - (exception.report - ["Text" (%.text text)])) - -(template: (!failure parser where offset source-code) - (#.Left [[where offset source-code] - (exception.construct unrecognized-input [where (%.name (name-of parser)) source-code offset])])) - -(template: (!end-of-file where offset source-code current-module) - (#.Left [[where offset source-code] - (exception.construct ..end-of-file current-module)])) - -(type: (Parser a) - (-> Source (Either [Source Text] [Source a]))) - -(template: (!with-char+ @source-code-size @source-code @offset @char @else @body) - (if (!i/< (:coerce Int @source-code-size) - (:coerce Int @offset)) - (let [@char ("lux text char" @offset @source-code)] - @body) - @else)) - -(template: (!with-char @source-code @offset @char @else @body) - (!with-char+ ("lux text size" @source-code) @source-code @offset @char @else @body)) - -(template: (!letE ) - (case - (#.Right ) - - - (#.Left error) - (#.Left error))) - -(template: (!horizontal where offset source-code) - [(update@ #.column inc where) - (!inc offset) - source-code]) - -(template: (!new-line where) - ## (-> Cursor Cursor) - (let [[where::file where::line where::column] where] - [where::file (!inc where::line) 0])) - -(template: (!forward length where) - ## (-> Nat Cursor Cursor) - (let [[where::file where::line where::column] where] - [where::file where::line (!n/+ length where::column)])) - -(template: (!vertical where offset source-code) - [(!new-line where) - (!inc offset) - source-code]) - -(def: close-signal - (template.with-locals [g!close-signal] - (template.text [g!close-signal]))) - -(template [ ] - [(def: ( parse source) - (-> (Parser Code) (Parser Code)) - (let [[where offset source-code] source] - (loop [source (: Source [(!forward 1 where) offset source-code]) - stack (: (List Code) #.Nil)] - (case (parse source) - (#.Right [source' top]) - (recur source' (#.Cons top stack)) - - (#.Left [source' error]) - (if (is? error) - (#.Right [source' - [where ( (list.reverse stack))]]) - (#.Left [source' error]))))))] - - ## Form and tuple syntax is mostly the same, differing only in the - ## delimiters involved. - ## They may have an arbitrary number of arbitrary Code nodes as elements. - [parse-form ..close-form #.Form "Form"] - [parse-tuple ..close-tuple #.Tuple "Tuple"] - ) - -(def: (parse-record parse source) - (-> (Parser Code) (Parser Code)) - (let [[where offset source-code] source] - (loop [source (: Source [(!forward 1 where) offset source-code]) - stack (: (List [Code Code]) #.Nil)] - (case (parse source) - (#.Right [sourceF field]) - (!letE [sourceFV value] (parse sourceF) - (recur sourceFV (#.Cons [field value] stack))) - - (#.Left [source' error]) - (if (is? ..close-record error) - (#.Right [source' - [where (#.Record (list.reverse stack))]]) - (#.Left [source' error])))))) - -(template: (!guarantee-no-new-lines where offset source-code content body) - (case ("lux text index" 0 (static text.new-line) content) - #.None - body - - g!_ - (#.Left [[where offset source-code] - (exception.construct ..text-cannot-contain-new-lines content)]))) - -(def: (parse-text where offset source-code) - (-> Cursor Nat Text (Either [Source Text] [Source Code])) - (case ("lux text index" offset (static ..text-delimiter) source-code) - (#.Some g!end) - (let [g!content (!clip offset g!end source-code)] - (<| (!guarantee-no-new-lines where offset source-code g!content) - (#.Right [[(let [size (!n/- offset g!end)] - (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where)) - (!inc g!end) - source-code] - [where - (#.Text g!content)]]))) - - _ - (!failure ..parse-text where offset source-code))) - -(def: digit-bottom Nat (!dec (char "0"))) -(def: digit-top Nat (!inc (char "9"))) - -(template: (!digit? char) - (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom))) - (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char)))) - -(`` (template: (!digit?+ char) - (or (!digit? char) - ("lux i64 =" (.char (~~ (static ..digit-separator))) char)))) - -(with-expansions [ (template [] - [("lux i64 =" (.char (~~ (static ))) char) - #0] - - [..space] [text.new-line] - [..name-separator] - [..open-form] [..close-form] - [..open-tuple] [..close-tuple] - [..open-record] [..close-record] - [..text-delimiter] - [..sigil])] - (`` (template: (!strict-name-char? char) - (cond - ## else - #1)))) - -(template: (!name-char?|head char) - (and (!strict-name-char? char) - (not (!digit? char)))) - -(template: (!name-char? char) - (or (!strict-name-char? char) - (!digit? char))) - -(template: (!number-output ) - (case (|> source-code - (!clip ) - (text.replace-all ..digit-separator "") - (:: decode)) - (#.Right output) - (#.Right [[(update@ #.column (|>> (!n/+ (!n/- ))) where) - - source-code] - [where ( output)]]) - - (#.Left error) - (#.Left [[where source-code] - error]))) - -(def: no-exponent Offset 0) - -(with-expansions [ (as-is (!number-output start end int.decimal #.Int)) - (as-is (!number-output start end frac.decimal #.Frac)) - (!failure ..parse-frac where offset source-code)] - (def: (parse-frac source-code//size start [where offset source-code]) - (-> Nat Offset (Parser Code)) - (loop [end offset - exponent ..no-exponent] - (<| (!with-char+ source-code//size source-code end char/0 ) - (cond (!digit?+ char/0) - (recur (!inc end) exponent) - - (and (or (!n/= (char "e") char/0) - (!n/= (char "E") char/0)) - (not (is? ..no-exponent exponent))) - (<| (!with-char+ source-code//size source-code (!inc end) char/1 ) - (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1) - (!n/= (`` (char (~~ (static ..negative-sign)))) char/1)) - (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 ) - (if (!digit?+ char/2) - (recur (!n/+ 3 end) char/0) - )) - )) - - ## else - )))) - - (def: (parse-signed start [where offset source-code]) - (-> Offset (Parser Code)) - (let [source-code//size ("lux text size" source-code)] - (loop [end offset] - (<| (!with-char+ source-code//size source-code end char ) - (cond (!digit?+ char) - (recur (!inc end)) - - (!n/= (`` (.char (~~ (static ..frac-separator)))) - char) - (parse-frac source-code//size start [where (!inc end) source-code]) - - ## else - )))))) - -(template [ ] - [(def: ( source-code//size start where offset source-code) - (-> Nat Nat Cursor Nat Text (Either [Source Text] [Source Code])) - (loop [g!end offset] - (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end )) - (if (!digit?+ g!char) - (recur (!inc g!end)) - (!number-output start g!end )))))] - - [parse-nat n.decimal #.Nat] - [parse-rev rev.decimal #.Rev] - ) - -(template: (!parse-signed source-code//size offset where source-code @aliases @end) - (let [g!offset/1 (!inc offset)] - (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) - (if (!digit? g!char/1) - (parse-signed offset [where (!inc/2 offset) source-code]) - (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier))))) - -(with-expansions [ (#.Right [[(update@ #.column (|>> (!n/+ (!n/- start end))) where) - end - source-code] - (!clip start end source-code)])] - (def: (parse-name-part start [where offset source-code]) - (-> Offset (Parser Text)) - (let [source-code//size ("lux text size" source-code)] - (loop [end offset] - (<| (!with-char+ source-code//size source-code end char ) - (if (!name-char? char) - (recur (!inc end)) - )))))) - -(template: (!parse-half-name @offset @char @module) - (cond (!name-char?|head @char) - (!letE [source' name] (..parse-name-part @offset [where (!inc @offset) source-code]) - (#.Right [source' [@module name]])) - - ## else - (!failure ..!parse-half-name where @offset source-code))) - -(`` (def: (parse-short-name current-module [where offset/0 source-code]) - (-> Text (Parser Name)) - (<| (!with-char source-code offset/0 char/0 - (!end-of-file where offset/0 source-code current-module)) - (if (!n/= (char (~~ (static ..name-separator))) char/0) - (let [offset/1 (!inc offset/0)] - (<| (!with-char source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - (!parse-half-name offset/1 char/1 current-module))) - (!parse-half-name offset/0 char/0 ..prelude))))) - -(template: (!parse-short-name @current-module @source @where @tag) - (!letE [source' name] (..parse-short-name @current-module @source) - (#.Right [source' [@where (@tag name)]]))) - -(with-expansions [ (as-is (#.Right [source' ["" simple]]))] - (`` (def: (parse-full-name aliases start source) - (-> Aliases Offset (Parser Name)) - (!letE [source' simple] (..parse-name-part start source) - (let [[where' offset' source-code'] source'] - (<| (!with-char source-code' offset' char/separator ) - (if (!n/= (char (~~ (static ..name-separator))) char/separator) - (let [offset'' (!inc offset')] - (!letE [source'' complex] (..parse-name-part offset'' [(!forward 1 where') offset'' source-code']) - (if ("lux text =" "" complex) - (let [[where offset source-code] source] - (!failure ..parse-full-name where offset source-code)) - (#.Right [source'' [(|> aliases - (dictionary.get simple) - (maybe.default simple)) - complex]])))) - ))))))) - -(template: (!parse-full-name @offset @source @where @aliases @tag) - (!letE [source' full-name] (..parse-full-name @aliases @offset @source) - (#.Right [source' [@where (@tag full-name)]]))) - -## TODO: Grammar macro for specifying syntax. -## (grammar: lux-grammar -## [expression ...] -## [form "(" [#* expression] ")"]) - -(with-expansions [ (as-is [where (!inc offset/0) source-code]) - (as-is [(!forward 1 where) (!inc offset/0) source-code]) - (as-is [(!forward 1 where) (!inc/2 offset/0) source-code]) - (as-is (parse current-module aliases source-code//size)) - (as-is (recur (!horizontal where offset/0 source-code)))] - - (template: (!close closer) - (#.Left [ closer])) - - (def: #export (parse current-module aliases source-code//size) - (-> Text Aliases Nat (Parser Code)) - ## The "exec []" is only there to avoid function fusion. - ## This is to preserve the loop as much as possible and keep it tight. - (exec [] - (function (recur [where offset/0 source-code]) - (<| (!with-char+ source-code//size source-code offset/0 char/0 - (!end-of-file where offset/0 source-code current-module)) - (with-expansions [ (template [ ] - [[(~~ (static ))] - ( ) - - [(~~ (static ))] - (!close )] - - [..open-form ..close-form parse-form] - [..open-tuple ..close-tuple parse-tuple] - [..open-record ..close-record parse-record] - )] - ## TODO: Add ..space as just another case for "lux syntax char case!" ASAP. - ## It"s currently failing for some reason. - (`` (if (!n/= (char (~~ (static ..space))) char/0) - - ("lux syntax char case!" char/0 - [[(~~ (static text.carriage-return))] - - - ## New line - [(~~ (static text.new-line))] - (recur (!vertical where offset/0 source-code)) - - - - ## Text - [(~~ (static ..text-delimiter))] - (parse-text where (!inc offset/0) source-code) - - ## Special code - [(~~ (static ..sigil))] - (let [offset/1 (!inc offset/0)] - (<| (!with-char+ source-code//size source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - ("lux syntax char case!" char/1 - [[(~~ (static ..name-separator))] - (!parse-short-name current-module where #.Tag) - - ## Single-line comment - [(~~ (static ..sigil))] - (case ("lux text index" (!inc offset/1) (static text.new-line) source-code) - (#.Some end) - (recur (!vertical where end source-code)) - - _ - (!end-of-file where offset/1 source-code current-module)) - - (~~ (template [ ] - [[] - (#.Right [[(update@ #.column (|>> !inc/2) where) - (!inc offset/1) - source-code] - [where (#.Bit )]])] - - ["0" #0] - ["1" #1]))] - - ## else - (cond (!name-char?|head char/1) ## Tag - (!parse-full-name offset/1 where aliases #.Tag) - - ## else - (!failure ..parse where offset/0 source-code))))) - - ## Coincidentally (= name-separator frac-separator) - [(~~ (static ..name-separator))] - (let [offset/1 (!inc offset/0)] - (<| (!with-char+ source-code//size source-code offset/1 char/1 - (!end-of-file where offset/1 source-code current-module)) - (if (!digit? char/1) - (parse-rev source-code//size offset/0 where (!inc offset/1) source-code) - (!parse-short-name current-module [where offset/1 source-code] where #.Identifier)))) - - [(~~ (static ..positive-sign)) - (~~ (static ..negative-sign))] - (!parse-signed source-code//size offset/0 where source-code aliases - (!end-of-file where offset/0 source-code current-module))] - - ## else - (if (!digit? char/0) - ## Natural number - (parse-nat source-code//size offset/0 where (!inc offset/0) source-code) - ## Identifier - (!parse-full-name offset/0 where aliases #.Identifier)) - )))) - ))) - )) diff --git a/stdlib/source/lux/tool/compiler/language/lux/syntax.lux b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux new file mode 100644 index 000000000..0b938b49a --- /dev/null +++ b/stdlib/source/lux/tool/compiler/language/lux/syntax.lux @@ -0,0 +1,548 @@ +## This is LuxC's parser. +## It takes the source code of a Lux file in raw text form and +## extracts the syntactic structure of the code from it. +## It only produces Lux Code nodes, and thus removes any white-space +## and comments while processing its inputs. + +## Another important aspect of the parser is that it keeps track of +## its position within the input data. +## That is, the parser takes into account the line and column +## information in the input text (it doesn't really touch the +## file-name aspect of the cursor, leaving it intact in whatever +## base-line cursor it is given). + +## This particular piece of functionality is not located in one +## function, but it is instead scattered throughout several parsers, +## since the logic for how to update the cursor varies, depending on +## what is being parsed, and the rules involved. + +## You will notice that several parsers have a "where" parameter, that +## tells them the cursor position prior to the parser being run. +## They are supposed to produce some parsed output, alongside an +## updated cursor pointing to the end position, after the parser was run. + +## Lux Code nodes/tokens are annotated with cursor meta-data +## [file-name, line, column] to keep track of their provenance and +## location, which is helpful for documentation and debugging. +(.module: + [lux #* + [abstract + monad] + [control + ["." exception (#+ exception:)] + [parser + [text (#+ Offset)]]] + [data + ["." maybe] + [number + ["n" nat] + ["." int] + ["." rev] + ["." frac]] + ["." text + ["%" format (#+ format)]] + [collection + ["." list] + ["." dictionary (#+ Dictionary)]]] + [macro + ["." template]]]) + +## TODO: Implement "lux syntax char case!" as a custom extension. +## That way, it should be possible to obtain the char without wrapping +## it into a java.lang.Long, thereby improving performance. + +## TODO: Make an extension to take advantage of java/lang/String::indexOf +## to get better performance than the current "lux text index" extension. + +## TODO: Instead of always keeping a "where" cursor variable, keep the +## individual components (i.e. file, line and column) separate, so +## that updated the "where" only involved updating the components, and +## producing the cursors only involved building them, without any need +## for pattern-matching and de-structuring. + +(type: Char Nat) + +(template [ ] + [(template: ( value) + ( value))] + + [!inc "lux i64 +" 1] + [!inc/2 "lux i64 +" 2] + [!dec "lux i64 -" 1] + ) + +(template: (!clip from to text) + ("lux text clip" from to text)) + +(template [ ] + [(template: ( reference subject) + ( reference subject))] + + [!n/= "lux i64 ="] + [!i/< "lux i64 <"] + ) + +(template [ ] + [(template: ( param subject) + ( param subject))] + + [!n/+ "lux i64 +"] + [!n/- "lux i64 -"] + ) + +(type: #export Aliases (Dictionary Text Text)) +(def: #export no-aliases Aliases (dictionary.new text.hash)) + +(def: #export prelude "lux") + +(def: #export space " ") + +(def: #export text-delimiter text.double-quote) + +(def: #export open-form "(") +(def: #export close-form ")") + +(def: #export open-tuple "[") +(def: #export close-tuple "]") + +(def: #export open-record "{") +(def: #export close-record "}") + +(def: #export sigil "#") + +(def: #export digit-separator ",") + +(def: #export positive-sign "+") +(def: #export negative-sign "-") + +(def: #export frac-separator ".") + +## The parts of an name are separated by a single mark. +## E.g. module.short. +## Only one such mark may be used in an name, since there +## can only be 2 parts to an name (the module [before the +## mark], and the short [after the mark]). +## There are also some extra rules regarding name syntax, +## encoded on the parser. +(def: #export name-separator ".") + +(exception: #export (end-of-file {module Text}) + (exception.report + ["Module" (%.text module)])) + +(def: amount-of-input-shown 64) + +(def: (input-at start input) + (-> Offset Text Text) + (let [end (|> start (!n/+ amount-of-input-shown) (n.min ("lux text size" input)))] + (!clip start end input))) + +(exception: #export (unrecognized-input {[file line column] Cursor} {context Text} {input Text} {offset Offset}) + (exception.report + ["File" file] + ["Line" (%.nat line)] + ["Column" (%.nat column)] + ["Context" (%.text context)] + ["Input" (input-at offset input)])) + +(exception: #export (text-cannot-contain-new-lines {text Text}) + (exception.report + ["Text" (%.text text)])) + +(template: (!failure parser where offset source-code) + (#.Left [[where offset source-code] + (exception.construct unrecognized-input [where (%.name (name-of parser)) source-code offset])])) + +(template: (!end-of-file where offset source-code current-module) + (#.Left [[where offset source-code] + (exception.construct ..end-of-file current-module)])) + +(type: (Parser a) + (-> Source (Either [Source Text] [Source a]))) + +(template: (!with-char+ @source-code-size @source-code @offset @char @else @body) + (if (!i/< (:coerce Int @source-code-size) + (:coerce Int @offset)) + (let [@char ("lux text char" @offset @source-code)] + @body) + @else)) + +(template: (!with-char @source-code @offset @char @else @body) + (!with-char+ ("lux text size" @source-code) @source-code @offset @char @else @body)) + +(template: (!letE ) + (case + (#.Right ) + + + (#.Left error) + (#.Left error))) + +(template: (!horizontal where offset source-code) + [(update@ #.column inc where) + (!inc offset) + source-code]) + +(template: (!new-line where) + ## (-> Cursor Cursor) + (let [[where::file where::line where::column] where] + [where::file (!inc where::line) 0])) + +(template: (!forward length where) + ## (-> Nat Cursor Cursor) + (let [[where::file where::line where::column] where] + [where::file where::line (!n/+ length where::column)])) + +(template: (!vertical where offset source-code) + [(!new-line where) + (!inc offset) + source-code]) + +(def: close-signal + (template.with-locals [g!close-signal] + (template.text [g!close-signal]))) + +(template [ ] + [(def: ( parse source) + (-> (Parser Code) (Parser Code)) + (let [[where offset source-code] source] + (loop [source (: Source [(!forward 1 where) offset source-code]) + stack (: (List Code) #.Nil)] + (case (parse source) + (#.Right [source' top]) + (recur source' (#.Cons top stack)) + + (#.Left [source' error]) + (if (is? error) + (#.Right [source' + [where ( (list.reverse stack))]]) + (#.Left [source' error]))))))] + + ## Form and tuple syntax is mostly the same, differing only in the + ## delimiters involved. + ## They may have an arbitrary number of arbitrary Code nodes as elements. + [parse-form ..close-form #.Form "Form"] + [parse-tuple ..close-tuple #.Tuple "Tuple"] + ) + +(def: (parse-record parse source) + (-> (Parser Code) (Parser Code)) + (let [[where offset source-code] source] + (loop [source (: Source [(!forward 1 where) offset source-code]) + stack (: (List [Code Code]) #.Nil)] + (case (parse source) + (#.Right [sourceF field]) + (!letE [sourceFV value] (parse sourceF) + (recur sourceFV (#.Cons [field value] stack))) + + (#.Left [source' error]) + (if (is? ..close-record error) + (#.Right [source' + [where (#.Record (list.reverse stack))]]) + (#.Left [source' error])))))) + +(template: (!guarantee-no-new-lines where offset source-code content body) + (case ("lux text index" 0 (static text.new-line) content) + #.None + body + + g!_ + (#.Left [[where offset source-code] + (exception.construct ..text-cannot-contain-new-lines content)]))) + +(def: (parse-text where offset source-code) + (-> Cursor Nat Text (Either [Source Text] [Source Code])) + (case ("lux text index" offset (static ..text-delimiter) source-code) + (#.Some g!end) + (let [g!content (!clip offset g!end source-code)] + (<| (!guarantee-no-new-lines where offset source-code g!content) + (#.Right [[(let [size (!n/- offset g!end)] + (update@ #.column (|>> (!n/+ size) (!n/+ 2)) where)) + (!inc g!end) + source-code] + [where + (#.Text g!content)]]))) + + _ + (!failure ..parse-text where offset source-code))) + +(def: digit-bottom Nat (!dec (char "0"))) +(def: digit-top Nat (!inc (char "9"))) + +(template: (!digit? char) + (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom))) + (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char)))) + +(`` (template: (!digit?+ char) + (or (!digit? char) + ("lux i64 =" (.char (~~ (static ..digit-separator))) char)))) + +(with-expansions [ (template [] + [("lux i64 =" (.char (~~ (static ))) char) + #0] + + [..space] [text.new-line] + [..name-separator] + [..open-form] [..close-form] + [..open-tuple] [..close-tuple] + [..open-record] [..close-record] + [..text-delimiter] + [..sigil])] + (`` (template: (!strict-name-char? char) + (cond + ## else + #1)))) + +(template: (!name-char?|head char) + (and (!strict-name-char? char) + (not (!digit? char)))) + +(template: (!name-char? char) + (or (!strict-name-char? char) + (!digit? char))) + +(template: (!number-output ) + (case (|> source-code + (!clip ) + (text.replace-all ..digit-separator "") + (:: decode)) + (#.Right output) + (#.Right [[(update@ #.column (|>> (!n/+ (!n/- ))) where) + + source-code] + [where ( output)]]) + + (#.Left error) + (#.Left [[where source-code] + error]))) + +(def: no-exponent Offset 0) + +(with-expansions [ (as-is (!number-output start end int.decimal #.Int)) + (as-is (!number-output start end frac.decimal #.Frac)) + (!failure ..parse-frac where offset source-code)] + (def: (parse-frac source-code//size start [where offset source-code]) + (-> Nat Offset (Parser Code)) + (loop [end offset + exponent ..no-exponent] + (<| (!with-char+ source-code//size source-code end char/0 ) + (cond (!digit?+ char/0) + (recur (!inc end) exponent) + + (and (or (!n/= (char "e") char/0) + (!n/= (char "E") char/0)) + (not (is? ..no-exponent exponent))) + (<| (!with-char+ source-code//size source-code (!inc end) char/1 ) + (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1) + (!n/= (`` (char (~~ (static ..negative-sign)))) char/1)) + (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 ) + (if (!digit?+ char/2) + (recur (!n/+ 3 end) char/0) + )) + )) + + ## else + )))) + + (def: (parse-signed start [where offset source-code]) + (-> Offset (Parser Code)) + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char ) + (cond (!digit?+ char) + (recur (!inc end)) + + (!n/= (`` (.char (~~ (static ..frac-separator)))) + char) + (parse-frac source-code//size start [where (!inc end) source-code]) + + ## else + )))))) + +(template [ ] + [(def: ( source-code//size start where offset source-code) + (-> Nat Nat Cursor Nat Text (Either [Source Text] [Source Code])) + (loop [g!end offset] + (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end )) + (if (!digit?+ g!char) + (recur (!inc g!end)) + (!number-output start g!end )))))] + + [parse-nat n.decimal #.Nat] + [parse-rev rev.decimal #.Rev] + ) + +(template: (!parse-signed source-code//size offset where source-code @aliases @end) + (let [g!offset/1 (!inc offset)] + (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) + (if (!digit? g!char/1) + (parse-signed offset [where (!inc/2 offset) source-code]) + (!parse-full-name offset [where (!inc offset) source-code] where @aliases #.Identifier))))) + +(with-expansions [ (#.Right [[(update@ #.column (|>> (!n/+ (!n/- start end))) where) + end + source-code] + (!clip start end source-code)])] + (def: (parse-name-part start [where offset source-code]) + (-> Offset (Parser Text)) + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char ) + (if (!name-char? char) + (recur (!inc end)) + )))))) + +(template: (!parse-half-name @offset @char @module) + (cond (!name-char?|head @char) + (!letE [source' name] (..parse-name-part @offset [where (!inc @offset) source-code]) + (#.Right [source' [@module name]])) + + ## else + (!failure ..!parse-half-name where @offset source-code))) + +(`` (def: (parse-short-name current-module [where offset/0 source-code]) + (-> Text (Parser Name)) + (<| (!with-char source-code offset/0 char/0 + (!end-of-file where offset/0 source-code current-module)) + (if (!n/= (char (~~ (static ..name-separator))) char/0) + (let [offset/1 (!inc offset/0)] + (<| (!with-char source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + (!parse-half-name offset/1 char/1 current-module))) + (!parse-half-name offset/0 char/0 ..prelude))))) + +(template: (!parse-short-name @current-module @source @where @tag) + (!letE [source' name] (..parse-short-name @current-module @source) + (#.Right [source' [@where (@tag name)]]))) + +(with-expansions [ (as-is (#.Right [source' ["" simple]]))] + (`` (def: (parse-full-name aliases start source) + (-> Aliases Offset (Parser Name)) + (!letE [source' simple] (..parse-name-part start source) + (let [[where' offset' source-code'] source'] + (<| (!with-char source-code' offset' char/separator ) + (if (!n/= (char (~~ (static ..name-separator))) char/separator) + (let [offset'' (!inc offset')] + (!letE [source'' complex] (..parse-name-part offset'' [(!forward 1 where') offset'' source-code']) + (if ("lux text =" "" complex) + (let [[where offset source-code] source] + (!failure ..parse-full-name where offset source-code)) + (#.Right [source'' [(|> aliases + (dictionary.get simple) + (maybe.default simple)) + complex]])))) + ))))))) + +(template: (!parse-full-name @offset @source @where @aliases @tag) + (!letE [source' full-name] (..parse-full-name @aliases @offset @source) + (#.Right [source' [@where (@tag full-name)]]))) + +## TODO: Grammar macro for specifying syntax. +## (grammar: lux-grammar +## [expression ...] +## [form "(" [#* expression] ")"]) + +(with-expansions [ (as-is [where (!inc offset/0) source-code]) + (as-is [(!forward 1 where) (!inc offset/0) source-code]) + (as-is [(!forward 1 where) (!inc/2 offset/0) source-code]) + (as-is (parse current-module aliases source-code//size)) + (as-is (recur (!horizontal where offset/0 source-code)))] + + (template: (!close closer) + (#.Left [ closer])) + + (def: #export (parse current-module aliases source-code//size) + (-> Text Aliases Nat (Parser Code)) + ## The "exec []" is only there to avoid function fusion. + ## This is to preserve the loop as much as possible and keep it tight. + (exec [] + (function (recur [where offset/0 source-code]) + (<| (!with-char+ source-code//size source-code offset/0 char/0 + (!end-of-file where offset/0 source-code current-module)) + (with-expansions [ (template [ ] + [[(~~ (static ))] + ( ) + + [(~~ (static ))] + (!close )] + + [..open-form ..close-form parse-form] + [..open-tuple ..close-tuple parse-tuple] + [..open-record ..close-record parse-record] + )] + ## TODO: Add ..space as just another case for "lux syntax char case!" ASAP. + ## It"s currently failing for some reason. + (`` (if (!n/= (char (~~ (static ..space))) char/0) + + ("lux syntax char case!" char/0 + [[(~~ (static text.carriage-return))] + + + ## New line + [(~~ (static text.new-line))] + (recur (!vertical where offset/0 source-code)) + + + + ## Text + [(~~ (static ..text-delimiter))] + (parse-text where (!inc offset/0) source-code) + + ## Special code + [(~~ (static ..sigil))] + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + ("lux syntax char case!" char/1 + [[(~~ (static ..name-separator))] + (!parse-short-name current-module where #.Tag) + + ## Single-line comment + [(~~ (static ..sigil))] + (case ("lux text index" (!inc offset/1) (static text.new-line) source-code) + (#.Some end) + (recur (!vertical where end source-code)) + + _ + (!end-of-file where offset/1 source-code current-module)) + + (~~ (template [ ] + [[] + (#.Right [[(update@ #.column (|>> !inc/2) where) + (!inc offset/1) + source-code] + [where (#.Bit )]])] + + ["0" #0] + ["1" #1]))] + + ## else + (cond (!name-char?|head char/1) ## Tag + (!parse-full-name offset/1 where aliases #.Tag) + + ## else + (!failure ..parse where offset/0 source-code))))) + + ## Coincidentally (= name-separator frac-separator) + [(~~ (static ..name-separator))] + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 + (!end-of-file where offset/1 source-code current-module)) + (if (!digit? char/1) + (parse-rev source-code//size offset/0 where (!inc offset/1) source-code) + (!parse-short-name current-module [where offset/1 source-code] where #.Identifier)))) + + [(~~ (static ..positive-sign)) + (~~ (static ..negative-sign))] + (!parse-signed source-code//size offset/0 where source-code aliases + (!end-of-file where offset/0 source-code current-module))] + + ## else + (if (!digit? char/0) + ## Natural number + (parse-nat source-code//size offset/0 where (!inc offset/0) source-code) + ## Identifier + (!parse-full-name offset/0 where aliases #.Identifier)) + )))) + ))) + )) diff --git a/stdlib/source/program/compositor.lux b/stdlib/source/program/compositor.lux index 8324df002..4faa66057 100644 --- a/stdlib/source/program/compositor.lux +++ b/stdlib/source/program/compositor.lux @@ -35,8 +35,10 @@ ["." phase [extension (#+ Extender)]] [default - ["." platform (#+ Platform)] - ["." syntax]] + ["." platform (#+ Platform)]] + [language + [lux + ["." syntax]]] [meta ["." archive (#+ Archive)]]] ## ["." interpreter] -- cgit v1.2.3