From b5c854fb5ac1ead274f4ae0c657da66df957f14e Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Wed, 15 Nov 2017 20:57:47 -0400 Subject: - Moved "luxc/lang/syntax" to "lux/lang/syntax". - Minor refactoring. --- new-luxc/source/luxc/lang/analysis/common.lux | 2 +- new-luxc/source/luxc/lang/analysis/function.lux | 2 +- new-luxc/source/luxc/lang/analysis/inference.lux | 2 +- .../source/luxc/lang/analysis/procedure/common.lux | 8 +- .../luxc/lang/analysis/procedure/host.jvm.lux | 14 +- new-luxc/source/luxc/lang/analysis/structure.lux | 28 +- new-luxc/source/luxc/lang/analysis/type.lux | 4 +- new-luxc/source/luxc/lang/eval.lux | 2 +- new-luxc/source/luxc/lang/syntax.lux | 623 --------------------- new-luxc/source/luxc/lang/translation.lux | 12 +- 10 files changed, 37 insertions(+), 660 deletions(-) delete mode 100644 new-luxc/source/luxc/lang/syntax.lux (limited to 'new-luxc/source/luxc/lang') diff --git a/new-luxc/source/luxc/lang/analysis/common.lux b/new-luxc/source/luxc/lang/analysis/common.lux index 2f3e3a37d..95355d62f 100644 --- a/new-luxc/source/luxc/lang/analysis/common.lux +++ b/new-luxc/source/luxc/lang/analysis/common.lux @@ -14,7 +14,7 @@ (All [a] (-> (Meta a) (Meta [Type a]))) (do meta;Monad [[_ varT] (&;with-type-env tc;var) - analysis (&;with-expected-type varT + analysis (&;with-type varT action) knownT (&;with-type-env (tc;clean varT))] (wrap [knownT analysis]))) diff --git a/new-luxc/source/luxc/lang/analysis/function.lux b/new-luxc/source/luxc/lang/analysis/function.lux index 5403026cb..611e5c8a4 100644 --- a/new-luxc/source/luxc/lang/analysis/function.lux +++ b/new-luxc/source/luxc/lang/analysis/function.lux @@ -77,7 +77,7 @@ ## also to themselves, through a local variable. (&scope;with-local [func-name expectedT]) (&scope;with-local [arg-name inputT]) - (&;with-expected-type outputT) + (&;with-type outputT) (analyse body)) _ diff --git a/new-luxc/source/luxc/lang/analysis/inference.lux b/new-luxc/source/luxc/lang/analysis/inference.lux index 080a6c620..cccaa774a 100644 --- a/new-luxc/source/luxc/lang/analysis/inference.lux +++ b/new-luxc/source/luxc/lang/analysis/inference.lux @@ -113,7 +113,7 @@ (function [_] (Cannot-Infer-Argument (format "Inferred Type: " (%type inputT) "\n" " Argument: " (%code argC)))) - (&;with-expected-type inputT + (&;with-type inputT (analyse argC)))] (wrap [outputT' (list& argA args'A)])) diff --git a/new-luxc/source/luxc/lang/analysis/procedure/common.lux b/new-luxc/source/luxc/lang/analysis/procedure/common.lux index be77e643c..a643cb76a 100644 --- a/new-luxc/source/luxc/lang/analysis/procedure/common.lux +++ b/new-luxc/source/luxc/lang/analysis/procedure/common.lux @@ -56,7 +56,7 @@ [_ (&;infer outputT) argsA (monad;map @ (function [[argT argC]] - (&;with-expected-type argT + (&;with-type argT (analyse argC))) (list;zip2 inputsT+ args))] (wrap (la;procedure proc argsA))) @@ -98,7 +98,7 @@ (do meta;Monad [[var-id varT] (&;with-type-env tc;var) _ (&;infer (type (Either Text varT))) - opA (&;with-expected-type (type (io;IO varT)) + opA (&;with-type (type (io;IO varT)) (analyse opC))] (wrap (la;procedure proc (list opA)))) @@ -148,7 +148,7 @@ (^ (list valueC)) (do meta;Monad [_ (&;infer (type Type)) - valueA (&;with-expected-type Type + valueA (&;with-type Type (analyse valueC))] (wrap valueA)) @@ -342,7 +342,7 @@ (do meta;Monad [[var-id varT] (&;with-type-env tc;var) _ (&;infer (type (Atom varT))) - initA (&;with-expected-type varT + initA (&;with-type varT (analyse initC))] (wrap (la;procedure proc (list initA)))) diff --git a/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux b/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux index c6a456441..8f5382d2b 100644 --- a/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux +++ b/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux @@ -197,7 +197,7 @@ (do meta;Monad [_ (&;infer Nat) [var-id varT] (&;with-type-env tc;var) - arrayA (&;with-expected-type (type (Array varT)) + arrayA (&;with-type (type (Array varT)) (analyse arrayC))] (wrap (la;procedure proc (list arrayA)))) @@ -210,7 +210,7 @@ (case args (^ (list lengthC)) (do meta;Monad - [lengthA (&;with-expected-type Nat + [lengthA (&;with-type Nat (analyse lengthC)) expectedT meta;expected-type [level elem-class] (: (Meta [Nat Text]) @@ -303,12 +303,12 @@ (do meta;Monad [[var-id varT] (&;with-type-env tc;var) _ (&;infer varT) - arrayA (&;with-expected-type (type (Array varT)) + arrayA (&;with-type (type (Array varT)) (analyse arrayC)) ?elemT (&;with-type-env (tc;read var-id)) [elemT elem-class] (box-array-element-type (maybe;default varT ?elemT)) - idxA (&;with-expected-type Nat + idxA (&;with-type Nat (analyse idxC))] (wrap (la;procedure proc (list (code;text elem-class) idxA arrayA)))) @@ -323,14 +323,14 @@ (do meta;Monad [[var-id varT] (&;with-type-env tc;var) _ (&;infer (type (Array varT))) - arrayA (&;with-expected-type (type (Array varT)) + arrayA (&;with-type (type (Array varT)) (analyse arrayC)) ?elemT (&;with-type-env (tc;read var-id)) [valueT elem-class] (box-array-element-type (maybe;default varT ?elemT)) - idxA (&;with-expected-type Nat + idxA (&;with-type Nat (analyse idxC)) - valueA (&;with-expected-type valueT + valueA (&;with-type valueT (analyse valueC))] (wrap (la;procedure proc (list (code;text elem-class) idxA valueA arrayA)))) diff --git a/new-luxc/source/luxc/lang/analysis/structure.lux b/new-luxc/source/luxc/lang/analysis/structure.lux index 3048d4a4e..76b5a3a42 100644 --- a/new-luxc/source/luxc/lang/analysis/structure.lux +++ b/new-luxc/source/luxc/lang/analysis/structure.lux @@ -45,7 +45,7 @@ (case (list;nth tag flat) (#;Some variant-type) (do @ - [valueA (&;with-expected-type variant-type + [valueA (&;with-type variant-type (analyse valueC)) temp &scope;next-local] (wrap (la;sum tag type-size temp valueA))) @@ -54,7 +54,7 @@ (&common;variant-out-of-bounds-error expectedT type-size tag))) (#;Named name unnamedT) - (&;with-expected-type unnamedT + (&;with-type unnamedT (analyse-sum analyse tag valueC)) (#;Var id) @@ -63,7 +63,7 @@ (tc;read id))] (case ?expectedT' (#;Some expectedT') - (&;with-expected-type expectedT' + (&;with-type expectedT' (analyse-sum analyse tag valueC)) _ @@ -79,7 +79,7 @@ ( _) (do @ [[instance-id instanceT] (&;with-type-env )] - (&;with-expected-type (maybe;assume (type;apply (list instanceT) expectedT)) + (&;with-type (maybe;assume (type;apply (list instanceT) expectedT)) (analyse-sum analyse tag valueC)))) ([#;UnivQ tc;existential] [#;ExQ tc;var]) @@ -91,7 +91,7 @@ [?funT' (&;with-type-env (tc;read funT-id))] (case ?funT' (#;Some funT') - (&;with-expected-type (#;Apply inputT funT') + (&;with-type (#;Apply inputT funT') (analyse-sum analyse tag valueC)) _ @@ -105,7 +105,7 @@ (&;throw Not-Quantified-Type (%type funT)) (#;Some outputT) - (&;with-expected-type outputT + (&;with-type outputT (analyse-sum analyse tag valueC)))) _ @@ -123,14 +123,14 @@ ## If the tuple runs out, whatever expression is the last gets ## matched to the remaining type. [tailT (#;Cons tailC #;Nil)] - (&;with-expected-type tailT + (&;with-type tailT (analyse tailC)) ## If the type and the code are still ongoing, match each ## sub-expression to its corresponding type. [(#;Product leftT rightT) (#;Cons leftC rightC)] (do @ - [leftA (&;with-expected-type leftT + [leftA (&;with-type leftT (analyse leftC)) rightA (recur rightT rightC)] (wrap (` [(~ leftA) (~ rightA)]))) @@ -155,7 +155,7 @@ [tailT tailC] (do @ [g!tail (meta;gensym "tail")] - (&;with-expected-type tailT + (&;with-type tailT (analyse (` ("lux case" [(~@ tailC)] (~ g!tail) (~ g!tail)))))) @@ -173,7 +173,7 @@ (analyse-typed-product analyse membersC) (#;Named name unnamedT) - (&;with-expected-type unnamedT + (&;with-type unnamedT (analyse-product analyse membersC)) (#;Var id) @@ -182,7 +182,7 @@ (tc;read id))] (case ?expectedT' (#;Some expectedT') - (&;with-expected-type expectedT' + (&;with-type expectedT' (analyse-product analyse membersC)) _ @@ -199,7 +199,7 @@ ( _) (do @ [[instance-id instanceT] (&;with-type-env )] - (&;with-expected-type (maybe;assume (type;apply (list instanceT) expectedT)) + (&;with-type (maybe;assume (type;apply (list instanceT) expectedT)) (analyse-product analyse membersC)))) ([#;UnivQ tc;existential] [#;ExQ tc;var]) @@ -211,7 +211,7 @@ [?funT' (&;with-type-env (tc;read funT-id))] (case ?funT' (#;Some funT') - (&;with-expected-type (#;Apply inputT funT') + (&;with-type (#;Apply inputT funT') (analyse-product analyse membersC)) _ @@ -224,7 +224,7 @@ (&;throw Not-Quantified-Type (%type funT)) (#;Some outputT) - (&;with-expected-type outputT + (&;with-type outputT (analyse-product analyse membersC)))) _ diff --git a/new-luxc/source/luxc/lang/analysis/type.lux b/new-luxc/source/luxc/lang/analysis/type.lux index 4184dd0c0..eb3de08de 100644 --- a/new-luxc/source/luxc/lang/analysis/type.lux +++ b/new-luxc/source/luxc/lang/analysis/type.lux @@ -15,7 +15,7 @@ [actualT (eval Type type) #let [actualT (:! Type actualT)] _ (&;infer actualT)] - (&;with-expected-type actualT + (&;with-type actualT (analyse value)))) (def: #export (analyse-coerce analyse eval type value) @@ -23,5 +23,5 @@ (do meta;Monad [actualT (eval Type type) _ (&;infer (:! Type actualT))] - (&;with-expected-type Top + (&;with-type Top (analyse value)))) diff --git a/new-luxc/source/luxc/lang/eval.lux b/new-luxc/source/luxc/lang/eval.lux index 20c3acaeb..265320dbe 100644 --- a/new-luxc/source/luxc/lang/eval.lux +++ b/new-luxc/source/luxc/lang/eval.lux @@ -11,7 +11,7 @@ (def: #export (eval type exprC) &;Eval (do meta;Monad - [exprA (&;with-expected-type type + [exprA (&;with-type type (expressionA;analyser eval exprC)) #let [exprS (expressionS;synthesize exprA)] exprI (expressionT;translate exprS)] diff --git a/new-luxc/source/luxc/lang/syntax.lux b/new-luxc/source/luxc/lang/syntax.lux deleted file mode 100644 index 9fe4939a2..000000000 --- a/new-luxc/source/luxc/lang/syntax.lux +++ /dev/null @@ -1,623 +0,0 @@ -## This is the LuxC's parser. -## It takes the source code of a Lux file in raw text form and -## extracts the syntactic structure of the code from it. -## It only produces Lux Code nodes, and thus removes any white-space -## and comments while processing its inputs. - -## Another important aspect of the parser is that it keeps track of -## its position within the input data. -## That is, the parser takes into account the line and column -## information in the input text (it doesn't really touch the -## file-name aspect of the cursor, leaving it intact in whatever -## base-line cursor it is given). - -## This particular piece of functionality is not located in one -## function, but it is instead scattered throughout several parsers, -## since the logic for how to update the cursor varies, depending on -## what is being parsed, and the rules involved. - -## You will notice that several parsers have a "where" parameter, that -## tells them the cursor position prior to the parser being run. -## They are supposed to produce some parsed output, alongside an -## updated cursor pointing to the end position, after the parser was run. - -## Lux Code nodes/tokens are annotated with cursor meta-data -## (file-name, line, column) to keep track of their provenance and -## location, which is helpful for documentation and debugging. -(;module: - lux - (lux (control monad - ["p" parser "p/" Monad] - ["ex" exception #+ exception:]) - (data [bool] - [text] - ["e" error] - [number] - [product] - [maybe] - (text ["l" lexer] - format) - (coll [sequence #+ Sequence])))) - -(def: white-space Text "\t\v \r\f") -(def: new-line Text "\n") - -## This is the parser for white-space. -## Whenever a new-line is encountered, the column gets reset to 0, and -## the line gets incremented. -## It operates recursively in order to produce the longest continuous -## chunk of white-space. -(def: (space^ where) - (-> Cursor (l;Lexer [Cursor Text])) - (p;either (do p;Monad - [content (l;many (l;one-of white-space))] - (wrap [(update@ #;column (n.+ (text;size content)) where) - content])) - ## New-lines must be handled as a separate case to ensure line - ## information is handled properly. - (do p;Monad - [content (l;many (l;one-of new-line))] - (wrap [(|> where - (update@ #;line (n.+ (text;size content))) - (set@ #;column +0)) - content])) - )) - -## Single-line comments can start anywhere, but only go up to the -## next new-line. -(def: (single-line-comment^ where) - (-> Cursor (l;Lexer [Cursor Text])) - (do p;Monad - [_ (l;this "##") - comment (l;some (l;none-of new-line)) - _ (l;this new-line)] - (wrap [(|> where - (update@ #;line n.inc) - (set@ #;column +0)) - comment]))) - -## This is just a helper parser to find text which doesn't run into -## any special character sequences for multi-line comments. -(def: comment-bound^ - (l;Lexer Unit) - ($_ p;either - (l;this new-line) - (l;this ")#") - (l;this "#("))) - -## Multi-line comments are bounded by #( these delimiters, #(and, they may -## also be nested)# )#. -## Multi-line comment syntax must be balanced. -## That is, any nested comment must have matched delimiters. -## Unbalanced comments ought to be rejected as invalid code. -(def: (multi-line-comment^ where) - (-> Cursor (l;Lexer [Cursor Text])) - (do p;Monad - [_ (l;this "#(")] - (loop [comment "" - where (update@ #;column (n.+ +2) where)] - ($_ p;either - ## These are normal chunks of commented text. - (do @ - [chunk (l;many (l;not comment-bound^))] - (recur (format comment chunk) - (|> where - (update@ #;column (n.+ (text;size chunk)))))) - ## This is a special rule to handle new-lines within - ## comments properly. - (do @ - [_ (l;this new-line)] - (recur (format comment new-line) - (|> where - (update@ #;line n.inc) - (set@ #;column +0)))) - ## This is the rule for handling nested sub-comments. - ## Ultimately, the whole comment is just treated as text - ## (the comment must respect the syntax structure, but the - ## output produced is just a block of text). - ## That is why the sub-comment is covered in delimiters - ## and then appended to the rest of the comment text. - (do @ - [[sub-where sub-comment] (multi-line-comment^ where)] - (recur (format comment "#(" sub-comment ")#") - sub-where)) - ## Finally, this is the rule for closing the comment. - (do @ - [_ (l;this ")#")] - (wrap [(update@ #;column (n.+ +2) where) - comment])) - )))) - -## This is the only parser that should be used directly by other -## parsers, since all comments must be treated as either being -## single-line or multi-line. -## That is, there is no syntactic rule prohibiting one type of comment -## from being used in any situation (alternatively, forcing one type -## of comment to be the only usable one). -(def: (comment^ where) - (-> Cursor (l;Lexer [Cursor Text])) - (p;either (single-line-comment^ where) - (multi-line-comment^ where))) - -## To simplify parsing, I remove any left-padding that an Code token -## may have prior to parsing the token itself. -## Left-padding is assumed to be either white-space or a comment. -## The cursor gets updated, but the padding gets ignored. -(def: (left-padding^ where) - (-> Cursor (l;Lexer Cursor)) - ($_ p;either - (do p;Monad - [[where comment] (comment^ where)] - (left-padding^ where)) - (do p;Monad - [[where white-space] (space^ where)] - (left-padding^ where)) - (:: p;Monad wrap where))) - -## Escaped character sequences follow the usual syntax of -## back-slash followed by a letter (e.g. \n). -## Unicode escapes are possible, with hexadecimal sequences between 1 -## and 4 characters long (e.g. \u12aB). -## Escaped characters may show up in Char and Text literals. -(def: escaped-char^ - (l;Lexer [Nat Text]) - (p;after (l;this "\\") - (do p;Monad - [code l;any] - (case code - ## Handle special cases. - "t" (wrap [+2 "\t"]) - "v" (wrap [+2 "\v"]) - "b" (wrap [+2 "\b"]) - "n" (wrap [+2 "\n"]) - "r" (wrap [+2 "\r"]) - "f" (wrap [+2 "\f"]) - "\"" (wrap [+2 "\""]) - "\\" (wrap [+2 "\\"]) - - ## Handle unicode escapes. - "u" - (do p;Monad - [code (l;between +1 +4 l;hexadecimal)] - (wrap (case (|> code (format "+") (:: number;Hex@Codec decode)) - (#;Right value) - [(n.+ +2 (text;size code)) (text;from-code value)] - - _ - (undefined)))) - - _ - (p;fail (format "Invalid escaping syntax: " (%t code))))))) - -## These are very simple parsers that just cut chunks of text in -## specific shapes and then use decoders already present in the -## standard library to actually produce the values from the literals. -(def: rich-digit - (l;Lexer Text) - (p;either l;decimal - (p;after (l;this "_") (p/wrap "")))) - -(def: rich-digits^ - (l;Lexer Text) - (l;seq l;decimal - (l;some rich-digit))) - -(def: (marker^ token) - (-> Text (l;Lexer Text)) - (p;after (l;this token) (p/wrap token))) - -(do-template [ ] - [(def: #export ( where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [chunk ] - (case (:: decode chunk) - (#;Left error) - (p;fail error) - - (#;Right value) - (wrap [(update@ #;column (n.+ (text;size chunk)) where) - [where ( value)]]))))] - - [bool #;Bool - (p;either (marker^ "true") (marker^ "false")) - bool;Codec] - - [int #;Int - (l;seq (p;default "" (l;one-of "-")) - rich-digits^) - number;Codec] - - [deg #;Deg - (l;seq (l;one-of ".") - rich-digits^) - number;Codec] - ) - -(def: (nat-char where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [_ (l;this "#\"") - [where' char] (: (l;Lexer [Cursor Text]) - ($_ p;either - ## Normal text characters. - (do @ - [normal (l;none-of "\\\"\n")] - (wrap [(|> where - (update@ #;column n.inc)) - normal])) - ## Must handle escaped - ## chars separately. - (do @ - [[chars-consumed char] escaped-char^] - (wrap [(|> where - (update@ #;column (n.+ chars-consumed))) - char])))) - _ (l;this "\"") - #let [char (maybe;assume (text;nth +0 char))]] - (wrap [(|> where' - (update@ #;column n.inc)) - [where (#;Nat char)]]))) - -(def: (normal-nat where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [chunk (l;seq (l;one-of "+") - rich-digits^)] - (case (:: number;Codec decode chunk) - (#;Left error) - (p;fail error) - - (#;Right value) - (wrap [(update@ #;column (n.+ (text;size chunk)) where) - [where (#;Nat value)]])))) - -(def: #export (nat where) - (-> Cursor (l;Lexer [Cursor Code])) - (p;either (normal-nat where) - (nat-char where))) - -(def: (normal-frac where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [chunk ($_ l;seq - (p;default "" (l;one-of "-")) - rich-digits^ - (l;one-of ".") - rich-digits^ - (p;default "" - ($_ l;seq - (l;one-of "eE") - (p;default "" (l;one-of "+-")) - rich-digits^)))] - (case (:: number;Codec decode chunk) - (#;Left error) - (p;fail error) - - (#;Right value) - (wrap [(update@ #;column (n.+ (text;size chunk)) where) - [where (#;Frac value)]])))) - -(def: frac-ratio-fragment - (l;Lexer Frac) - (<| (p;codec number;Codec) - (:: p;Monad map (function [digits] - (format digits ".0"))) - rich-digits^)) - -(def: (ratio-frac where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [chunk ($_ l;seq - (p;default "" (l;one-of "-")) - rich-digits^ - (l;one-of "/") - rich-digits^) - value (l;local chunk - (do @ - [signed? (l;this? "-") - numerator frac-ratio-fragment - _ (l;this? "/") - denominator frac-ratio-fragment - _ (p;assert "Denominator cannot be 0." - (not (f.= 0.0 denominator)))] - (wrap (|> numerator - (f.* (if signed? -1.0 1.0)) - (f./ denominator)))))] - (wrap [(update@ #;column (n.+ (text;size chunk)) where) - [where (#;Frac value)]]))) - -(def: #export (frac where) - (-> Cursor (l;Lexer [Cursor Code])) - (p;either (normal-frac where) - (ratio-frac where))) - -## This parser looks so complex because text in Lux can be multi-line -## and there are rules regarding how this is handled. -(def: #export (text where) - (-> Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [## Lux text "is delimited by double-quotes", as usual in most - ## programming languages. - _ (l;this "\"") - ## I must know what column the text body starts at (which is - ## always 1 column after the left-delimiting quote). - ## This is important because, when procesing subsequent lines, - ## they must all start at the same column, being left-padded with - ## as many spaces as necessary to be column-aligned. - ## This helps ensure that the formatting on the text in the - ## source-code matches the formatting of the Text value. - #let [offset-column (n.inc (get@ #;column where))] - [where' text-read] (: (l;Lexer [Cursor Text]) - ## I must keep track of how much of the - ## text body has been read, how far the - ## cursor has progressed, and whether I'm - ## processing a subsequent line, or just - ## processing normal text body. - (loop [text-read "" - where (|> where - (update@ #;column n.inc)) - must-have-offset? false] - (p;either (if must-have-offset? - ## If I'm at the start of a - ## new line, I must ensure the - ## space-offset is at least - ## as great as the column of - ## the text's body's column, - ## to ensure they are aligned. - (do @ - [offset (l;many (l;one-of " ")) - #let [offset-size (text;size offset)]] - (if (n.>= offset-column offset-size) - ## Any extra offset - ## becomes part of the - ## text's body. - (recur (|> offset - (text;split offset-column) - (maybe;default (undefined)) - product;right - (format text-read)) - (|> where - (update@ #;column (n.+ offset-size))) - false) - (p;fail (format "Each line of a multi-line text must have an appropriate offset!\n" - "Expected: " (%i (nat-to-int offset-column)) " columns.\n" - " Actual: " (%i (nat-to-int offset-size)) " columns.\n")))) - ($_ p;either - ## Normal text characters. - (do @ - [normal (l;many (l;none-of "\\\"\n"))] - (recur (format text-read normal) - (|> where - (update@ #;column (n.+ (text;size normal)))) - false)) - ## Must handle escaped - ## chars separately. - (do @ - [[chars-consumed char] escaped-char^] - (recur (format text-read char) - (|> where - (update@ #;column (n.+ chars-consumed))) - false)) - ## The text ends when it - ## reaches the right-delimiter. - (do @ - [_ (l;this "\"")] - (wrap [(update@ #;column n.inc where) - text-read])))) - ## If a new-line is - ## encountered, it gets - ## appended to the value and - ## the loop is alerted that the - ## next line must have an offset. - (do @ - [_ (l;this new-line)] - (recur (format text-read new-line) - (|> where - (update@ #;line n.inc) - (set@ #;column +0)) - true)))))] - (wrap [where' - [where (#;Text text-read)]]))) - -## Form and tuple syntax is mostly the same, differing only in the -## delimiters involved. -## They may have an arbitrary number of arbitrary Code nodes as elements. -(do-template [ ] - [(def: ( where ast) - (-> Cursor - (-> Cursor (l;Lexer [Cursor Code])) - (l;Lexer [Cursor Code])) - (do p;Monad - [_ (l;this ) - [where' elems] (loop [elems (: (Sequence Code) - sequence;empty) - where where] - (p;either (do @ - [## Must update the cursor as I - ## go along, to keep things accurate. - [where' elem] (ast where)] - (recur (sequence;add elem elems) - where')) - (do @ - [## Must take into account any - ## padding present before the - ## end-delimiter. - where' (left-padding^ where) - _ (l;this )] - (wrap [(update@ #;column n.inc where') - (sequence;to-list elems)]))))] - (wrap [where' - [where ( elems)]])))] - - [form #;Form "(" ")"] - [tuple #;Tuple "[" "]"] - ) - -## Records are almost (syntactically) the same as forms and tuples, -## with the exception that their elements must come in pairs (as in -## key-value pairs). -## Semantically, though, records and tuples are just 2 different -## representations for the same thing (a tuple). -## In normal Lux syntax, the key position in the pair will be a tag -## Code node, however, record Code nodes allow any Code node to occupy -## this position, since it may be useful when processing Code syntax in -## macros. -(def: (record where ast) - (-> Cursor - (-> Cursor (l;Lexer [Cursor Code])) - (l;Lexer [Cursor Code])) - (do p;Monad - [_ (l;this "{") - [where' elems] (loop [elems (: (Sequence [Code Code]) - sequence;empty) - where where] - (p;either (do @ - [[where' key] (ast where) - [where' val] (ast where')] - (recur (sequence;add [key val] elems) - where')) - (do @ - [where' (left-padding^ where) - _ (l;this "}")] - (wrap [(update@ #;column n.inc where') - (sequence;to-list elems)]))))] - (wrap [where' - [where (#;Record elems)]]))) - -## The parts of an identifier are separated by a single mark. -## E.g. module;name. -## Only one such mark may be used in an identifier, since there -## can only be 2 parts to an identifier (the module [before the -## mark], and the name [after the mark]). -## There are also some extra rules regarding identifier syntax, -## encoded on the parser. -(def: identifier-separator Text ";") - -## A Lux identifier is a pair of chunks of text, where the first-part -## refers to the module that gives context to the identifier, and the -## second part corresponds to the name of the identifier itself. -## The module part may be absent (by being the empty text ""), but the -## name part must always be present. -## The rules for which characters you may use are specified in terms -## of which characters you must avoid (to keep things as open-ended as -## possible). -## In particular, no white-space can be used, and neither can other -## characters which are already used by Lux as delimiters for other -## Code nodes (thereby reducing ambiguity while parsing). -## Additionally, the first character in an identifier's part cannot be -## a digit, to avoid confusion with regards to numbers. -(def: ident-part^ - (l;Lexer Text) - (do p;Monad - [#let [digits "0123456789" - delimiters (format "()[]{}#\"" identifier-separator) - space (format white-space new-line) - head-lexer (l;none-of (format digits delimiters space)) - tail-lexer (l;some (l;none-of (format delimiters space)))] - head head-lexer - tail tail-lexer] - (wrap (format head tail)))) - -(def: current-module-mark Text (format identifier-separator identifier-separator)) - -(def: (ident^ current-module) - (-> Text (l;Lexer [Ident Nat])) - ($_ p;either - ## When an identifier starts with 2 marks, its module is - ## taken to be the current-module being compiled at the moment. - ## This can be useful when mentioning identifiers and tags - ## inside quoted/templated code in macros. - (do p;Monad - [_ (l;this current-module-mark) - def-name ident-part^] - (wrap [[current-module def-name] - (n.+ +2 (text;size def-name))])) - ## If the identifier is prefixed by the mark, but no module - ## part, the module is assumed to be "lux" (otherwise known as - ## the 'prelude'). - ## This makes it easy to refer to definitions in that module, - ## since it is the most fundamental module in the entire - ## standard library. - (do p;Monad - [_ (l;this identifier-separator) - def-name ident-part^] - (wrap [["lux" def-name] - (n.inc (text;size def-name))])) - ## Not all identifiers must be specified with a module part. - ## If that part is not provided, the identifier will be created - ## with the empty "" text as the module. - ## During program analysis, such identifiers tend to be treated - ## as if their context is the current-module, but this only - ## applies to identifiers for tags and module definitions. - ## Function arguments and local-variables may not be referred-to - ## using identifiers with module parts, so being able to specify - ## identifiers with empty modules helps with those use-cases. - (do p;Monad - [first-part ident-part^] - (p;either (do @ - [_ (l;this identifier-separator) - second-part ident-part^] - (wrap [[first-part second-part] - ($_ n.+ - (text;size first-part) - +1 - (text;size second-part))])) - (wrap [["" first-part] - (text;size first-part)]))))) - -## The only (syntactic) difference between a symbol and a tag (both -## being identifiers), is that tags must be prefixed with a hash-sign -## (i.e. #). -## Semantically, though, they are very different, with symbols being -## used to refer to module definitions and local variables, while tags -## provide the compiler with information related to data-structure -## construction and de-structuring (during pattern-matching). -(do-template [ ] - [(def: #export ( current-module where) - (-> Text Cursor (l;Lexer [Cursor Code])) - (do p;Monad - [[value length] ] - (wrap [(update@ #;column (|>. ($_ n.+ length)) where) - [where ( value)]])))] - - [symbol #;Symbol (ident^ current-module) +0] - [tag #;Tag (p;after (l;this "#") (ident^ current-module)) +1] - ) - -(exception: #export End-Of-File) -(exception: #export Unrecognized-Input) - -(def: (ast current-module) - (-> Text Cursor (l;Lexer [Cursor Code])) - (: (-> Cursor (l;Lexer [Cursor Code])) - (function ast' [where] - (do p;Monad - [where (left-padding^ where)] - ($_ p;either - (form where ast') - (tuple where ast') - (record where ast') - (bool where) - (nat where) - (frac where) - (int where) - (deg where) - (symbol current-module where) - (tag current-module where) - (text where) - (do @ - [end? l;end?] - (if end? - (p;fail (End-Of-File current-module)) - (p;fail (Unrecognized-Input current-module)))) - ))))) - -(def: #export (parse current-module [where offset source]) - (-> Text Source (e;Error [Source Code])) - (case (p;run [offset source] (ast current-module where)) - (#e;Error error) - (#e;Error error) - - (#e;Success [[offset' remaining] [where' output]]) - (#e;Success [[where' offset' remaining] output]))) diff --git a/new-luxc/source/luxc/lang/translation.lux b/new-luxc/source/luxc/lang/translation.lux index e573aa3ae..88fc25d3a 100644 --- a/new-luxc/source/luxc/lang/translation.lux +++ b/new-luxc/source/luxc/lang/translation.lux @@ -9,6 +9,7 @@ (coll [dict])) [meta] (meta (type ["tc" check])) + (lang [syntax]) [host] [io] (world [file #+ File])) @@ -18,7 +19,6 @@ [";L" host] (host [";H" macro] ["$" jvm]) - ["&;" syntax] (analysis [";A" expression] [";A" common]) (synthesis [";S" expression]) @@ -41,7 +41,7 @@ (-> Code (Meta [$;Inst Code])) (do meta;Monad [[_ annsA] (&;with-scope - (&;with-expected-type Code + (&;with-type Code (analyse annsC))) annsI (expressionT;translate (expressionS;synthesize annsA)) annsV (evalT;eval annsI)] @@ -58,7 +58,7 @@ [_ valueT valueA] (&;with-scope (if (meta;type? (:! Code annsV)) (do @ - [valueA (&;with-expected-type Type + [valueA (&;with-type Type (analyse valueC))] (wrap [Type valueA])) (commonA;with-unknown-type @@ -73,7 +73,7 @@ (^code ("lux program" (~ [_ (#;Symbol ["" program-args])]) (~ programC))) (do meta;Monad [[_ programA] (&;with-scope - (&;with-expected-type (type (io;IO Unit)) + (&;with-type (type (io;IO Unit)) (analyse programC))) programI (expressionT;translate (expressionS;synthesize programA))] (statementT;translate-program program-args programI)) @@ -111,7 +111,7 @@ ((exhaust action) compiler') (#e;Error error) - (if (ex;match? &syntax;End-Of-File error) + (if (ex;match? syntax;End-Of-File error) (#e;Success [compiler []]) (#e;Error error))))) @@ -129,7 +129,7 @@ (def: (parse current-module) (-> Text (Meta Code)) (function [compiler] - (case (&syntax;parse current-module (get@ #;source compiler)) + (case (syntax;parse current-module (get@ #;source compiler)) (#e;Error error) (#e;Error error) -- cgit v1.2.3