From b5c854fb5ac1ead274f4ae0c657da66df957f14e Mon Sep 17 00:00:00 2001
From: Eduardo Julian
Date: Wed, 15 Nov 2017 20:57:47 -0400
Subject: - Moved "luxc/lang/syntax" to "lux/lang/syntax". - Minor refactoring.
---
new-luxc/source/luxc/lang.lux | 2 +-
new-luxc/source/luxc/lang/analysis/common.lux | 2 +-
new-luxc/source/luxc/lang/analysis/function.lux | 2 +-
new-luxc/source/luxc/lang/analysis/inference.lux | 2 +-
.../source/luxc/lang/analysis/procedure/common.lux | 8 +-
.../luxc/lang/analysis/procedure/host.jvm.lux | 14 +-
new-luxc/source/luxc/lang/analysis/structure.lux | 28 +-
new-luxc/source/luxc/lang/analysis/type.lux | 4 +-
new-luxc/source/luxc/lang/eval.lux | 2 +-
new-luxc/source/luxc/lang/syntax.lux | 623 ---------------------
new-luxc/source/luxc/lang/translation.lux | 12 +-
new-luxc/test/test/luxc/lang/analysis/case.lux | 10 +-
new-luxc/test/test/luxc/lang/analysis/function.lux | 8 +-
.../test/luxc/lang/analysis/procedure/common.lux | 14 +-
.../test/luxc/lang/analysis/procedure/host.jvm.lux | 2 +-
.../test/test/luxc/lang/analysis/structure.lux | 24 +-
new-luxc/test/test/luxc/lang/syntax.lux | 233 --------
new-luxc/test/tests.lux | 3 +-
stdlib/source/lux/lang/syntax.lux | 623 +++++++++++++++++++++
stdlib/test/test/lux/lang/syntax.lux | 233 ++++++++
stdlib/test/tests.lux | 1 +
21 files changed, 925 insertions(+), 925 deletions(-)
delete mode 100644 new-luxc/source/luxc/lang/syntax.lux
delete mode 100644 new-luxc/test/test/luxc/lang/syntax.lux
create mode 100644 stdlib/source/lux/lang/syntax.lux
create mode 100644 stdlib/test/test/lux/lang/syntax.lux
diff --git a/new-luxc/source/luxc/lang.lux b/new-luxc/source/luxc/lang.lux
index 373c6b12b..844cc6755 100644
--- a/new-luxc/source/luxc/lang.lux
+++ b/new-luxc/source/luxc/lang.lux
@@ -40,7 +40,7 @@
(:: meta;Monad (~' wrap) [])
(;;throw (~ exception) (~ message)))))))
-(def: #export (with-expected-type expected action)
+(def: #export (with-type expected action)
(All [a] (-> Type (Meta a) (Meta a)))
(function [compiler]
(case (action (set@ #;expected (#;Some expected) compiler))
diff --git a/new-luxc/source/luxc/lang/analysis/common.lux b/new-luxc/source/luxc/lang/analysis/common.lux
index 2f3e3a37d..95355d62f 100644
--- a/new-luxc/source/luxc/lang/analysis/common.lux
+++ b/new-luxc/source/luxc/lang/analysis/common.lux
@@ -14,7 +14,7 @@
(All [a] (-> (Meta a) (Meta [Type a])))
(do meta;Monad
[[_ varT] (&;with-type-env tc;var)
- analysis (&;with-expected-type varT
+ analysis (&;with-type varT
action)
knownT (&;with-type-env (tc;clean varT))]
(wrap [knownT analysis])))
diff --git a/new-luxc/source/luxc/lang/analysis/function.lux b/new-luxc/source/luxc/lang/analysis/function.lux
index 5403026cb..611e5c8a4 100644
--- a/new-luxc/source/luxc/lang/analysis/function.lux
+++ b/new-luxc/source/luxc/lang/analysis/function.lux
@@ -77,7 +77,7 @@
## also to themselves, through a local variable.
(&scope;with-local [func-name expectedT])
(&scope;with-local [arg-name inputT])
- (&;with-expected-type outputT)
+ (&;with-type outputT)
(analyse body))
_
diff --git a/new-luxc/source/luxc/lang/analysis/inference.lux b/new-luxc/source/luxc/lang/analysis/inference.lux
index 080a6c620..cccaa774a 100644
--- a/new-luxc/source/luxc/lang/analysis/inference.lux
+++ b/new-luxc/source/luxc/lang/analysis/inference.lux
@@ -113,7 +113,7 @@
(function [_] (Cannot-Infer-Argument
(format "Inferred Type: " (%type inputT) "\n"
" Argument: " (%code argC))))
- (&;with-expected-type inputT
+ (&;with-type inputT
(analyse argC)))]
(wrap [outputT' (list& argA args'A)]))
diff --git a/new-luxc/source/luxc/lang/analysis/procedure/common.lux b/new-luxc/source/luxc/lang/analysis/procedure/common.lux
index be77e643c..a643cb76a 100644
--- a/new-luxc/source/luxc/lang/analysis/procedure/common.lux
+++ b/new-luxc/source/luxc/lang/analysis/procedure/common.lux
@@ -56,7 +56,7 @@
[_ (&;infer outputT)
argsA (monad;map @
(function [[argT argC]]
- (&;with-expected-type argT
+ (&;with-type argT
(analyse argC)))
(list;zip2 inputsT+ args))]
(wrap (la;procedure proc argsA)))
@@ -98,7 +98,7 @@
(do meta;Monad
[[var-id varT] (&;with-type-env tc;var)
_ (&;infer (type (Either Text varT)))
- opA (&;with-expected-type (type (io;IO varT))
+ opA (&;with-type (type (io;IO varT))
(analyse opC))]
(wrap (la;procedure proc (list opA))))
@@ -148,7 +148,7 @@
(^ (list valueC))
(do meta;Monad
[_ (&;infer (type Type))
- valueA (&;with-expected-type Type
+ valueA (&;with-type Type
(analyse valueC))]
(wrap valueA))
@@ -342,7 +342,7 @@
(do meta;Monad
[[var-id varT] (&;with-type-env tc;var)
_ (&;infer (type (Atom varT)))
- initA (&;with-expected-type varT
+ initA (&;with-type varT
(analyse initC))]
(wrap (la;procedure proc (list initA))))
diff --git a/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux b/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux
index c6a456441..8f5382d2b 100644
--- a/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux
+++ b/new-luxc/source/luxc/lang/analysis/procedure/host.jvm.lux
@@ -197,7 +197,7 @@
(do meta;Monad
[_ (&;infer Nat)
[var-id varT] (&;with-type-env tc;var)
- arrayA (&;with-expected-type (type (Array varT))
+ arrayA (&;with-type (type (Array varT))
(analyse arrayC))]
(wrap (la;procedure proc (list arrayA))))
@@ -210,7 +210,7 @@
(case args
(^ (list lengthC))
(do meta;Monad
- [lengthA (&;with-expected-type Nat
+ [lengthA (&;with-type Nat
(analyse lengthC))
expectedT meta;expected-type
[level elem-class] (: (Meta [Nat Text])
@@ -303,12 +303,12 @@
(do meta;Monad
[[var-id varT] (&;with-type-env tc;var)
_ (&;infer varT)
- arrayA (&;with-expected-type (type (Array varT))
+ arrayA (&;with-type (type (Array varT))
(analyse arrayC))
?elemT (&;with-type-env
(tc;read var-id))
[elemT elem-class] (box-array-element-type (maybe;default varT ?elemT))
- idxA (&;with-expected-type Nat
+ idxA (&;with-type Nat
(analyse idxC))]
(wrap (la;procedure proc (list (code;text elem-class) idxA arrayA))))
@@ -323,14 +323,14 @@
(do meta;Monad
[[var-id varT] (&;with-type-env tc;var)
_ (&;infer (type (Array varT)))
- arrayA (&;with-expected-type (type (Array varT))
+ arrayA (&;with-type (type (Array varT))
(analyse arrayC))
?elemT (&;with-type-env
(tc;read var-id))
[valueT elem-class] (box-array-element-type (maybe;default varT ?elemT))
- idxA (&;with-expected-type Nat
+ idxA (&;with-type Nat
(analyse idxC))
- valueA (&;with-expected-type valueT
+ valueA (&;with-type valueT
(analyse valueC))]
(wrap (la;procedure proc (list (code;text elem-class) idxA valueA arrayA))))
diff --git a/new-luxc/source/luxc/lang/analysis/structure.lux b/new-luxc/source/luxc/lang/analysis/structure.lux
index 3048d4a4e..76b5a3a42 100644
--- a/new-luxc/source/luxc/lang/analysis/structure.lux
+++ b/new-luxc/source/luxc/lang/analysis/structure.lux
@@ -45,7 +45,7 @@
(case (list;nth tag flat)
(#;Some variant-type)
(do @
- [valueA (&;with-expected-type variant-type
+ [valueA (&;with-type variant-type
(analyse valueC))
temp &scope;next-local]
(wrap (la;sum tag type-size temp valueA)))
@@ -54,7 +54,7 @@
(&common;variant-out-of-bounds-error expectedT type-size tag)))
(#;Named name unnamedT)
- (&;with-expected-type unnamedT
+ (&;with-type unnamedT
(analyse-sum analyse tag valueC))
(#;Var id)
@@ -63,7 +63,7 @@
(tc;read id))]
(case ?expectedT'
(#;Some expectedT')
- (&;with-expected-type expectedT'
+ (&;with-type expectedT'
(analyse-sum analyse tag valueC))
_
@@ -79,7 +79,7 @@
( _)
(do @
[[instance-id instanceT] (&;with-type-env )]
- (&;with-expected-type (maybe;assume (type;apply (list instanceT) expectedT))
+ (&;with-type (maybe;assume (type;apply (list instanceT) expectedT))
(analyse-sum analyse tag valueC))))
([#;UnivQ tc;existential]
[#;ExQ tc;var])
@@ -91,7 +91,7 @@
[?funT' (&;with-type-env (tc;read funT-id))]
(case ?funT'
(#;Some funT')
- (&;with-expected-type (#;Apply inputT funT')
+ (&;with-type (#;Apply inputT funT')
(analyse-sum analyse tag valueC))
_
@@ -105,7 +105,7 @@
(&;throw Not-Quantified-Type (%type funT))
(#;Some outputT)
- (&;with-expected-type outputT
+ (&;with-type outputT
(analyse-sum analyse tag valueC))))
_
@@ -123,14 +123,14 @@
## If the tuple runs out, whatever expression is the last gets
## matched to the remaining type.
[tailT (#;Cons tailC #;Nil)]
- (&;with-expected-type tailT
+ (&;with-type tailT
(analyse tailC))
## If the type and the code are still ongoing, match each
## sub-expression to its corresponding type.
[(#;Product leftT rightT) (#;Cons leftC rightC)]
(do @
- [leftA (&;with-expected-type leftT
+ [leftA (&;with-type leftT
(analyse leftC))
rightA (recur rightT rightC)]
(wrap (` [(~ leftA) (~ rightA)])))
@@ -155,7 +155,7 @@
[tailT tailC]
(do @
[g!tail (meta;gensym "tail")]
- (&;with-expected-type tailT
+ (&;with-type tailT
(analyse (` ("lux case" [(~@ tailC)]
(~ g!tail)
(~ g!tail))))))
@@ -173,7 +173,7 @@
(analyse-typed-product analyse membersC)
(#;Named name unnamedT)
- (&;with-expected-type unnamedT
+ (&;with-type unnamedT
(analyse-product analyse membersC))
(#;Var id)
@@ -182,7 +182,7 @@
(tc;read id))]
(case ?expectedT'
(#;Some expectedT')
- (&;with-expected-type expectedT'
+ (&;with-type expectedT'
(analyse-product analyse membersC))
_
@@ -199,7 +199,7 @@
( _)
(do @
[[instance-id instanceT] (&;with-type-env )]
- (&;with-expected-type (maybe;assume (type;apply (list instanceT) expectedT))
+ (&;with-type (maybe;assume (type;apply (list instanceT) expectedT))
(analyse-product analyse membersC))))
([#;UnivQ tc;existential]
[#;ExQ tc;var])
@@ -211,7 +211,7 @@
[?funT' (&;with-type-env (tc;read funT-id))]
(case ?funT'
(#;Some funT')
- (&;with-expected-type (#;Apply inputT funT')
+ (&;with-type (#;Apply inputT funT')
(analyse-product analyse membersC))
_
@@ -224,7 +224,7 @@
(&;throw Not-Quantified-Type (%type funT))
(#;Some outputT)
- (&;with-expected-type outputT
+ (&;with-type outputT
(analyse-product analyse membersC))))
_
diff --git a/new-luxc/source/luxc/lang/analysis/type.lux b/new-luxc/source/luxc/lang/analysis/type.lux
index 4184dd0c0..eb3de08de 100644
--- a/new-luxc/source/luxc/lang/analysis/type.lux
+++ b/new-luxc/source/luxc/lang/analysis/type.lux
@@ -15,7 +15,7 @@
[actualT (eval Type type)
#let [actualT (:! Type actualT)]
_ (&;infer actualT)]
- (&;with-expected-type actualT
+ (&;with-type actualT
(analyse value))))
(def: #export (analyse-coerce analyse eval type value)
@@ -23,5 +23,5 @@
(do meta;Monad
[actualT (eval Type type)
_ (&;infer (:! Type actualT))]
- (&;with-expected-type Top
+ (&;with-type Top
(analyse value))))
diff --git a/new-luxc/source/luxc/lang/eval.lux b/new-luxc/source/luxc/lang/eval.lux
index 20c3acaeb..265320dbe 100644
--- a/new-luxc/source/luxc/lang/eval.lux
+++ b/new-luxc/source/luxc/lang/eval.lux
@@ -11,7 +11,7 @@
(def: #export (eval type exprC)
&;Eval
(do meta;Monad
- [exprA (&;with-expected-type type
+ [exprA (&;with-type type
(expressionA;analyser eval exprC))
#let [exprS (expressionS;synthesize exprA)]
exprI (expressionT;translate exprS)]
diff --git a/new-luxc/source/luxc/lang/syntax.lux b/new-luxc/source/luxc/lang/syntax.lux
deleted file mode 100644
index 9fe4939a2..000000000
--- a/new-luxc/source/luxc/lang/syntax.lux
+++ /dev/null
@@ -1,623 +0,0 @@
-## This is the LuxC's parser.
-## It takes the source code of a Lux file in raw text form and
-## extracts the syntactic structure of the code from it.
-## It only produces Lux Code nodes, and thus removes any white-space
-## and comments while processing its inputs.
-
-## Another important aspect of the parser is that it keeps track of
-## its position within the input data.
-## That is, the parser takes into account the line and column
-## information in the input text (it doesn't really touch the
-## file-name aspect of the cursor, leaving it intact in whatever
-## base-line cursor it is given).
-
-## This particular piece of functionality is not located in one
-## function, but it is instead scattered throughout several parsers,
-## since the logic for how to update the cursor varies, depending on
-## what is being parsed, and the rules involved.
-
-## You will notice that several parsers have a "where" parameter, that
-## tells them the cursor position prior to the parser being run.
-## They are supposed to produce some parsed output, alongside an
-## updated cursor pointing to the end position, after the parser was run.
-
-## Lux Code nodes/tokens are annotated with cursor meta-data
-## (file-name, line, column) to keep track of their provenance and
-## location, which is helpful for documentation and debugging.
-(;module:
- lux
- (lux (control monad
- ["p" parser "p/" Monad]
- ["ex" exception #+ exception:])
- (data [bool]
- [text]
- ["e" error]
- [number]
- [product]
- [maybe]
- (text ["l" lexer]
- format)
- (coll [sequence #+ Sequence]))))
-
-(def: white-space Text "\t\v \r\f")
-(def: new-line Text "\n")
-
-## This is the parser for white-space.
-## Whenever a new-line is encountered, the column gets reset to 0, and
-## the line gets incremented.
-## It operates recursively in order to produce the longest continuous
-## chunk of white-space.
-(def: (space^ where)
- (-> Cursor (l;Lexer [Cursor Text]))
- (p;either (do p;Monad
- [content (l;many (l;one-of white-space))]
- (wrap [(update@ #;column (n.+ (text;size content)) where)
- content]))
- ## New-lines must be handled as a separate case to ensure line
- ## information is handled properly.
- (do p;Monad
- [content (l;many (l;one-of new-line))]
- (wrap [(|> where
- (update@ #;line (n.+ (text;size content)))
- (set@ #;column +0))
- content]))
- ))
-
-## Single-line comments can start anywhere, but only go up to the
-## next new-line.
-(def: (single-line-comment^ where)
- (-> Cursor (l;Lexer [Cursor Text]))
- (do p;Monad
- [_ (l;this "##")
- comment (l;some (l;none-of new-line))
- _ (l;this new-line)]
- (wrap [(|> where
- (update@ #;line n.inc)
- (set@ #;column +0))
- comment])))
-
-## This is just a helper parser to find text which doesn't run into
-## any special character sequences for multi-line comments.
-(def: comment-bound^
- (l;Lexer Unit)
- ($_ p;either
- (l;this new-line)
- (l;this ")#")
- (l;this "#(")))
-
-## Multi-line comments are bounded by #( these delimiters, #(and, they may
-## also be nested)# )#.
-## Multi-line comment syntax must be balanced.
-## That is, any nested comment must have matched delimiters.
-## Unbalanced comments ought to be rejected as invalid code.
-(def: (multi-line-comment^ where)
- (-> Cursor (l;Lexer [Cursor Text]))
- (do p;Monad
- [_ (l;this "#(")]
- (loop [comment ""
- where (update@ #;column (n.+ +2) where)]
- ($_ p;either
- ## These are normal chunks of commented text.
- (do @
- [chunk (l;many (l;not comment-bound^))]
- (recur (format comment chunk)
- (|> where
- (update@ #;column (n.+ (text;size chunk))))))
- ## This is a special rule to handle new-lines within
- ## comments properly.
- (do @
- [_ (l;this new-line)]
- (recur (format comment new-line)
- (|> where
- (update@ #;line n.inc)
- (set@ #;column +0))))
- ## This is the rule for handling nested sub-comments.
- ## Ultimately, the whole comment is just treated as text
- ## (the comment must respect the syntax structure, but the
- ## output produced is just a block of text).
- ## That is why the sub-comment is covered in delimiters
- ## and then appended to the rest of the comment text.
- (do @
- [[sub-where sub-comment] (multi-line-comment^ where)]
- (recur (format comment "#(" sub-comment ")#")
- sub-where))
- ## Finally, this is the rule for closing the comment.
- (do @
- [_ (l;this ")#")]
- (wrap [(update@ #;column (n.+ +2) where)
- comment]))
- ))))
-
-## This is the only parser that should be used directly by other
-## parsers, since all comments must be treated as either being
-## single-line or multi-line.
-## That is, there is no syntactic rule prohibiting one type of comment
-## from being used in any situation (alternatively, forcing one type
-## of comment to be the only usable one).
-(def: (comment^ where)
- (-> Cursor (l;Lexer [Cursor Text]))
- (p;either (single-line-comment^ where)
- (multi-line-comment^ where)))
-
-## To simplify parsing, I remove any left-padding that an Code token
-## may have prior to parsing the token itself.
-## Left-padding is assumed to be either white-space or a comment.
-## The cursor gets updated, but the padding gets ignored.
-(def: (left-padding^ where)
- (-> Cursor (l;Lexer Cursor))
- ($_ p;either
- (do p;Monad
- [[where comment] (comment^ where)]
- (left-padding^ where))
- (do p;Monad
- [[where white-space] (space^ where)]
- (left-padding^ where))
- (:: p;Monad wrap where)))
-
-## Escaped character sequences follow the usual syntax of
-## back-slash followed by a letter (e.g. \n).
-## Unicode escapes are possible, with hexadecimal sequences between 1
-## and 4 characters long (e.g. \u12aB).
-## Escaped characters may show up in Char and Text literals.
-(def: escaped-char^
- (l;Lexer [Nat Text])
- (p;after (l;this "\\")
- (do p;Monad
- [code l;any]
- (case code
- ## Handle special cases.
- "t" (wrap [+2 "\t"])
- "v" (wrap [+2 "\v"])
- "b" (wrap [+2 "\b"])
- "n" (wrap [+2 "\n"])
- "r" (wrap [+2 "\r"])
- "f" (wrap [+2 "\f"])
- "\"" (wrap [+2 "\""])
- "\\" (wrap [+2 "\\"])
-
- ## Handle unicode escapes.
- "u"
- (do p;Monad
- [code (l;between +1 +4 l;hexadecimal)]
- (wrap (case (|> code (format "+") (:: number;Hex@Codec decode))
- (#;Right value)
- [(n.+ +2 (text;size code)) (text;from-code value)]
-
- _
- (undefined))))
-
- _
- (p;fail (format "Invalid escaping syntax: " (%t code)))))))
-
-## These are very simple parsers that just cut chunks of text in
-## specific shapes and then use decoders already present in the
-## standard library to actually produce the values from the literals.
-(def: rich-digit
- (l;Lexer Text)
- (p;either l;decimal
- (p;after (l;this "_") (p/wrap ""))))
-
-(def: rich-digits^
- (l;Lexer Text)
- (l;seq l;decimal
- (l;some rich-digit)))
-
-(def: (marker^ token)
- (-> Text (l;Lexer Text))
- (p;after (l;this token) (p/wrap token)))
-
-(do-template [ ]
- [(def: #export ( where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [chunk ]
- (case (:: decode chunk)
- (#;Left error)
- (p;fail error)
-
- (#;Right value)
- (wrap [(update@ #;column (n.+ (text;size chunk)) where)
- [where ( value)]]))))]
-
- [bool #;Bool
- (p;either (marker^ "true") (marker^ "false"))
- bool;Codec]
-
- [int #;Int
- (l;seq (p;default "" (l;one-of "-"))
- rich-digits^)
- number;Codec]
-
- [deg #;Deg
- (l;seq (l;one-of ".")
- rich-digits^)
- number;Codec]
- )
-
-(def: (nat-char where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [_ (l;this "#\"")
- [where' char] (: (l;Lexer [Cursor Text])
- ($_ p;either
- ## Normal text characters.
- (do @
- [normal (l;none-of "\\\"\n")]
- (wrap [(|> where
- (update@ #;column n.inc))
- normal]))
- ## Must handle escaped
- ## chars separately.
- (do @
- [[chars-consumed char] escaped-char^]
- (wrap [(|> where
- (update@ #;column (n.+ chars-consumed)))
- char]))))
- _ (l;this "\"")
- #let [char (maybe;assume (text;nth +0 char))]]
- (wrap [(|> where'
- (update@ #;column n.inc))
- [where (#;Nat char)]])))
-
-(def: (normal-nat where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [chunk (l;seq (l;one-of "+")
- rich-digits^)]
- (case (:: number;Codec decode chunk)
- (#;Left error)
- (p;fail error)
-
- (#;Right value)
- (wrap [(update@ #;column (n.+ (text;size chunk)) where)
- [where (#;Nat value)]]))))
-
-(def: #export (nat where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (p;either (normal-nat where)
- (nat-char where)))
-
-(def: (normal-frac where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [chunk ($_ l;seq
- (p;default "" (l;one-of "-"))
- rich-digits^
- (l;one-of ".")
- rich-digits^
- (p;default ""
- ($_ l;seq
- (l;one-of "eE")
- (p;default "" (l;one-of "+-"))
- rich-digits^)))]
- (case (:: number;Codec decode chunk)
- (#;Left error)
- (p;fail error)
-
- (#;Right value)
- (wrap [(update@ #;column (n.+ (text;size chunk)) where)
- [where (#;Frac value)]]))))
-
-(def: frac-ratio-fragment
- (l;Lexer Frac)
- (<| (p;codec number;Codec)
- (:: p;Monad map (function [digits]
- (format digits ".0")))
- rich-digits^))
-
-(def: (ratio-frac where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [chunk ($_ l;seq
- (p;default "" (l;one-of "-"))
- rich-digits^
- (l;one-of "/")
- rich-digits^)
- value (l;local chunk
- (do @
- [signed? (l;this? "-")
- numerator frac-ratio-fragment
- _ (l;this? "/")
- denominator frac-ratio-fragment
- _ (p;assert "Denominator cannot be 0."
- (not (f.= 0.0 denominator)))]
- (wrap (|> numerator
- (f.* (if signed? -1.0 1.0))
- (f./ denominator)))))]
- (wrap [(update@ #;column (n.+ (text;size chunk)) where)
- [where (#;Frac value)]])))
-
-(def: #export (frac where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (p;either (normal-frac where)
- (ratio-frac where)))
-
-## This parser looks so complex because text in Lux can be multi-line
-## and there are rules regarding how this is handled.
-(def: #export (text where)
- (-> Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [## Lux text "is delimited by double-quotes", as usual in most
- ## programming languages.
- _ (l;this "\"")
- ## I must know what column the text body starts at (which is
- ## always 1 column after the left-delimiting quote).
- ## This is important because, when procesing subsequent lines,
- ## they must all start at the same column, being left-padded with
- ## as many spaces as necessary to be column-aligned.
- ## This helps ensure that the formatting on the text in the
- ## source-code matches the formatting of the Text value.
- #let [offset-column (n.inc (get@ #;column where))]
- [where' text-read] (: (l;Lexer [Cursor Text])
- ## I must keep track of how much of the
- ## text body has been read, how far the
- ## cursor has progressed, and whether I'm
- ## processing a subsequent line, or just
- ## processing normal text body.
- (loop [text-read ""
- where (|> where
- (update@ #;column n.inc))
- must-have-offset? false]
- (p;either (if must-have-offset?
- ## If I'm at the start of a
- ## new line, I must ensure the
- ## space-offset is at least
- ## as great as the column of
- ## the text's body's column,
- ## to ensure they are aligned.
- (do @
- [offset (l;many (l;one-of " "))
- #let [offset-size (text;size offset)]]
- (if (n.>= offset-column offset-size)
- ## Any extra offset
- ## becomes part of the
- ## text's body.
- (recur (|> offset
- (text;split offset-column)
- (maybe;default (undefined))
- product;right
- (format text-read))
- (|> where
- (update@ #;column (n.+ offset-size)))
- false)
- (p;fail (format "Each line of a multi-line text must have an appropriate offset!\n"
- "Expected: " (%i (nat-to-int offset-column)) " columns.\n"
- " Actual: " (%i (nat-to-int offset-size)) " columns.\n"))))
- ($_ p;either
- ## Normal text characters.
- (do @
- [normal (l;many (l;none-of "\\\"\n"))]
- (recur (format text-read normal)
- (|> where
- (update@ #;column (n.+ (text;size normal))))
- false))
- ## Must handle escaped
- ## chars separately.
- (do @
- [[chars-consumed char] escaped-char^]
- (recur (format text-read char)
- (|> where
- (update@ #;column (n.+ chars-consumed)))
- false))
- ## The text ends when it
- ## reaches the right-delimiter.
- (do @
- [_ (l;this "\"")]
- (wrap [(update@ #;column n.inc where)
- text-read]))))
- ## If a new-line is
- ## encountered, it gets
- ## appended to the value and
- ## the loop is alerted that the
- ## next line must have an offset.
- (do @
- [_ (l;this new-line)]
- (recur (format text-read new-line)
- (|> where
- (update@ #;line n.inc)
- (set@ #;column +0))
- true)))))]
- (wrap [where'
- [where (#;Text text-read)]])))
-
-## Form and tuple syntax is mostly the same, differing only in the
-## delimiters involved.
-## They may have an arbitrary number of arbitrary Code nodes as elements.
-(do-template [ ]
- [(def: ( where ast)
- (-> Cursor
- (-> Cursor (l;Lexer [Cursor Code]))
- (l;Lexer [Cursor Code]))
- (do p;Monad
- [_ (l;this )
- [where' elems] (loop [elems (: (Sequence Code)
- sequence;empty)
- where where]
- (p;either (do @
- [## Must update the cursor as I
- ## go along, to keep things accurate.
- [where' elem] (ast where)]
- (recur (sequence;add elem elems)
- where'))
- (do @
- [## Must take into account any
- ## padding present before the
- ## end-delimiter.
- where' (left-padding^ where)
- _ (l;this )]
- (wrap [(update@ #;column n.inc where')
- (sequence;to-list elems)]))))]
- (wrap [where'
- [where ( elems)]])))]
-
- [form #;Form "(" ")"]
- [tuple #;Tuple "[" "]"]
- )
-
-## Records are almost (syntactically) the same as forms and tuples,
-## with the exception that their elements must come in pairs (as in
-## key-value pairs).
-## Semantically, though, records and tuples are just 2 different
-## representations for the same thing (a tuple).
-## In normal Lux syntax, the key position in the pair will be a tag
-## Code node, however, record Code nodes allow any Code node to occupy
-## this position, since it may be useful when processing Code syntax in
-## macros.
-(def: (record where ast)
- (-> Cursor
- (-> Cursor (l;Lexer [Cursor Code]))
- (l;Lexer [Cursor Code]))
- (do p;Monad
- [_ (l;this "{")
- [where' elems] (loop [elems (: (Sequence [Code Code])
- sequence;empty)
- where where]
- (p;either (do @
- [[where' key] (ast where)
- [where' val] (ast where')]
- (recur (sequence;add [key val] elems)
- where'))
- (do @
- [where' (left-padding^ where)
- _ (l;this "}")]
- (wrap [(update@ #;column n.inc where')
- (sequence;to-list elems)]))))]
- (wrap [where'
- [where (#;Record elems)]])))
-
-## The parts of an identifier are separated by a single mark.
-## E.g. module;name.
-## Only one such mark may be used in an identifier, since there
-## can only be 2 parts to an identifier (the module [before the
-## mark], and the name [after the mark]).
-## There are also some extra rules regarding identifier syntax,
-## encoded on the parser.
-(def: identifier-separator Text ";")
-
-## A Lux identifier is a pair of chunks of text, where the first-part
-## refers to the module that gives context to the identifier, and the
-## second part corresponds to the name of the identifier itself.
-## The module part may be absent (by being the empty text ""), but the
-## name part must always be present.
-## The rules for which characters you may use are specified in terms
-## of which characters you must avoid (to keep things as open-ended as
-## possible).
-## In particular, no white-space can be used, and neither can other
-## characters which are already used by Lux as delimiters for other
-## Code nodes (thereby reducing ambiguity while parsing).
-## Additionally, the first character in an identifier's part cannot be
-## a digit, to avoid confusion with regards to numbers.
-(def: ident-part^
- (l;Lexer Text)
- (do p;Monad
- [#let [digits "0123456789"
- delimiters (format "()[]{}#\"" identifier-separator)
- space (format white-space new-line)
- head-lexer (l;none-of (format digits delimiters space))
- tail-lexer (l;some (l;none-of (format delimiters space)))]
- head head-lexer
- tail tail-lexer]
- (wrap (format head tail))))
-
-(def: current-module-mark Text (format identifier-separator identifier-separator))
-
-(def: (ident^ current-module)
- (-> Text (l;Lexer [Ident Nat]))
- ($_ p;either
- ## When an identifier starts with 2 marks, its module is
- ## taken to be the current-module being compiled at the moment.
- ## This can be useful when mentioning identifiers and tags
- ## inside quoted/templated code in macros.
- (do p;Monad
- [_ (l;this current-module-mark)
- def-name ident-part^]
- (wrap [[current-module def-name]
- (n.+ +2 (text;size def-name))]))
- ## If the identifier is prefixed by the mark, but no module
- ## part, the module is assumed to be "lux" (otherwise known as
- ## the 'prelude').
- ## This makes it easy to refer to definitions in that module,
- ## since it is the most fundamental module in the entire
- ## standard library.
- (do p;Monad
- [_ (l;this identifier-separator)
- def-name ident-part^]
- (wrap [["lux" def-name]
- (n.inc (text;size def-name))]))
- ## Not all identifiers must be specified with a module part.
- ## If that part is not provided, the identifier will be created
- ## with the empty "" text as the module.
- ## During program analysis, such identifiers tend to be treated
- ## as if their context is the current-module, but this only
- ## applies to identifiers for tags and module definitions.
- ## Function arguments and local-variables may not be referred-to
- ## using identifiers with module parts, so being able to specify
- ## identifiers with empty modules helps with those use-cases.
- (do p;Monad
- [first-part ident-part^]
- (p;either (do @
- [_ (l;this identifier-separator)
- second-part ident-part^]
- (wrap [[first-part second-part]
- ($_ n.+
- (text;size first-part)
- +1
- (text;size second-part))]))
- (wrap [["" first-part]
- (text;size first-part)])))))
-
-## The only (syntactic) difference between a symbol and a tag (both
-## being identifiers), is that tags must be prefixed with a hash-sign
-## (i.e. #).
-## Semantically, though, they are very different, with symbols being
-## used to refer to module definitions and local variables, while tags
-## provide the compiler with information related to data-structure
-## construction and de-structuring (during pattern-matching).
-(do-template [ ]
- [(def: #export ( current-module where)
- (-> Text Cursor (l;Lexer [Cursor Code]))
- (do p;Monad
- [[value length] ]
- (wrap [(update@ #;column (|>. ($_ n.+ length)) where)
- [where ( value)]])))]
-
- [symbol #;Symbol (ident^ current-module) +0]
- [tag #;Tag (p;after (l;this "#") (ident^ current-module)) +1]
- )
-
-(exception: #export End-Of-File)
-(exception: #export Unrecognized-Input)
-
-(def: (ast current-module)
- (-> Text Cursor (l;Lexer [Cursor Code]))
- (: (-> Cursor (l;Lexer [Cursor Code]))
- (function ast' [where]
- (do p;Monad
- [where (left-padding^ where)]
- ($_ p;either
- (form where ast')
- (tuple where ast')
- (record where ast')
- (bool where)
- (nat where)
- (frac where)
- (int where)
- (deg where)
- (symbol current-module where)
- (tag current-module where)
- (text where)
- (do @
- [end? l;end?]
- (if end?
- (p;fail (End-Of-File current-module))
- (p;fail (Unrecognized-Input current-module))))
- )))))
-
-(def: #export (parse current-module [where offset source])
- (-> Text Source (e;Error [Source Code]))
- (case (p;run [offset source] (ast current-module where))
- (#e;Error error)
- (#e;Error error)
-
- (#e;Success [[offset' remaining] [where' output]])
- (#e;Success [[where' offset' remaining] output])))
diff --git a/new-luxc/source/luxc/lang/translation.lux b/new-luxc/source/luxc/lang/translation.lux
index e573aa3ae..88fc25d3a 100644
--- a/new-luxc/source/luxc/lang/translation.lux
+++ b/new-luxc/source/luxc/lang/translation.lux
@@ -9,6 +9,7 @@
(coll [dict]))
[meta]
(meta (type ["tc" check]))
+ (lang [syntax])
[host]
[io]
(world [file #+ File]))
@@ -18,7 +19,6 @@
[";L" host]
(host [";H" macro]
["$" jvm])
- ["&;" syntax]
(analysis [";A" expression]
[";A" common])
(synthesis [";S" expression])
@@ -41,7 +41,7 @@
(-> Code (Meta [$;Inst Code]))
(do meta;Monad
[[_ annsA] (&;with-scope
- (&;with-expected-type Code
+ (&;with-type Code
(analyse annsC)))
annsI (expressionT;translate (expressionS;synthesize annsA))
annsV (evalT;eval annsI)]
@@ -58,7 +58,7 @@
[_ valueT valueA] (&;with-scope
(if (meta;type? (:! Code annsV))
(do @
- [valueA (&;with-expected-type Type
+ [valueA (&;with-type Type
(analyse valueC))]
(wrap [Type valueA]))
(commonA;with-unknown-type
@@ -73,7 +73,7 @@
(^code ("lux program" (~ [_ (#;Symbol ["" program-args])]) (~ programC)))
(do meta;Monad
[[_ programA] (&;with-scope
- (&;with-expected-type (type (io;IO Unit))
+ (&;with-type (type (io;IO Unit))
(analyse programC)))
programI (expressionT;translate (expressionS;synthesize programA))]
(statementT;translate-program program-args programI))
@@ -111,7 +111,7 @@
((exhaust action) compiler')
(#e;Error error)
- (if (ex;match? &syntax;End-Of-File error)
+ (if (ex;match? syntax;End-Of-File error)
(#e;Success [compiler []])
(#e;Error error)))))
@@ -129,7 +129,7 @@
(def: (parse current-module)
(-> Text (Meta Code))
(function [compiler]
- (case (&syntax;parse current-module (get@ #;source compiler))
+ (case (syntax;parse current-module (get@ #;source compiler))
(#e;Error error)
(#e;Error error)
diff --git a/new-luxc/test/test/luxc/lang/analysis/case.lux b/new-luxc/test/test/luxc/lang/analysis/case.lux
index ee8b9b74d..6d34ef4c5 100644
--- a/new-luxc/test/test/luxc/lang/analysis/case.lux
+++ b/new-luxc/test/test/luxc/lang/analysis/case.lux
@@ -169,7 +169,7 @@
($_ seq
(test "Will reject empty pattern-matching (no branches)."
(|> (&;with-scope
- (&;with-expected-type outputT
+ (&;with-type outputT
(@;analyse-case analyse inputC (list))))
check-failure))
(test "Can analyse exhaustive pattern-matching."
@@ -182,7 +182,7 @@
(#;Named [module-name record-name]
(type;tuple primitivesT)))]
(&;with-scope
- (&;with-expected-type outputT
+ (&;with-type outputT
(@;analyse-case analyse inputC exhaustive-branchesC)))))
check-success))
(test "Will reject non-exhaustive pattern-matching."
@@ -195,7 +195,7 @@
(#;Named [module-name record-name]
(type;tuple primitivesT)))]
(&;with-scope
- (&;with-expected-type outputT
+ (&;with-type outputT
(@;analyse-case analyse inputC non-exhaustive-branchesC)))))
check-failure))
(test "Will reject redundant pattern-matching."
@@ -208,7 +208,7 @@
(#;Named [module-name record-name]
(type;tuple primitivesT)))]
(&;with-scope
- (&;with-expected-type outputT
+ (&;with-type outputT
(@;analyse-case analyse inputC redundant-branchesC)))))
check-failure))
(test "Will reject pattern-matching if the bodies of the branches do not all have the same type."
@@ -221,7 +221,7 @@
(#;Named [module-name record-name]
(type;tuple primitivesT)))]
(&;with-scope
- (&;with-expected-type outputT
+ (&;with-type outputT
(@;analyse-case analyse inputC heterogeneous-branchesC)))))
check-failure))
))))
diff --git a/new-luxc/test/test/luxc/lang/analysis/function.lux b/new-luxc/test/test/luxc/lang/analysis/function.lux
index e08e7a9bd..6cddfebd2 100644
--- a/new-luxc/test/test/luxc/lang/analysis/function.lux
+++ b/new-luxc/test/test/luxc/lang/analysis/function.lux
@@ -72,16 +72,16 @@
[inputT _] gen-primitive]
($_ seq
(test "Can analyse function."
- (|> (&;with-expected-type (type (All [a] (-> a outputT)))
+ (|> (&;with-type (type (All [a] (-> a outputT)))
(@;analyse-function analyse func-name arg-name outputC))
(meta;run (init-compiler []))
succeeds?))
(test "Generic functions can always be specialized."
- (and (|> (&;with-expected-type (-> inputT outputT)
+ (and (|> (&;with-type (-> inputT outputT)
(@;analyse-function analyse func-name arg-name outputC))
(meta;run (init-compiler []))
succeeds?)
- (|> (&;with-expected-type (-> inputT inputT)
+ (|> (&;with-type (-> inputT inputT)
(@;analyse-function analyse func-name arg-name (code;symbol ["" arg-name])))
(meta;run (init-compiler []))
succeeds?)))
@@ -96,7 +96,7 @@
(meta;run (init-compiler []))
(check-type (type (All [a] (-> a a))))))
(test "The function's name is bound to the function's type."
- (|> (&;with-expected-type (type (Rec self (-> inputT self)))
+ (|> (&;with-type (type (Rec self (-> inputT self)))
(@;analyse-function analyse func-name arg-name (code;symbol ["" func-name])))
(meta;run (init-compiler []))
succeeds?))
diff --git a/new-luxc/test/test/luxc/lang/analysis/procedure/common.lux b/new-luxc/test/test/luxc/lang/analysis/procedure/common.lux
index dae39228f..3420ebb4d 100644
--- a/new-luxc/test/test/luxc/lang/analysis/procedure/common.lux
+++ b/new-luxc/test/test/luxc/lang/analysis/procedure/common.lux
@@ -28,7 +28,7 @@
[(def: ( procedure params output-type)
(-> Text (List Code) Type Bool)
(|> (&;with-scope
- (&;with-expected-type output-type
+ (&;with-type output-type
(@;analyse-procedure analyse evalL;eval procedure params)))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -262,7 +262,7 @@
(test "Can get a value inside an array."
(|> (&scope;with-scope ""
(&scope;with-local [var-name arrayT]
- (&;with-expected-type elemT
+ (&;with-type elemT
(@;analyse-procedure analyse evalL;eval "lux array get"
(list idxC
(code;symbol ["" var-name]))))))
@@ -275,7 +275,7 @@
(test "Can put a value inside an array."
(|> (&scope;with-scope ""
(&scope;with-local [var-name arrayT]
- (&;with-expected-type arrayT
+ (&;with-type arrayT
(@;analyse-procedure analyse evalL;eval "lux array put"
(list idxC
elemC
@@ -289,7 +289,7 @@
(test "Can remove a value from an array."
(|> (&scope;with-scope ""
(&scope;with-local [var-name arrayT]
- (&;with-expected-type arrayT
+ (&;with-type arrayT
(@;analyse-procedure analyse evalL;eval "lux array remove"
(list idxC
(code;symbol ["" var-name]))))))
@@ -302,7 +302,7 @@
(test "Can query the size of an array."
(|> (&scope;with-scope ""
(&scope;with-local [var-name arrayT]
- (&;with-expected-type Nat
+ (&;with-type Nat
(@;analyse-procedure analyse evalL;eval "lux array size"
(list (code;symbol ["" var-name]))))))
(meta;run (init-compiler []))
@@ -362,7 +362,7 @@
(test "Can read the value of an atomic reference."
(|> (&scope;with-scope ""
(&scope;with-local [var-name atomT]
- (&;with-expected-type elemT
+ (&;with-type elemT
(@;analyse-procedure analyse evalL;eval "lux atom read"
(list (code;symbol ["" var-name]))))))
(meta;run (init-compiler []))
@@ -374,7 +374,7 @@
(test "Can swap the value of an atomic reference."
(|> (&scope;with-scope ""
(&scope;with-local [var-name atomT]
- (&;with-expected-type Bool
+ (&;with-type Bool
(@;analyse-procedure analyse evalL;eval "lux atom compare-and-swap"
(list elemC
elemC
diff --git a/new-luxc/test/test/luxc/lang/analysis/procedure/host.jvm.lux b/new-luxc/test/test/luxc/lang/analysis/procedure/host.jvm.lux
index 3d5da350a..783174777 100644
--- a/new-luxc/test/test/luxc/lang/analysis/procedure/host.jvm.lux
+++ b/new-luxc/test/test/luxc/lang/analysis/procedure/host.jvm.lux
@@ -36,7 +36,7 @@
(|> (do Monad
[runtime-bytecode @runtime;translate]
(&;with-scope
- (&;with-expected-type output-type
+ (&;with-type output-type
(@;analyse-procedure analyse evalL;eval procedure params))))
(meta;run (init-compiler []))
(case> (#e;Success _)
diff --git a/new-luxc/test/test/luxc/lang/analysis/structure.lux b/new-luxc/test/test/luxc/lang/analysis/structure.lux
index b299872ca..8cc95fd88 100644
--- a/new-luxc/test/test/luxc/lang/analysis/structure.lux
+++ b/new-luxc/test/test/luxc/lang/analysis/structure.lux
@@ -45,7 +45,7 @@
($_ seq
(test "Can analyse sum."
(|> (&;with-scope
- (&;with-expected-type variantT
+ (&;with-type variantT
(@;analyse-sum analyse choice valueC)))
(meta;run (init-compiler []))
(case> (^multi (#e;Success [_ sumA])
@@ -62,7 +62,7 @@
[[_ varT] (&;with-type-env tc;var)
_ (&;with-type-env
(tc;check varT variantT))]
- (&;with-expected-type varT
+ (&;with-type varT
(@;analyse-sum analyse choice valueC))))
(meta;run (init-compiler []))
(case> (^multi (#e;Success [_ sumA])
@@ -77,7 +77,7 @@
(|> (&;with-scope
(do meta;Monad
[[_ varT] (&;with-type-env tc;var)]
- (&;with-expected-type varT
+ (&;with-type varT
(@;analyse-sum analyse choice valueC))))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -87,7 +87,7 @@
true)))
(test "Can analyse sum through existential quantification."
(|> (&;with-scope
- (&;with-expected-type (type;ex-q +1 +variantT)
+ (&;with-type (type;ex-q +1 +variantT)
(@;analyse-sum analyse +choice +valueC)))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -97,7 +97,7 @@
false)))
(test "Can analyse sum through universal quantification."
(|> (&;with-scope
- (&;with-expected-type (type;univ-q +1 +variantT)
+ (&;with-type (type;univ-q +1 +variantT)
(@;analyse-sum analyse +choice +valueC)))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -121,7 +121,7 @@
+tupleT (type;tuple (list/map product;left +primitives))]]
($_ seq
(test "Can analyse product."
- (|> (&;with-expected-type (type;tuple (list/map product;left primitives))
+ (|> (&;with-type (type;tuple (list/map product;left primitives))
(@;analyse-product analyse (list/map product;right primitives)))
(meta;run (init-compiler []))
(case> (#e;Success tupleA)
@@ -141,7 +141,7 @@
_
false)))
(test "Can analyse pseudo-product (singleton tuple)"
- (|> (&;with-expected-type singletonT
+ (|> (&;with-type singletonT
(analyse (` [(~ singletonC)])))
(meta;run (init-compiler []))
(case> (#e;Success singletonA)
@@ -155,7 +155,7 @@
[[_ varT] (&;with-type-env tc;var)
_ (&;with-type-env
(tc;check varT (type;tuple (list/map product;left primitives))))]
- (&;with-expected-type varT
+ (&;with-type varT
(@;analyse-product analyse (list/map product;right primitives)))))
(meta;run (init-compiler []))
(case> (#e;Success [_ tupleA])
@@ -165,7 +165,7 @@
false)))
(test "Can analyse product through existential quantification."
(|> (&;with-scope
- (&;with-expected-type (type;ex-q +1 +tupleT)
+ (&;with-type (type;ex-q +1 +tupleT)
(@;analyse-product analyse (list/map product;right +primitives))))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -175,7 +175,7 @@
false)))
(test "Cannot analyse product through universal quantification."
(|> (&;with-scope
- (&;with-expected-type (type;univ-q +1 +tupleT)
+ (&;with-type (type;univ-q +1 +tupleT)
(@;analyse-product analyse (list/map product;right +primitives))))
(meta;run (init-compiler []))
(case> (#e;Success _)
@@ -265,7 +265,7 @@
(do meta;Monad
[_ (@module;declare-tags tags false named-polyT)]
(&;with-scope
- (&;with-expected-type variantT
+ (&;with-type variantT
(@;analyse-tagged-sum analyse [module-name other-choice-tag] other-choiceC)))))
(meta;run (init-compiler []))
(case> (^multi (#e;Success [_ _ sumA])
@@ -321,7 +321,7 @@
(do meta;Monad
[_ (@module;declare-tags tags false named-polyT)]
(&;with-scope
- (&;with-expected-type tupleT
+ (&;with-type tupleT
(@;analyse-record analyse recordC)))))
(meta;run (init-compiler []))
(case> (^multi (#e;Success [_ _ productA])
diff --git a/new-luxc/test/test/luxc/lang/syntax.lux b/new-luxc/test/test/luxc/lang/syntax.lux
deleted file mode 100644
index 0f2306eb1..000000000
--- a/new-luxc/test/test/luxc/lang/syntax.lux
+++ /dev/null
@@ -1,233 +0,0 @@
-(;module:
- lux
- (lux [io]
- (control [monad #+ do])
- (data [number]
- ["e" error]
- [text]
- (text format
- ["l" lexer])
- (coll [list]))
- ["r" math/random "r/" Monad]
- (meta [code])
- test)
- (luxc (lang ["&" syntax])))
-
-(def: default-cursor
- Cursor
- {#;module ""
- #;line +0
- #;column +0})
-
-(def: ident-part^
- (r;Random Text)
- (do r;Monad
- [#let [digits "0123456789"
- delimiters "()[]{}#;\""
- space "\t\v \n\r\f"
- invalid-range (format digits delimiters space)
- char-gen (|> r;nat
- (r;filter (function [sample]
- (not (text;contains? (text;from-code sample)
- invalid-range)))))]
- size (|> r;nat (:: @ map (|>. (n.% +20) (n.max +1))))]
- (r;text' char-gen size)))
-
-(def: ident^
- (r;Random Ident)
- (r;seq ident-part^ ident-part^))
-
-(def: code^
- (r;Random Code)
- (let [numeric^ (: (r;Random Code)
- ($_ r;either
- (|> r;bool (r/map (|>. #;Bool [default-cursor])))
- (|> r;nat (r/map (|>. #;Nat [default-cursor])))
- (|> r;int (r/map (|>. #;Int [default-cursor])))
- (|> r;deg (r/map (|>. #;Deg [default-cursor])))
- (|> r;frac (r/map (|>. #;Frac [default-cursor])))))
- textual^ (: (r;Random Code)
- ($_ r;either
- (do r;Monad
- [size (|> r;nat (r/map (n.% +20)))]
- (|> (r;text size) (r/map (|>. #;Text [default-cursor]))))
- (|> ident^ (r/map (|>. #;Symbol [default-cursor])))
- (|> ident^ (r/map (|>. #;Tag [default-cursor])))))
- simple^ (: (r;Random Code)
- ($_ r;either
- numeric^
- textual^))]
- (r;rec
- (function [code^]
- (let [multi^ (do r;Monad
- [size (|> r;nat (r/map (n.% +3)))]
- (r;list size code^))
- composite^ (: (r;Random Code)
- ($_ r;either
- (|> multi^ (r/map (|>. #;Form [default-cursor])))
- (|> multi^ (r/map (|>. #;Tuple [default-cursor])))
- (do r;Monad
- [size (|> r;nat (r/map (n.% +3)))]
- (|> (r;list size (r;seq code^ code^))
- (r/map (|>. #;Record [default-cursor]))))))]
- (r;either simple^
- composite^))))))
-
-(context: "Lux code syntax."
- (<| (times +100)
- (do @
- [sample code^
- other code^]
- ($_ seq
- (test "Can parse Lux code."
- (case (&;parse "" [default-cursor +0 (code;to-text sample)])
- (#e;Error error)
- false
-
- (#e;Success [_ parsed])
- (:: code;Eq = parsed sample)))
- (test "Can parse Lux multiple code nodes."
- (case (&;parse "" [default-cursor +0 (format (code;to-text sample) " "
- (code;to-text other))])
- (#e;Error error)
- false
-
- (#e;Success [remaining =sample])
- (case (&;parse "" remaining)
- (#e;Error error)
- false
-
- (#e;Success [_ =other])
- (and (:: code;Eq = sample =sample)
- (:: code;Eq = other =other)))))
- ))))
-
-(def: nat-to-frac
- (-> Nat Frac)
- (|>. nat-to-int int-to-frac))
-
-(context: "Frac special syntax."
- (<| (times +100)
- (do @
- [numerator (|> r;nat (:: @ map (|>. (n.% +100) nat-to-frac)))
- denominator (|> r;nat (:: @ map (|>. (n.% +100) (n.max +1) nat-to-frac)))
- signed? r;bool
- #let [expected (|> numerator (f./ denominator) (f.* (if signed? -1.0 1.0)))]]
- (test "Can parse frac ratio syntax."
- (case (&;parse "" [default-cursor +0
- (format (if signed? "-" "")
- (%i (frac-to-int numerator))
- "/"
- (%i (frac-to-int denominator)))])
- (#e;Success [_ [_ (#;Frac actual)]])
- (f.= expected actual)
-
- _
- false)
- ))))
-
-(context: "Nat special syntax."
- (<| (times +100)
- (do @
- [expected (|> r;nat (:: @ map (n.% +1_000)))]
- (test "Can parse nat char syntax."
- (case (&;parse "" [default-cursor +0
- (format "#" (%t (text;from-code expected)) "")])
- (#e;Success [_ [_ (#;Nat actual)]])
- (n.= expected actual)
-
- _
- false)
- ))))
-
-(def: comment-text^
- (r;Random Text)
- (let [char-gen (|> r;nat (r;filter (function [value]
- (not (or (text;space? value)
- (n.= (char "#") value)
- (n.= (char "(") value)
- (n.= (char ")") value))))))]
- (do r;Monad
- [size (|> r;nat (r/map (n.% +20)))]
- (r;text' char-gen size))))
-
-(def: comment^
- (r;Random Text)
- (r;either (do r;Monad
- [comment comment-text^]
- (wrap (format "## " comment "\n")))
- (r;rec (function [nested^]
- (do r;Monad
- [comment (r;either comment-text^
- nested^)]
- (wrap (format "#( " comment " )#")))))))
-
-(context: "Multi-line text & comments."
- (<| (times +100)
- (do @
- [#let [char-gen (|> r;nat (r;filter (function [value]
- (not (or (text;space? value)
- (n.= (char "\"") value))))))]
- x char-gen
- y char-gen
- z char-gen
- offset-size (|> r;nat (r/map (|>. (n.% +10) (n.max +1))))
- #let [offset (text;join-with "" (list;repeat offset-size " "))]
- sample code^
- comment comment^
- unbalanced-comment comment-text^]
- ($_ seq
- (test "Will reject invalid multi-line text."
- (let [bad-match (format (text;from-code x) "\n"
- (text;from-code y) "\n"
- (text;from-code z))]
- (case (&;parse "" [default-cursor +0
- (format "\"" bad-match "\"")])
- (#e;Error error)
- true
-
- (#e;Success [_ parsed])
- false)))
- (test "Will accept valid multi-line text"
- (let [good-input (format (text;from-code x) "\n"
- offset (text;from-code y) "\n"
- offset (text;from-code z))
- good-output (format (text;from-code x) "\n"
- (text;from-code y) "\n"
- (text;from-code z))]
- (case (&;parse "" [(|> default-cursor (update@ #;column (n.+ (n.dec offset-size))))
- +0
- (format "\"" good-input "\"")])
- (#e;Error error)
- false
-
- (#e;Success [_ parsed])
- (:: code;Eq =
- parsed
- (code;text good-output)))))
- (test "Can handle comments."
- (case (&;parse "" [default-cursor +0
- (format comment (code;to-text sample))])
- (#e;Error error)
- false
-
- (#e;Success [_ parsed])
- (:: code;Eq = parsed sample)))
- (test "Will reject unbalanced multi-line comments."
- (and (case (&;parse "" [default-cursor +0
- (format "#(" "#(" unbalanced-comment ")#"
- (code;to-text sample))])
- (#e;Error error)
- true
-
- (#e;Success [_ parsed])
- false)
- (case (&;parse "" [default-cursor +0
- (format "#(" unbalanced-comment ")#" ")#"
- (code;to-text sample))])
- (#e;Error error)
- true
-
- (#e;Success [_ parsed])
- false)))
- ))))
diff --git a/new-luxc/test/tests.lux b/new-luxc/test/tests.lux
index 88d89ad90..b36782517 100644
--- a/new-luxc/test/tests.lux
+++ b/new-luxc/test/tests.lux
@@ -5,8 +5,7 @@
(concurrency [promise])
[cli #+ program:]
[test])
- (test (luxc (lang ["_;L" syntax]
- (analysis ["_;A" primitive]
+ (test (luxc (lang (analysis ["_;A" primitive]
["_;A" structure]
["_;A" reference]
["_;A" case]
diff --git a/stdlib/source/lux/lang/syntax.lux b/stdlib/source/lux/lang/syntax.lux
new file mode 100644
index 000000000..9fe4939a2
--- /dev/null
+++ b/stdlib/source/lux/lang/syntax.lux
@@ -0,0 +1,623 @@
+## This is the LuxC's parser.
+## It takes the source code of a Lux file in raw text form and
+## extracts the syntactic structure of the code from it.
+## It only produces Lux Code nodes, and thus removes any white-space
+## and comments while processing its inputs.
+
+## Another important aspect of the parser is that it keeps track of
+## its position within the input data.
+## That is, the parser takes into account the line and column
+## information in the input text (it doesn't really touch the
+## file-name aspect of the cursor, leaving it intact in whatever
+## base-line cursor it is given).
+
+## This particular piece of functionality is not located in one
+## function, but it is instead scattered throughout several parsers,
+## since the logic for how to update the cursor varies, depending on
+## what is being parsed, and the rules involved.
+
+## You will notice that several parsers have a "where" parameter, that
+## tells them the cursor position prior to the parser being run.
+## They are supposed to produce some parsed output, alongside an
+## updated cursor pointing to the end position, after the parser was run.
+
+## Lux Code nodes/tokens are annotated with cursor meta-data
+## (file-name, line, column) to keep track of their provenance and
+## location, which is helpful for documentation and debugging.
+(;module:
+ lux
+ (lux (control monad
+ ["p" parser "p/" Monad]
+ ["ex" exception #+ exception:])
+ (data [bool]
+ [text]
+ ["e" error]
+ [number]
+ [product]
+ [maybe]
+ (text ["l" lexer]
+ format)
+ (coll [sequence #+ Sequence]))))
+
+(def: white-space Text "\t\v \r\f")
+(def: new-line Text "\n")
+
+## This is the parser for white-space.
+## Whenever a new-line is encountered, the column gets reset to 0, and
+## the line gets incremented.
+## It operates recursively in order to produce the longest continuous
+## chunk of white-space.
+(def: (space^ where)
+ (-> Cursor (l;Lexer [Cursor Text]))
+ (p;either (do p;Monad
+ [content (l;many (l;one-of white-space))]
+ (wrap [(update@ #;column (n.+ (text;size content)) where)
+ content]))
+ ## New-lines must be handled as a separate case to ensure line
+ ## information is handled properly.
+ (do p;Monad
+ [content (l;many (l;one-of new-line))]
+ (wrap [(|> where
+ (update@ #;line (n.+ (text;size content)))
+ (set@ #;column +0))
+ content]))
+ ))
+
+## Single-line comments can start anywhere, but only go up to the
+## next new-line.
+(def: (single-line-comment^ where)
+ (-> Cursor (l;Lexer [Cursor Text]))
+ (do p;Monad
+ [_ (l;this "##")
+ comment (l;some (l;none-of new-line))
+ _ (l;this new-line)]
+ (wrap [(|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0))
+ comment])))
+
+## This is just a helper parser to find text which doesn't run into
+## any special character sequences for multi-line comments.
+(def: comment-bound^
+ (l;Lexer Unit)
+ ($_ p;either
+ (l;this new-line)
+ (l;this ")#")
+ (l;this "#(")))
+
+## Multi-line comments are bounded by #( these delimiters, #(and, they may
+## also be nested)# )#.
+## Multi-line comment syntax must be balanced.
+## That is, any nested comment must have matched delimiters.
+## Unbalanced comments ought to be rejected as invalid code.
+(def: (multi-line-comment^ where)
+ (-> Cursor (l;Lexer [Cursor Text]))
+ (do p;Monad
+ [_ (l;this "#(")]
+ (loop [comment ""
+ where (update@ #;column (n.+ +2) where)]
+ ($_ p;either
+ ## These are normal chunks of commented text.
+ (do @
+ [chunk (l;many (l;not comment-bound^))]
+ (recur (format comment chunk)
+ (|> where
+ (update@ #;column (n.+ (text;size chunk))))))
+ ## This is a special rule to handle new-lines within
+ ## comments properly.
+ (do @
+ [_ (l;this new-line)]
+ (recur (format comment new-line)
+ (|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0))))
+ ## This is the rule for handling nested sub-comments.
+ ## Ultimately, the whole comment is just treated as text
+ ## (the comment must respect the syntax structure, but the
+ ## output produced is just a block of text).
+ ## That is why the sub-comment is covered in delimiters
+ ## and then appended to the rest of the comment text.
+ (do @
+ [[sub-where sub-comment] (multi-line-comment^ where)]
+ (recur (format comment "#(" sub-comment ")#")
+ sub-where))
+ ## Finally, this is the rule for closing the comment.
+ (do @
+ [_ (l;this ")#")]
+ (wrap [(update@ #;column (n.+ +2) where)
+ comment]))
+ ))))
+
+## This is the only parser that should be used directly by other
+## parsers, since all comments must be treated as either being
+## single-line or multi-line.
+## That is, there is no syntactic rule prohibiting one type of comment
+## from being used in any situation (alternatively, forcing one type
+## of comment to be the only usable one).
+(def: (comment^ where)
+ (-> Cursor (l;Lexer [Cursor Text]))
+ (p;either (single-line-comment^ where)
+ (multi-line-comment^ where)))
+
+## To simplify parsing, I remove any left-padding that an Code token
+## may have prior to parsing the token itself.
+## Left-padding is assumed to be either white-space or a comment.
+## The cursor gets updated, but the padding gets ignored.
+(def: (left-padding^ where)
+ (-> Cursor (l;Lexer Cursor))
+ ($_ p;either
+ (do p;Monad
+ [[where comment] (comment^ where)]
+ (left-padding^ where))
+ (do p;Monad
+ [[where white-space] (space^ where)]
+ (left-padding^ where))
+ (:: p;Monad wrap where)))
+
+## Escaped character sequences follow the usual syntax of
+## back-slash followed by a letter (e.g. \n).
+## Unicode escapes are possible, with hexadecimal sequences between 1
+## and 4 characters long (e.g. \u12aB).
+## Escaped characters may show up in Char and Text literals.
+(def: escaped-char^
+ (l;Lexer [Nat Text])
+ (p;after (l;this "\\")
+ (do p;Monad
+ [code l;any]
+ (case code
+ ## Handle special cases.
+ "t" (wrap [+2 "\t"])
+ "v" (wrap [+2 "\v"])
+ "b" (wrap [+2 "\b"])
+ "n" (wrap [+2 "\n"])
+ "r" (wrap [+2 "\r"])
+ "f" (wrap [+2 "\f"])
+ "\"" (wrap [+2 "\""])
+ "\\" (wrap [+2 "\\"])
+
+ ## Handle unicode escapes.
+ "u"
+ (do p;Monad
+ [code (l;between +1 +4 l;hexadecimal)]
+ (wrap (case (|> code (format "+") (:: number;Hex@Codec decode))
+ (#;Right value)
+ [(n.+ +2 (text;size code)) (text;from-code value)]
+
+ _
+ (undefined))))
+
+ _
+ (p;fail (format "Invalid escaping syntax: " (%t code)))))))
+
+## These are very simple parsers that just cut chunks of text in
+## specific shapes and then use decoders already present in the
+## standard library to actually produce the values from the literals.
+(def: rich-digit
+ (l;Lexer Text)
+ (p;either l;decimal
+ (p;after (l;this "_") (p/wrap ""))))
+
+(def: rich-digits^
+ (l;Lexer Text)
+ (l;seq l;decimal
+ (l;some rich-digit)))
+
+(def: (marker^ token)
+ (-> Text (l;Lexer Text))
+ (p;after (l;this token) (p/wrap token)))
+
+(do-template [ ]
+ [(def: #export ( where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [chunk ]
+ (case (:: decode chunk)
+ (#;Left error)
+ (p;fail error)
+
+ (#;Right value)
+ (wrap [(update@ #;column (n.+ (text;size chunk)) where)
+ [where ( value)]]))))]
+
+ [bool #;Bool
+ (p;either (marker^ "true") (marker^ "false"))
+ bool;Codec]
+
+ [int #;Int
+ (l;seq (p;default "" (l;one-of "-"))
+ rich-digits^)
+ number;Codec]
+
+ [deg #;Deg
+ (l;seq (l;one-of ".")
+ rich-digits^)
+ number;Codec]
+ )
+
+(def: (nat-char where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [_ (l;this "#\"")
+ [where' char] (: (l;Lexer [Cursor Text])
+ ($_ p;either
+ ## Normal text characters.
+ (do @
+ [normal (l;none-of "\\\"\n")]
+ (wrap [(|> where
+ (update@ #;column n.inc))
+ normal]))
+ ## Must handle escaped
+ ## chars separately.
+ (do @
+ [[chars-consumed char] escaped-char^]
+ (wrap [(|> where
+ (update@ #;column (n.+ chars-consumed)))
+ char]))))
+ _ (l;this "\"")
+ #let [char (maybe;assume (text;nth +0 char))]]
+ (wrap [(|> where'
+ (update@ #;column n.inc))
+ [where (#;Nat char)]])))
+
+(def: (normal-nat where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [chunk (l;seq (l;one-of "+")
+ rich-digits^)]
+ (case (:: number;Codec decode chunk)
+ (#;Left error)
+ (p;fail error)
+
+ (#;Right value)
+ (wrap [(update@ #;column (n.+ (text;size chunk)) where)
+ [where (#;Nat value)]]))))
+
+(def: #export (nat where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (p;either (normal-nat where)
+ (nat-char where)))
+
+(def: (normal-frac where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [chunk ($_ l;seq
+ (p;default "" (l;one-of "-"))
+ rich-digits^
+ (l;one-of ".")
+ rich-digits^
+ (p;default ""
+ ($_ l;seq
+ (l;one-of "eE")
+ (p;default "" (l;one-of "+-"))
+ rich-digits^)))]
+ (case (:: number;Codec decode chunk)
+ (#;Left error)
+ (p;fail error)
+
+ (#;Right value)
+ (wrap [(update@ #;column (n.+ (text;size chunk)) where)
+ [where (#;Frac value)]]))))
+
+(def: frac-ratio-fragment
+ (l;Lexer Frac)
+ (<| (p;codec number;Codec)
+ (:: p;Monad map (function [digits]
+ (format digits ".0")))
+ rich-digits^))
+
+(def: (ratio-frac where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [chunk ($_ l;seq
+ (p;default "" (l;one-of "-"))
+ rich-digits^
+ (l;one-of "/")
+ rich-digits^)
+ value (l;local chunk
+ (do @
+ [signed? (l;this? "-")
+ numerator frac-ratio-fragment
+ _ (l;this? "/")
+ denominator frac-ratio-fragment
+ _ (p;assert "Denominator cannot be 0."
+ (not (f.= 0.0 denominator)))]
+ (wrap (|> numerator
+ (f.* (if signed? -1.0 1.0))
+ (f./ denominator)))))]
+ (wrap [(update@ #;column (n.+ (text;size chunk)) where)
+ [where (#;Frac value)]])))
+
+(def: #export (frac where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (p;either (normal-frac where)
+ (ratio-frac where)))
+
+## This parser looks so complex because text in Lux can be multi-line
+## and there are rules regarding how this is handled.
+(def: #export (text where)
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [## Lux text "is delimited by double-quotes", as usual in most
+ ## programming languages.
+ _ (l;this "\"")
+ ## I must know what column the text body starts at (which is
+ ## always 1 column after the left-delimiting quote).
+ ## This is important because, when procesing subsequent lines,
+ ## they must all start at the same column, being left-padded with
+ ## as many spaces as necessary to be column-aligned.
+ ## This helps ensure that the formatting on the text in the
+ ## source-code matches the formatting of the Text value.
+ #let [offset-column (n.inc (get@ #;column where))]
+ [where' text-read] (: (l;Lexer [Cursor Text])
+ ## I must keep track of how much of the
+ ## text body has been read, how far the
+ ## cursor has progressed, and whether I'm
+ ## processing a subsequent line, or just
+ ## processing normal text body.
+ (loop [text-read ""
+ where (|> where
+ (update@ #;column n.inc))
+ must-have-offset? false]
+ (p;either (if must-have-offset?
+ ## If I'm at the start of a
+ ## new line, I must ensure the
+ ## space-offset is at least
+ ## as great as the column of
+ ## the text's body's column,
+ ## to ensure they are aligned.
+ (do @
+ [offset (l;many (l;one-of " "))
+ #let [offset-size (text;size offset)]]
+ (if (n.>= offset-column offset-size)
+ ## Any extra offset
+ ## becomes part of the
+ ## text's body.
+ (recur (|> offset
+ (text;split offset-column)
+ (maybe;default (undefined))
+ product;right
+ (format text-read))
+ (|> where
+ (update@ #;column (n.+ offset-size)))
+ false)
+ (p;fail (format "Each line of a multi-line text must have an appropriate offset!\n"
+ "Expected: " (%i (nat-to-int offset-column)) " columns.\n"
+ " Actual: " (%i (nat-to-int offset-size)) " columns.\n"))))
+ ($_ p;either
+ ## Normal text characters.
+ (do @
+ [normal (l;many (l;none-of "\\\"\n"))]
+ (recur (format text-read normal)
+ (|> where
+ (update@ #;column (n.+ (text;size normal))))
+ false))
+ ## Must handle escaped
+ ## chars separately.
+ (do @
+ [[chars-consumed char] escaped-char^]
+ (recur (format text-read char)
+ (|> where
+ (update@ #;column (n.+ chars-consumed)))
+ false))
+ ## The text ends when it
+ ## reaches the right-delimiter.
+ (do @
+ [_ (l;this "\"")]
+ (wrap [(update@ #;column n.inc where)
+ text-read]))))
+ ## If a new-line is
+ ## encountered, it gets
+ ## appended to the value and
+ ## the loop is alerted that the
+ ## next line must have an offset.
+ (do @
+ [_ (l;this new-line)]
+ (recur (format text-read new-line)
+ (|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0))
+ true)))))]
+ (wrap [where'
+ [where (#;Text text-read)]])))
+
+## Form and tuple syntax is mostly the same, differing only in the
+## delimiters involved.
+## They may have an arbitrary number of arbitrary Code nodes as elements.
+(do-template [ ]
+ [(def: ( where ast)
+ (-> Cursor
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [_ (l;this )
+ [where' elems] (loop [elems (: (Sequence Code)
+ sequence;empty)
+ where where]
+ (p;either (do @
+ [## Must update the cursor as I
+ ## go along, to keep things accurate.
+ [where' elem] (ast where)]
+ (recur (sequence;add elem elems)
+ where'))
+ (do @
+ [## Must take into account any
+ ## padding present before the
+ ## end-delimiter.
+ where' (left-padding^ where)
+ _ (l;this )]
+ (wrap [(update@ #;column n.inc where')
+ (sequence;to-list elems)]))))]
+ (wrap [where'
+ [where ( elems)]])))]
+
+ [form #;Form "(" ")"]
+ [tuple #;Tuple "[" "]"]
+ )
+
+## Records are almost (syntactically) the same as forms and tuples,
+## with the exception that their elements must come in pairs (as in
+## key-value pairs).
+## Semantically, though, records and tuples are just 2 different
+## representations for the same thing (a tuple).
+## In normal Lux syntax, the key position in the pair will be a tag
+## Code node, however, record Code nodes allow any Code node to occupy
+## this position, since it may be useful when processing Code syntax in
+## macros.
+(def: (record where ast)
+ (-> Cursor
+ (-> Cursor (l;Lexer [Cursor Code]))
+ (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [_ (l;this "{")
+ [where' elems] (loop [elems (: (Sequence [Code Code])
+ sequence;empty)
+ where where]
+ (p;either (do @
+ [[where' key] (ast where)
+ [where' val] (ast where')]
+ (recur (sequence;add [key val] elems)
+ where'))
+ (do @
+ [where' (left-padding^ where)
+ _ (l;this "}")]
+ (wrap [(update@ #;column n.inc where')
+ (sequence;to-list elems)]))))]
+ (wrap [where'
+ [where (#;Record elems)]])))
+
+## The parts of an identifier are separated by a single mark.
+## E.g. module;name.
+## Only one such mark may be used in an identifier, since there
+## can only be 2 parts to an identifier (the module [before the
+## mark], and the name [after the mark]).
+## There are also some extra rules regarding identifier syntax,
+## encoded on the parser.
+(def: identifier-separator Text ";")
+
+## A Lux identifier is a pair of chunks of text, where the first-part
+## refers to the module that gives context to the identifier, and the
+## second part corresponds to the name of the identifier itself.
+## The module part may be absent (by being the empty text ""), but the
+## name part must always be present.
+## The rules for which characters you may use are specified in terms
+## of which characters you must avoid (to keep things as open-ended as
+## possible).
+## In particular, no white-space can be used, and neither can other
+## characters which are already used by Lux as delimiters for other
+## Code nodes (thereby reducing ambiguity while parsing).
+## Additionally, the first character in an identifier's part cannot be
+## a digit, to avoid confusion with regards to numbers.
+(def: ident-part^
+ (l;Lexer Text)
+ (do p;Monad
+ [#let [digits "0123456789"
+ delimiters (format "()[]{}#\"" identifier-separator)
+ space (format white-space new-line)
+ head-lexer (l;none-of (format digits delimiters space))
+ tail-lexer (l;some (l;none-of (format delimiters space)))]
+ head head-lexer
+ tail tail-lexer]
+ (wrap (format head tail))))
+
+(def: current-module-mark Text (format identifier-separator identifier-separator))
+
+(def: (ident^ current-module)
+ (-> Text (l;Lexer [Ident Nat]))
+ ($_ p;either
+ ## When an identifier starts with 2 marks, its module is
+ ## taken to be the current-module being compiled at the moment.
+ ## This can be useful when mentioning identifiers and tags
+ ## inside quoted/templated code in macros.
+ (do p;Monad
+ [_ (l;this current-module-mark)
+ def-name ident-part^]
+ (wrap [[current-module def-name]
+ (n.+ +2 (text;size def-name))]))
+ ## If the identifier is prefixed by the mark, but no module
+ ## part, the module is assumed to be "lux" (otherwise known as
+ ## the 'prelude').
+ ## This makes it easy to refer to definitions in that module,
+ ## since it is the most fundamental module in the entire
+ ## standard library.
+ (do p;Monad
+ [_ (l;this identifier-separator)
+ def-name ident-part^]
+ (wrap [["lux" def-name]
+ (n.inc (text;size def-name))]))
+ ## Not all identifiers must be specified with a module part.
+ ## If that part is not provided, the identifier will be created
+ ## with the empty "" text as the module.
+ ## During program analysis, such identifiers tend to be treated
+ ## as if their context is the current-module, but this only
+ ## applies to identifiers for tags and module definitions.
+ ## Function arguments and local-variables may not be referred-to
+ ## using identifiers with module parts, so being able to specify
+ ## identifiers with empty modules helps with those use-cases.
+ (do p;Monad
+ [first-part ident-part^]
+ (p;either (do @
+ [_ (l;this identifier-separator)
+ second-part ident-part^]
+ (wrap [[first-part second-part]
+ ($_ n.+
+ (text;size first-part)
+ +1
+ (text;size second-part))]))
+ (wrap [["" first-part]
+ (text;size first-part)])))))
+
+## The only (syntactic) difference between a symbol and a tag (both
+## being identifiers), is that tags must be prefixed with a hash-sign
+## (i.e. #).
+## Semantically, though, they are very different, with symbols being
+## used to refer to module definitions and local variables, while tags
+## provide the compiler with information related to data-structure
+## construction and de-structuring (during pattern-matching).
+(do-template [ ]
+ [(def: #export ( current-module where)
+ (-> Text Cursor (l;Lexer [Cursor Code]))
+ (do p;Monad
+ [[value length] ]
+ (wrap [(update@ #;column (|>. ($_ n.+ length)) where)
+ [where ( value)]])))]
+
+ [symbol #;Symbol (ident^ current-module) +0]
+ [tag #;Tag (p;after (l;this "#") (ident^ current-module)) +1]
+ )
+
+(exception: #export End-Of-File)
+(exception: #export Unrecognized-Input)
+
+(def: (ast current-module)
+ (-> Text Cursor (l;Lexer [Cursor Code]))
+ (: (-> Cursor (l;Lexer [Cursor Code]))
+ (function ast' [where]
+ (do p;Monad
+ [where (left-padding^ where)]
+ ($_ p;either
+ (form where ast')
+ (tuple where ast')
+ (record where ast')
+ (bool where)
+ (nat where)
+ (frac where)
+ (int where)
+ (deg where)
+ (symbol current-module where)
+ (tag current-module where)
+ (text where)
+ (do @
+ [end? l;end?]
+ (if end?
+ (p;fail (End-Of-File current-module))
+ (p;fail (Unrecognized-Input current-module))))
+ )))))
+
+(def: #export (parse current-module [where offset source])
+ (-> Text Source (e;Error [Source Code]))
+ (case (p;run [offset source] (ast current-module where))
+ (#e;Error error)
+ (#e;Error error)
+
+ (#e;Success [[offset' remaining] [where' output]])
+ (#e;Success [[where' offset' remaining] output])))
diff --git a/stdlib/test/test/lux/lang/syntax.lux b/stdlib/test/test/lux/lang/syntax.lux
new file mode 100644
index 000000000..4db181cae
--- /dev/null
+++ b/stdlib/test/test/lux/lang/syntax.lux
@@ -0,0 +1,233 @@
+(;module:
+ lux
+ (lux [io]
+ (control [monad #+ do])
+ (data [number]
+ ["e" error]
+ [text]
+ (text format
+ ["l" lexer])
+ (coll [list]))
+ ["r" math/random "r/" Monad]
+ (meta [code])
+ (lang ["&" syntax])
+ test))
+
+(def: default-cursor
+ Cursor
+ {#;module ""
+ #;line +0
+ #;column +0})
+
+(def: ident-part^
+ (r;Random Text)
+ (do r;Monad
+ [#let [digits "0123456789"
+ delimiters "()[]{}#;\""
+ space "\t\v \n\r\f"
+ invalid-range (format digits delimiters space)
+ char-gen (|> r;nat
+ (r;filter (function [sample]
+ (not (text;contains? (text;from-code sample)
+ invalid-range)))))]
+ size (|> r;nat (:: @ map (|>. (n.% +20) (n.max +1))))]
+ (r;text' char-gen size)))
+
+(def: ident^
+ (r;Random Ident)
+ (r;seq ident-part^ ident-part^))
+
+(def: code^
+ (r;Random Code)
+ (let [numeric^ (: (r;Random Code)
+ ($_ r;either
+ (|> r;bool (r/map (|>. #;Bool [default-cursor])))
+ (|> r;nat (r/map (|>. #;Nat [default-cursor])))
+ (|> r;int (r/map (|>. #;Int [default-cursor])))
+ (|> r;deg (r/map (|>. #;Deg [default-cursor])))
+ (|> r;frac (r/map (|>. #;Frac [default-cursor])))))
+ textual^ (: (r;Random Code)
+ ($_ r;either
+ (do r;Monad
+ [size (|> r;nat (r/map (n.% +20)))]
+ (|> (r;text size) (r/map (|>. #;Text [default-cursor]))))
+ (|> ident^ (r/map (|>. #;Symbol [default-cursor])))
+ (|> ident^ (r/map (|>. #;Tag [default-cursor])))))
+ simple^ (: (r;Random Code)
+ ($_ r;either
+ numeric^
+ textual^))]
+ (r;rec
+ (function [code^]
+ (let [multi^ (do r;Monad
+ [size (|> r;nat (r/map (n.% +3)))]
+ (r;list size code^))
+ composite^ (: (r;Random Code)
+ ($_ r;either
+ (|> multi^ (r/map (|>. #;Form [default-cursor])))
+ (|> multi^ (r/map (|>. #;Tuple [default-cursor])))
+ (do r;Monad
+ [size (|> r;nat (r/map (n.% +3)))]
+ (|> (r;list size (r;seq code^ code^))
+ (r/map (|>. #;Record [default-cursor]))))))]
+ (r;either simple^
+ composite^))))))
+
+(context: "Lux code syntax."
+ (<| (times +100)
+ (do @
+ [sample code^
+ other code^]
+ ($_ seq
+ (test "Can parse Lux code."
+ (case (&;parse "" [default-cursor +0 (code;to-text sample)])
+ (#e;Error error)
+ false
+
+ (#e;Success [_ parsed])
+ (:: code;Eq = parsed sample)))
+ (test "Can parse Lux multiple code nodes."
+ (case (&;parse "" [default-cursor +0 (format (code;to-text sample) " "
+ (code;to-text other))])
+ (#e;Error error)
+ false
+
+ (#e;Success [remaining =sample])
+ (case (&;parse "" remaining)
+ (#e;Error error)
+ false
+
+ (#e;Success [_ =other])
+ (and (:: code;Eq = sample =sample)
+ (:: code;Eq = other =other)))))
+ ))))
+
+(def: nat-to-frac
+ (-> Nat Frac)
+ (|>. nat-to-int int-to-frac))
+
+(context: "Frac special syntax."
+ (<| (times +100)
+ (do @
+ [numerator (|> r;nat (:: @ map (|>. (n.% +100) nat-to-frac)))
+ denominator (|> r;nat (:: @ map (|>. (n.% +100) (n.max +1) nat-to-frac)))
+ signed? r;bool
+ #let [expected (|> numerator (f./ denominator) (f.* (if signed? -1.0 1.0)))]]
+ (test "Can parse frac ratio syntax."
+ (case (&;parse "" [default-cursor +0
+ (format (if signed? "-" "")
+ (%i (frac-to-int numerator))
+ "/"
+ (%i (frac-to-int denominator)))])
+ (#e;Success [_ [_ (#;Frac actual)]])
+ (f.= expected actual)
+
+ _
+ false)
+ ))))
+
+(context: "Nat special syntax."
+ (<| (times +100)
+ (do @
+ [expected (|> r;nat (:: @ map (n.% +1_000)))]
+ (test "Can parse nat char syntax."
+ (case (&;parse "" [default-cursor +0
+ (format "#" (%t (text;from-code expected)) "")])
+ (#e;Success [_ [_ (#;Nat actual)]])
+ (n.= expected actual)
+
+ _
+ false)
+ ))))
+
+(def: comment-text^
+ (r;Random Text)
+ (let [char-gen (|> r;nat (r;filter (function [value]
+ (not (or (text;space? value)
+ (n.= (char "#") value)
+ (n.= (char "(") value)
+ (n.= (char ")") value))))))]
+ (do r;Monad
+ [size (|> r;nat (r/map (n.% +20)))]
+ (r;text' char-gen size))))
+
+(def: comment^
+ (r;Random Text)
+ (r;either (do r;Monad
+ [comment comment-text^]
+ (wrap (format "## " comment "\n")))
+ (r;rec (function [nested^]
+ (do r;Monad
+ [comment (r;either comment-text^
+ nested^)]
+ (wrap (format "#( " comment " )#")))))))
+
+(context: "Multi-line text & comments."
+ (<| (times +100)
+ (do @
+ [#let [char-gen (|> r;nat (r;filter (function [value]
+ (not (or (text;space? value)
+ (n.= (char "\"") value))))))]
+ x char-gen
+ y char-gen
+ z char-gen
+ offset-size (|> r;nat (r/map (|>. (n.% +10) (n.max +1))))
+ #let [offset (text;join-with "" (list;repeat offset-size " "))]
+ sample code^
+ comment comment^
+ unbalanced-comment comment-text^]
+ ($_ seq
+ (test "Will reject invalid multi-line text."
+ (let [bad-match (format (text;from-code x) "\n"
+ (text;from-code y) "\n"
+ (text;from-code z))]
+ (case (&;parse "" [default-cursor +0
+ (format "\"" bad-match "\"")])
+ (#e;Error error)
+ true
+
+ (#e;Success [_ parsed])
+ false)))
+ (test "Will accept valid multi-line text"
+ (let [good-input (format (text;from-code x) "\n"
+ offset (text;from-code y) "\n"
+ offset (text;from-code z))
+ good-output (format (text;from-code x) "\n"
+ (text;from-code y) "\n"
+ (text;from-code z))]
+ (case (&;parse "" [(|> default-cursor (update@ #;column (n.+ (n.dec offset-size))))
+ +0
+ (format "\"" good-input "\"")])
+ (#e;Error error)
+ false
+
+ (#e;Success [_ parsed])
+ (:: code;Eq =
+ parsed
+ (code;text good-output)))))
+ (test "Can handle comments."
+ (case (&;parse "" [default-cursor +0
+ (format comment (code;to-text sample))])
+ (#e;Error error)
+ false
+
+ (#e;Success [_ parsed])
+ (:: code;Eq = parsed sample)))
+ (test "Will reject unbalanced multi-line comments."
+ (and (case (&;parse "" [default-cursor +0
+ (format "#(" "#(" unbalanced-comment ")#"
+ (code;to-text sample))])
+ (#e;Error error)
+ true
+
+ (#e;Success [_ parsed])
+ false)
+ (case (&;parse "" [default-cursor +0
+ (format "#(" unbalanced-comment ")#" ")#"
+ (code;to-text sample))])
+ (#e;Error error)
+ true
+
+ (#e;Success [_ parsed])
+ false)))
+ ))))
diff --git a/stdlib/test/tests.lux b/stdlib/test/tests.lux
index aa816c4d3..ea0aa72f7 100644
--- a/stdlib/test/tests.lux
+++ b/stdlib/test/tests.lux
@@ -70,6 +70,7 @@
(type ["_;" check]
["_;" auto]
["_;" object]))
+ (lang ["lang_;" syntax])
(world ["_;" blob]
["_;" file]
(net ["_;" tcp]
--
cgit v1.2.3