summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dhall_parser/build.rs13
-rw-r--r--dhall_parser/src/dhall.abnf698
2 files changed, 710 insertions, 1 deletions
diff --git a/dhall_parser/build.rs b/dhall_parser/build.rs
index 8dd5875..3ca36b5 100644
--- a/dhall_parser/build.rs
+++ b/dhall_parser/build.rs
@@ -5,7 +5,9 @@ use std::collections::HashMap;
use abnf_to_pest::{PestRuleSettings, abnf_to_pest};
fn main() -> std::io::Result<()> {
- let abnf_path = "../dhall-lang/standard/dhall.abnf";
+ // TODO: upstream changes to grammar
+ // let abnf_path = "../dhall-lang/standard/dhall.abnf";
+ let abnf_path = "src/dhall.abnf";
let visibility_path = "src/dhall.pest.visibility";
let pest_path = "src/dhall.pest";
println!("cargo:rerun-if-changed={}", abnf_path);
@@ -25,9 +27,18 @@ fn main() -> std::io::Result<()> {
rule_settings.insert(line, PestRuleSettings { visible: true, ..Default::default() });
}
}
+ rule_settings.insert("simple_label".to_owned(), PestRuleSettings {
+ visible: true,
+ replace: Some("
+ keyword_raw ~ simple_label_next_char+
+ | !keyword_raw ~ simple_label_first_char ~ simple_label_next_char*
+ ".to_owned()),
+ });
let mut file = File::create(pest_path)?;
+ writeln!(&mut file, "// AUTO-GENERATED FILE. See build.rs.")?;
writeln!(&mut file, "{}", abnf_to_pest(&data, &rule_settings)?)?;
+ writeln!(&mut file, "keyword_raw = _{{ let_raw | in_raw | if_raw | then_raw | else_raw }}")?;
writeln!(&mut file, "final_expression = {{ SOI ~ complete_expression ~ EOI }}")?;
Ok(())
diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf
new file mode 100644
index 0000000..391741f
--- /dev/null
+++ b/dhall_parser/src/dhall.abnf
@@ -0,0 +1,698 @@
+; ABNF syntax based on RFC 5234
+;
+; The character encoding for Dhall is UTF-8
+;
+; Some notes on implementing this grammar:
+;
+; First, do not use a lexer to tokenize the file before parsing. Instead, treat
+; the individual characters of the file as the tokens to feed into the parser.
+; You should not use a lexer because Dhall's grammar supports two features which
+; cannot be correctly supported by a lexer:
+;
+; * String interpolation (i.e. "foo ${Natural/toInteger bar} baz")
+; * Nested block comments (i.e. "{- foo {- bar -} baz -}")
+;
+; Second, this grammar assumes that your parser can backtrack and/or try
+; multiple parses simultaneously. For example, consider this expression:
+;
+; List ./MyType
+;
+; A parser might first try to parse the period as the beginning of a field
+; selector, only to realize immediately afterwards that `/MyType` is not a valid
+; name for a field. A conforming parser must backtrack so that the expression
+; `./MyType` can instead be correctly interpreted as a relative path
+;
+; Third, if there are multiple valid parses then prefer the first parse
+; according to the ordering of alternatives. That is, the order of evaluation
+; of the alternatives is left-to-right.
+;
+; For example, the grammar for single quoted string literals is:
+;
+; single-quote-continue =
+; "'''" single-quote-continue
+; / "${" complete-expression "}" single-quote-continue
+; / "''${" single-quote-continue
+; / "''"
+; / %x20-10FFFF single-quote-continue
+; / tab single-quote-continue
+; / end-of-line single-quote-continue
+;
+; single-quote-literal = "''" single-quote-continue
+;
+; ... which permits valid parses for the following code:
+;
+; "''''''''''''''''"
+;
+; If you tried to parse all alternatives then there are at least two valid
+; interpretations for the above code:
+;
+; * A single quoted literal with four escape sequences of the form "'''"
+; * i.e. "''" followed by "'''" four times in a row followed by "''"
+; * Four empty single quoted literals
+; * i.e. "''''" four times in a row
+;
+; The correct interpretation is the first one because parsing the escape
+; sequence "'''" takes precedence over parsing the termination sequence "''",
+; according to the order of the alternatives in the `single-quote-continue`
+; rule.
+;
+; Some parsing libraries do not backtrack by default but allow the user to
+; selectively backtrack in certain parts of the grammar. Usually parsing
+; libraries do this to improve efficiency and error messages. Dhall's grammar
+; takes that into account by minimizing the number of rules that require the
+; parser to backtrack and comments below will highlight where you need to
+; explicitly backtrack
+;
+; Specifically, if you see an uninterrupted literal in a grammar rule such as:
+;
+; "->"
+;
+; ... or:
+;
+; %x66.6f.72.61.6c.6c
+;
+; ... then that string literal is parsed as a single unit, meaning that you
+; should backtrack if you parse only part of the literal
+;
+; In all other cases you can assume that you do not need to backtrack unless
+; there is a comment explicitly asking you to backtrack
+;
+; When parsing a repeated construct, prefer alternatives that parse as many
+; repetitions as possible. On in other words:
+;
+; [a] = a / ""
+;
+; a* = a* a / ""
+;
+; Note that the latter rule also specifies that repetition produces
+; left-associated expressions. For example, function application is
+; left-associative and all operators are left-associative when they are not
+; parenthesized.
+;
+; Additionally, try alternatives in an order that minimizes backtracking
+; according to the following rule:
+;
+; (a / b) (c / d) = a c / a d / b c / b d
+
+; NOTE: There are many line endings in the wild
+;
+; See: https://en.wikipedia.org/wiki/Newline
+;
+; For simplicity this supports Unix and Windows line-endings, which are the most
+; common
+end-of-line =
+ %x0A ; "\n"
+ / %x0D.0A ; "\r\n"
+
+tab = %x09 ; "\t"
+
+block-comment = "{-" block-comment-continue
+
+block-comment-chunk =
+ block-comment
+ / %x20-10FFFF
+ / tab
+ / end-of-line
+
+block-comment-continue = "-}" / block-comment-chunk block-comment-continue
+
+not-end-of-line = %x20-10FFFF / tab
+
+; NOTE: Slightly different from Haskell-style single-line comments because this
+; does not require a space after the dashes
+line-comment = "--" *not-end-of-line end-of-line
+
+whitespace-chunk =
+ " "
+ / tab
+ / end-of-line
+ / line-comment
+ / block-comment
+
+whitespace = *whitespace-chunk
+
+nonempty-whitespace = 1*whitespace-chunk
+
+; Uppercase or lowercase ASCII letter
+ALPHA = %x41-5A / %x61-7A
+
+; ASCII digit
+DIGIT = %x30-39 ; 0-9
+
+HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
+
+; A simple label cannot be one of the following reserved keywords:
+;
+; * if
+; * then
+; * else
+; * let
+; * in
+; * as
+; * using
+; * merge
+simple-label-first-char = ALPHA / "_"
+simple-label-next-char = ALPHA / DIGIT / "-" / "/" / "_"
+simple-label = simple-label-first-char *simple-label-next-char
+
+quoted-label = 1*(ALPHA / DIGIT / "-" / "/" / "_" / ":" / "." / "$")
+
+; NOTE: Dhall does not support Unicode labels, mainly to minimize the potential
+; for code obfuscation
+label-raw = ("`" quoted-label "`" / simple-label)
+label = label-raw whitespace
+
+; Dhall's double-quoted strings are equivalent to JSON strings except with
+; support for string interpolation (and escaping string interpolation)
+;
+; Dhall uses almost the same escaping rules as JSON (RFC7159) with one
+; exception: Dhall adds a new `\$` escape sequence for dollar signs. This
+; additional escape sequences lets you escape string interpolation by writing
+; `\${`
+;
+; > The representation of strings is similar to conventions used in the C
+; > family of programming languages. A string begins and ends with
+; > quotation marks. All Unicode characters may be placed within the
+; > quotation marks, except for the characters that must be escaped:
+; > quotation mark, reverse solidus, and the control characters (U+0000
+; > through U+001F).
+; >
+; > Any character may be escaped. If the character is in the Basic
+; > Multilingual Plane (U+0000 through U+FFFF), then it may be
+; > represented as a six-character sequence: a reverse solidus, followed
+; > by the lowercase letter u, followed by four hexadecimal digits that
+; > encode the character's code point. The hexadecimal letters A though
+; > F can be upper or lower case. So, for example, a string containing
+; > only a single reverse solidus character may be represented as
+; > "\u005C".
+; >
+; > Alternatively, there are two-character sequence escape
+; > representations of some popular characters. So, for example, a
+; > string containing only a single reverse solidus character may be
+; > represented more compactly as "\\".
+; >
+; > To escape an extended character that is not in the Basic Multilingual
+; > Plane, the character is represented as a 12-character sequence,
+; > encoding the UTF-16 surrogate pair. So, for example, a string
+; > containing only the G clef character (U+1D11E) may be represented as
+; > "\uD834\uDD1E".
+double-quote-chunk =
+ interpolation
+ ; '\'
+ / %x5C double-quote-escaped
+ / double-quote-char
+
+double-quote-escaped =
+ %x22 ; '"' quotation mark U+0022
+ / %x24 ; '$' dollar sign U+0024
+ / %x5C ; '\' reverse solidus U+005C
+ / %x2F ; '/' solidus U+002F
+ / %x62 ; 'b' backspace U+0008
+ / %x66 ; 'f' form feed U+000C
+ / %x6E ; 'n' line feed U+000A
+ / %x72 ; 'r' carriage return U+000D
+ / %x74 ; 't' tab U+0009
+ / %x75 4HEXDIG ; 'uXXXX' U+XXXX
+
+; Printable characters except double quote and backslash
+double-quote-char =
+ %x20-21
+ ; %x22 = '"'
+ / %x23-5B
+ ; %x5C = "\"
+ / %x5D-10FFFF
+
+double-quote-literal = %x22 *double-quote-chunk %x22
+
+; NOTE: The only way to end a single-quote string literal with a single quote is
+; to either interpolate the single quote, like this:
+;
+; ''ABC${"'"}''
+;
+; ... or concatenate another string, like this:
+;
+; ''ABC'' ++ "'"
+;
+; If you try to end the string literal with a single quote then you get "'''",
+; which is interpreted as an escaped pair of single quotes
+single-quote-continue =
+ interpolation single-quote-continue
+ / escaped-quote-pair single-quote-continue
+ / escaped-interpolation single-quote-continue
+ / single-quote-char single-quote-continue
+ / "''" ; End of text literal
+
+; Escape two single quotes (i.e. replace this sequence with "''")
+escaped-quote-pair = "'''"
+
+; Escape interpolation (i.e. replace this sequence with "${")
+escaped-interpolation = "''${"
+
+single-quote-char =
+ %x20-10FFFF
+ / tab
+ / end-of-line
+
+single-quote-literal = "''" end-of-line single-quote-continue
+
+; Interpolation
+interpolation = "${" complete-expression "}"
+
+text-literal-raw = (double-quote-literal / single-quote-literal)
+
+; RFC 5234 interprets string literals as case-insensitive and recommends using
+; hex instead for case-sensitive strings
+;
+; If you don't feel like reading hex, these are all the same as the rule name,
+; except without the '-raw' ending.
+if-raw = %x69.66
+then-raw = %x74.68.65.6e
+else-raw = %x65.6c.73.65
+let-raw = %x6c.65.74
+in-raw = %x69.6e
+as-raw = %x61.73
+using-raw = %x75.73.69.6e.67
+merge-raw = %x6d.65.72.67.65
+missing-raw = %x6d.69.73.73.69.6e.67
+Optional-raw = %x4f.70.74.69.6f.6e.61.6c
+Text-raw = %x54.65.78.74
+List-raw = %x4c.69.73.74
+Infinity-raw = %x49.6e.66.69.6e.69.74.79
+
+; Whitespaced rules for reserved words, to be used when matching expressions
+if = if-raw nonempty-whitespace
+then = then-raw nonempty-whitespace
+else = else-raw nonempty-whitespace
+let = let-raw nonempty-whitespace
+in = in-raw nonempty-whitespace
+as = as-raw nonempty-whitespace
+using = using-raw nonempty-whitespace
+merge = merge-raw nonempty-whitespace
+Optional = Optional-raw whitespace
+Text = Text-raw whitespace
+List = List-raw whitespace
+
+equal = "=" whitespace
+or = "||" whitespace
+plus = "+" nonempty-whitespace ; To disambiguate `f +2`
+text-append = "++" whitespace
+list-append = "#" nonempty-whitespace ; To disambiguate `http://a/a#a`
+and = "&&" whitespace
+times = "*" whitespace
+double-equal = "==" whitespace
+not-equal = "!=" whitespace
+dot = "." whitespace
+bar = "|" whitespace
+comma = "," whitespace
+at = "@" whitespace
+colon = ":" nonempty-whitespace ; To disambiguate `env:VARIABLE` from type annotations
+import-alt = "?" nonempty-whitespace ; To disambiguate `http://a/a?a`
+open-parens = "(" whitespace
+close-parens-raw = ")"
+close-parens = ")" whitespace
+open-brace = "{" whitespace
+close-brace-raw = "}"
+close-brace = "}" whitespace
+open-bracket = "[" whitespace
+close-bracket-raw = "]"
+close-bracket = "]" whitespace
+open-angle = "<" whitespace
+close-angle-raw = ">"
+close-angle = ">" whitespace
+
+combine = ( %x2227 / "/\" ) whitespace
+combine-types = ( %x2A53 / "//\\" ) whitespace
+prefer = ( %x2AFD / "//" ) whitespace
+lambda = ( %x3BB / "\" ) whitespace
+forall = ( %x2200 / %x66.6f.72.61.6c.6c ) whitespace
+arrow = ( %x2192 / "->" ) whitespace
+
+exponent = "e" [ "+" / "-" ] 1*DIGIT
+
+double-literal-raw = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)
+
+natural-literal-raw = 1*DIGIT
+
+integer-literal-raw = ( "+" / "-" ) natural-literal-raw
+
+identifier-raw = label-raw [ whitespace at natural-literal-raw ]
+identifier = identifier-raw whitespace
+
+; Printable characters other than " ()[]{}<>/\,"
+;
+; Excluding those characters ensures that paths don't have to end with trailing
+; whitespace most of the time
+path-character =
+ ; %x20 = " "
+ %x21
+ ; %x22 = "\""
+ ; %x23 = "#"
+ / %x24-27
+ ; %x28 = "("
+ ; %x29 = ")"
+ / %x2A-2B
+ ; %x2C = ","
+ / %x2D-2E
+ ; %x2F = "/"
+ / %x30-3B
+ ; %x3C = "<"
+ / %x3D
+ ; %x3E = ">"
+ ; %x3F = "?"
+ / %x40-5A
+ ; %x5B = "["
+ ; %x5C = "\"
+ ; %x5D = "]"
+ / %x5E-7A
+ ; %x7B = "{"
+ / %x7C
+ ; %x7D = "}"
+ / %x7E
+
+quoted-path-character =
+ %x20-21
+ ; %x22 = "\""
+ / %x23-2E
+ ; %x2F = "/"
+ / %x30-10FFFF
+
+
+path-component = "/" ( 1*path-character / %x22 1*quoted-path-character %x22 )
+
+path = 1*path-component
+
+local-raw =
+ ".." path ; Relative path
+ / "." path ; Relative path
+ / "~" path ; Home-anchored path
+ ; NOTE: Backtrack if parsing this alternative fails
+ ;
+ ; This is because the first character of this alternative will be "/", but
+ ; if the second character is "/" or "\" then this should have been parsed
+ ; as an operator instead of a path
+ / path ; Absolute path
+
+; `http[s]` URI grammar based on RFC7230 and RFC 3986 with some differences
+; noted below
+
+scheme = %x68.74.74.70 [ %x73 ] ; "http" [ "s" ]
+
+; NOTE: This does not match the official grammar for a URI. Specifically, this
+; replaces `path-abempty` with `path`
+http-raw = scheme "://" authority path [ "?" query ] [ "#" fragment ]
+
+; NOTE: Backtrack if parsing the optional user info prefix fails
+authority = [ userinfo "@" ] host [ ":" port ]
+
+userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+host = IP-literal / IPv4address / reg-name
+
+port = *DIGIT
+
+IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+
+IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+
+; NOTE: Backtrack when parsing each alternative
+IPv6address = 6( h16 ":" ) ls32
+ / "::" 5( h16 ":" ) ls32
+ / [ h16 ] "::" 4( h16 ":" ) ls32
+ / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ / [ *4( h16 ":" ) h16 ] "::" ls32
+ / [ *5( h16 ":" ) h16 ] "::" h16
+ / [ *6( h16 ":" ) h16 ] "::"
+
+h16 = 1*4HEXDIG
+
+ls32 = ( h16 ":" h16 ) / IPv4address
+
+IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+
+; NOTE: Backtrack when parsing these alternatives and try them in reverse order
+dec-octet = DIGIT ; 0-9
+ / %x31-39 DIGIT ; 10-99
+ / "1" 2DIGIT ; 100-199
+ / "2" %x30-34 DIGIT ; 200-249
+ / "25" %x30-35 ; 250-255
+
+reg-name = *( unreserved / pct-encoded / sub-delims )
+
+pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+
+query = *( pchar / "/" / "?" )
+
+fragment = *( pchar / "/" / "?" )
+
+pct-encoded = "%" HEXDIG HEXDIG
+
+unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
+sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+
+http =
+ http-raw
+ [ whitespace using (import-hashed-raw / open-parens import-hashed-raw whitespace close-parens-raw) ]
+
+; Dhall supports unquoted environment variables that are Bash-compliant or
+; quoted environment variables that are POSIX-compliant
+env-raw = "env:"
+ ( bash-environment-variable
+ / %x22 posix-environment-variable %x22
+ )
+
+; Bash supports a restricted subset of POSIX environment variables. From the
+; Bash `man` page, an environment variable name is:
+;
+; > A word consisting only of alphanumeric characters and under-scores, and
+; > beginning with an alphabetic character or an under-score
+bash-environment-variable = (ALPHA / "_") *(ALPHA / DIGIT / "_")
+
+; The POSIX standard is significantly more flexible about legal environment
+; variable names, which can contain alerts (i.e. '\a'), whitespace, or
+; punctuation, for example. The POSIX standard says about environment variable
+; names:
+;
+; > The value of an environment variable is a string of characters. For a
+; > C-language program, an array of strings called the environment shall be made
+; > available when a process begins. The array is pointed to by the external
+; > variable environ, which is defined as:
+; >
+; > extern char **environ;
+; >
+; > These strings have the form name=value; names shall not contain the
+; > character '='. For values to be portable across systems conforming to IEEE
+; > Std 1003.1-2001, the value shall be composed of characters from the portable
+; > character set (except NUL and as indicated below).
+;
+; Note that the standard does not explicitly state that the name must have at
+; least one character, but `env` does not appear to support this and `env`
+; claims to be POSIX-compliant. To be safe, Dhall requires at least one
+; character like `env`
+posix-environment-variable = 1*posix-environment-variable-character
+
+; These are all the characters from the POSIX Portable Character Set except for
+; '\0' (NUL) and '='. Note that the POSIX standard does not explicitly state
+; that environment variable names cannot have NUL. However, this is implicit
+; in the fact that environment variables are passed to the program as
+; NUL-terminated `name=value` strings, which implies that the `name` portion of
+; the string cannot have NUL characters
+posix-environment-variable-character =
+ %x5C ; '\' Beginning of escape sequence
+ ( %x22 ; '"' quotation mark U+0022
+ / %x5C ; '\' reverse solidus U+005C
+ / %x61 ; 'a' alert U+0007
+ / %x62 ; 'b' backspace U+0008
+ / %x66 ; 'f' form feed U+000C
+ / %x6E ; 'n' line feed U+000A
+ / %x72 ; 'r' carriage return U+000D
+ / %x74 ; 't' tab U+0009
+ / %x76 ; 'v' vertical tab U+000B
+ )
+ ; Printable characters except double quote, backslash and equals
+ / %x20-21
+ ; %x22 = '"'
+ / %x23-3C
+ ; %x3D = '='
+ / %x3E-5B
+ ; %x5C = "\"
+ / %x5D-7E
+
+import-type-raw = missing-raw / local-raw / http / env-raw
+
+hash-raw = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"
+
+import-hashed-raw = import-type-raw [ whitespace hash-raw ]
+
+; "http://example.com"
+; "./foo/bar"
+; "env:FOO"
+import-raw = import-hashed-raw [ whitespace as Text-raw ]
+
+; NOTE: Every rule past this point should only reference rules that end with
+; whitespace. This ensures consistent handling of whitespace in the absence of
+; a separate lexing step.
+; The exception is the rules ending in -raw, which should _not_ end in whitespace.
+; This is important to avoid the need for sequential backtracking in application-expression.
+
+; An arbitrary dhall expression. Only use in a context where parentheses or
+; keywords prevent possible ambiguity. See also atomic-expression.
+expression =
+ lambda-expression
+ / ifthenelse-expression
+ / let-expression
+ / forall-expression
+ ; NOTE: Backtrack if parsing this alternative fails
+ / arrow-expression
+ / merge-expression
+ ; NOTE: Backtrack if parsing this alternative fails since we can't tell
+ ; from the opening bracket whether or not this will be an empty list or
+ ; a non-empty list
+ / empty-list-or-optional
+ / annotated-expression
+
+; "\(x : a) -> b"
+lambda-expression = lambda open-parens label colon expression close-parens arrow expression
+
+; "if a then b else c"
+ifthenelse-expression = if expression then expression else expression
+
+; "let x : t = e1 in e2"
+; "let x = e1 in e2"
+; "let x = e1 let y = e2 in e3"
+let-expression = 1*let-binding in expression
+let-binding = let label [ colon expression ] equal expression
+
+; "forall (x : a) -> b"
+forall-expression = forall open-parens label colon expression close-parens arrow expression
+
+; "a -> b"
+arrow-expression = operator-expression arrow expression
+
+; "merge e1 e2 : t"
+; "merge e1 e2"
+merge-expression = merge atomic-expression atomic-expression [ colon application-expression ]
+
+; "[] : List t"
+; "[] : Optional t"
+; "[x] : Optional t"
+empty-list-or-optional = open-bracket (empty-collection / non-empty-optional)
+empty-collection = close-bracket colon (List / Optional) atomic-expression
+non-empty-optional = expression close-bracket colon Optional atomic-expression
+
+; "x : t"
+annotated-expression = operator-expression [ colon expression ]
+
+
+operator-expression = import-alt-expression
+
+import-alt-expression = or-expression *(import-alt or-expression)
+or-expression = plus-expression *(or plus-expression )
+plus-expression = text-append-expression *(plus text-append-expression )
+text-append-expression = list-append-expression *(text-append list-append-expression )
+list-append-expression = and-expression *(list-append and-expression )
+and-expression = combine-expression *(and combine-expression )
+combine-expression = prefer-expression *(combine prefer-expression )
+prefer-expression = combine-types-expression *(prefer combine-types-expression)
+combine-types-expression = times-expression *(combine-types times-expression )
+times-expression = equal-expression *(times equal-expression )
+equal-expression = not-equal-expression *(double-equal not-equal-expression )
+not-equal-expression = application-expression *(not-equal application-expression )
+
+; Import expressions need to be separated by some whitespace, otherwise there
+; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`",
+; or "apply the import `./a` to label `b`"
+; The -raw handling is important for greedy parsers, that can't do sequential backtracking.
+application-expression = atomic-expression-raw *(nonempty-whitespace atomic-expression-raw) whitespace
+
+; An expression that does not need to be surrounded by parentheses to disambiguate
+atomic-expression = atomic-expression-raw whitespace
+atomic-expression-raw =
+ import-raw
+ / selector-expression-raw
+
+; `record.field` extracts one field of a record
+; `record.{ field0, field1, field2 }` projects out several fields of a record
+;
+; NOTE: Backtrack when parsing the `*(dot ...)`. The reason why is that you
+; can't tell from parsing just the period whether "foo." will become "foo.bar"
+; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying
+; the function `foo` to the relative path `./bar`)
+selector-expression-raw = primitive-expression-raw *(whitespace dot selector-raw)
+
+selector-raw = label-raw / labels-raw
+
+labels-raw = open-brace [ label *(comma label) ] close-brace-raw
+
+
+primitive-expression-raw =
+ literal-expression-raw
+ / open-brace record-type-or-literal close-brace-raw
+ / open-angle union-type-or-literal close-angle-raw
+ / non-empty-list-literal-raw
+ / parenthesized-expression-raw
+
+; NOTE: Backtrack when parsing the first three alternatives (i.e. the numeric
+; literals). This is because they share leading characters in common
+literal-expression-raw =
+ ; "2.0"
+ double-literal-raw
+
+ ; "2"
+ / natural-literal-raw
+
+ ; "+2"
+ / integer-literal-raw
+
+ ; "-Infinity"
+ / "-" Infinity-raw
+
+ ; '"ABC"'
+ / text-literal-raw
+
+ ; "x"
+ ; "x@2"
+ / identifier-raw
+
+; "{ foo = 1 , bar = True }"
+; "{ foo : Integer, bar : Bool }"
+record-type-or-literal =
+ empty-record-literal
+ / non-empty-record-type-or-literal
+ / empty-record-type
+empty-record-literal = equal
+empty-record-type = ""
+non-empty-record-type-or-literal =
+ label (non-empty-record-literal / non-empty-record-type)
+non-empty-record-type = colon expression *(comma record-type-entry)
+record-type-entry = label colon expression
+non-empty-record-literal = equal expression *(comma record-literal-entry)
+record-literal-entry = label equal expression
+
+; "< Foo : Integer | Bar : Bool >"
+; "< Foo : Integer | Bar = True >"
+union-type-or-literal =
+ non-empty-union-type-or-literal
+ / empty-union-type
+empty-union-type = ""
+non-empty-union-type-or-literal =
+ label
+ ( equal expression union-type-entries
+ / colon expression [ bar non-empty-union-type-or-literal ]
+ )
+union-type-entries = *(bar union-type-entry)
+union-type-entry = label colon expression
+
+; "[1, 2, 3]"
+; `empty-list-or-optional` handles empty lists
+non-empty-list-literal-raw = open-bracket expression *(comma expression) close-bracket-raw
+
+; "( e )"
+parenthesized-expression-raw = open-parens expression close-parens-raw
+
+
+; All expressions end with trailing whitespace. This just adds a final
+; whitespace prefix for the top-level of the program
+complete-expression = whitespace expression