/// ; ABNF syntax based on RFC 5234 /// ; /// ; The character encoding for Dhall is UTF-8 /// ; /// ; Some notes on implementing this grammar: /// ; /// ; First, do not use a lexer to tokenize the file before parsing. Instead, treat /// ; the individual characters of the file as the tokens to feed into the parser. /// ; You should not use a lexer because Dhall's grammar supports two features which /// ; cannot be correctly supported by a lexer: /// ; /// ; * String interpolation (i.e. "foo ${Natural/toInteger bar} baz") /// ; * Nested block comments (i.e. "{- foo {- bar -} baz -}") /// ; /// ; Second, this grammar assumes that your parser can backtrack and/or try /// ; multiple parses simultaneously. For example, consider this expression: /// ; /// ; List ./MyType /// ; /// ; A parser might first try to parse the period as the beginning of a field /// ; selector, only to realize immediately afterwards that `/MyType` is not a valid /// ; name for a field. A conforming parser must backtrack so that the expression /// ; `./MyType` can instead be correctly interpreted as a relative path /// ; /// ; Third, if there are multiple valid parses then prefer the first parse /// ; according to the ordering of alternatives. That is, the order of evaluation /// ; of the alternatives is left-to-right. /// ; /// ; For example, the grammar for single quoted string literals is: /// ; /// ; single-quote-continue = /// ; "'''" single-quote-continue /// ; / "${" complete-expression "}" single-quote-continue /// ; / "''${" single-quote-continue /// ; / "''" /// ; / %x20-10FFFF single-quote-continue /// ; / tab single-quote-continue /// ; / end-of-line single-quote-continue /// ; /// ; single-quote-literal = "''" single-quote-continue /// ; /// ; ... which permits valid parses for the following code: /// ; /// ; "''''''''''''''''" /// ; /// ; If you tried to parse all alternatives then there are at least two valid /// ; interpretations for the above code: /// ; /// ; * A single quoted literal with four escape sequences of the form "'''" /// ; * i.e. "''" followed by "'''" four times in a row followed by "''" /// ; * Four empty single quoted literals /// ; * i.e. "''''" four times in a row /// ; /// ; The correct interpretation is the first one because parsing the escape /// ; sequence "'''" takes precedence over parsing the termination sequence "''", /// ; according to the order of the alternatives in the `single-quote-continue` /// ; rule. /// ; /// ; Some parsing libraries do not backtrack by default but allow the user to /// ; selectively backtrack in certain parts of the grammar. Usually parsing /// ; libraries do this to improve efficiency and error messages. Dhall's grammar /// ; takes that into account by minimizing the number of rules that require the /// ; parser to backtrack and comments below will highlight where you need to /// ; explicitly backtrack /// ; /// ; Specifically, if you see an uninterrupted literal in a grammar rule such as: /// ; /// ; "->" /// ; /// ; ... or: /// ; /// ; %x66.6f.72.61.6c.6c /// ; /// ; ... then that string literal is parsed as a single unit, meaning that you /// ; should backtrack if you parse only part of the literal /// ; /// ; In all other cases you can assume that you do not need to backtrack unless /// ; there is a comment explicitly asking you to backtrack /// ; /// ; When parsing a repeated construct, prefer alternatives that parse as many /// ; repetitions as possible. On in other words: /// ; /// ; [a] = a / "" /// ; /// ; a* = a* a / "" /// ; /// ; Note that the latter rule also specifies that repetition produces /// ; left-associated expressions. For example, function application is /// ; left-associative and all operators are left-associative when they are not /// ; parenthesized. /// ; /// ; Additionally, try alternatives in an order that minimizes backtracking /// ; according to the following rule: /// ; /// ; (a / b) (c / d) = a c / a d / b c / b d /// /// ; NOTE: There are many line endings in the wild /// ; /// ; See: https://en.wikipedia.org/wiki/Newline /// ; /// ; For simplicity this supports Unix and Windows line-endings, which are the most /// ; common /// end-of-line = /// %x0A ; "\n" /// / %x0D.0A ; "\r\n" /// /// tab = %x09 ; "\t" /// /// block-comment = "{-" block-comment-continue /// /// block-comment-chunk = /// block-comment /// / %x20-10FFFF /// / tab /// / end-of-line /// /// block-comment-continue = "-}" / block-comment-chunk block-comment-continue /// /// not-end-of-line = %x20-10FFFF / tab /// /// ; NOTE: Slightly different from Haskell-style single-line comments because this /// ; does not require a space after the dashes /// line-comment = "--" *not-end-of-line end-of-line /// /// whitespace-chunk = /// " " /// / tab /// / end-of-line /// / line-comment /// / block-comment whitespace_chunk = _{ " " // | tab // | end_of_line // | line_comment // | block_comment } /// /// whitespace = *whitespace-chunk whitespace = _{ whitespace_chunk* } /// /// nonempty-whitespace = 1*whitespace-chunk nonempty_whitespace = _{ whitespace_chunk+ } /// /// ; Uppercase or lowercase ASCII letter /// ALPHA = %x41-5A / %x61-7A /// /// ; ASCII digit /// DIGIT = %x30-39 ; 0-9 /// /// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" /// /// ; A simple label cannot be one of the following reserved names: /// ; /// ; * Bool /// ; * Optional /// ; * None /// ; * Natural /// ; * Integer /// ; * Double /// ; * Text /// ; * List /// ; * True /// ; * False /// ; * NaN /// ; * Infinity /// ; * Type /// ; * Kind /// ; * Sort /// ; * Natural/fold /// ; * Natural/build /// ; * Natural/isZero /// ; * Natural/even /// ; * Natural/odd /// ; * Natural/toInteger /// ; * Natural/show /// ; * Integer/toDouble /// ; * Integer/show /// ; * Double/show /// ; * List/build /// ; * List/fold /// ; * List/length /// ; * List/head /// ; * List/last /// ; * List/indexed /// ; * List/reverse /// ; * Optional/fold /// ; * Optional/build /// ; * Text/show /// ; * if /// ; * then /// ; * else /// ; * let /// ; * in /// ; * as /// ; * using /// ; * merge /// ; * constructors /// ; * Some /// simple-label = (ALPHA / "_") *(ALPHA / DIGIT / "-" / "/" / "_") /// /// quoted-label = 1*(ALPHA / DIGIT / "-" / "/" / "_" / ":" / "." / "$") /// /// ; NOTE: Dhall does not support Unicode labels, mainly to minimize the potential /// ; for code obfuscation /// label = ("`" quoted-label "`" / simple-label) whitespace /// /// ; Dhall's double-quoted strings are equivalent to JSON strings except with /// ; support for string interpolation (and escaping string interpolation) /// ; /// ; Dhall uses almost the same escaping rules as JSON (RFC7159) with one /// ; exception: Dhall adds a new `\$` escape sequence for dollar signs. This /// ; additional escape sequences lets you escape string interpolation by writing /// ; `\${` /// ; /// ; > The representation of strings is similar to conventions used in the C /// ; > family of programming languages. A string begins and ends with /// ; > quotation marks. All Unicode characters may be placed within the /// ; > quotation marks, except for the characters that must be escaped: /// ; > quotation mark, reverse solidus, and the control characters (U+0000 /// ; > through U+001F). /// ; > /// ; > Any character may be escaped. If the character is in the Basic /// ; > Multilingual Plane (U+0000 through U+FFFF), then it may be /// ; > represented as a six-character sequence: a reverse solidus, followed /// ; > by the lowercase letter u, followed by four hexadecimal digits that /// ; > encode the character's code point. The hexadecimal letters A though /// ; > F can be upper or lower case. So, for example, a string containing /// ; > only a single reverse solidus character may be represented as /// ; > "\u005C". /// ; > /// ; > Alternatively, there are two-character sequence escape /// ; > representations of some popular characters. So, for example, a /// ; > string containing only a single reverse solidus character may be /// ; > represented more compactly as "\\". /// ; > /// ; > To escape an extended character that is not in the Basic Multilingual /// ; > Plane, the character is represented as a 12-character sequence, /// ; > encoding the UTF-16 surrogate pair. So, for example, a string /// ; > containing only the G clef character (U+1D11E) may be represented as /// ; > "\uD834\uDD1E". /// double-quote-chunk = /// "${" complete-expression "}" ; Interpolation /// / %x5C ; '\' Beginning of escape sequence /// ( %x22 ; '"' quotation mark U+0022 /// / %x24 ; '$' dollar sign U+0024 /// / %x5C ; '\' reverse solidus U+005C /// / %x2F ; '/' solidus U+002F /// / %x62 ; 'b' backspace U+0008 /// / %x66 ; 'f' form feed U+000C /// / %x6E ; 'n' line feed U+000A /// / %x72 ; 'r' carriage return U+000D /// / %x74 ; 't' tab U+0009 /// / %x75 4HEXDIG ; 'uXXXX' U+XXXX /// ) /// ; Printable characters except double quote and backslash /// / %x20-21 /// ; %x22 = '"' /// / %x23-5B /// ; %x5C = "\" /// / %x5D-10FFFF /// /// double-quote-literal = %x22 *double-quote-chunk %x22 /// /// ; NOTE: The only way to end a single-quote string literal with a single quote is /// ; to either interpolate the single quote, like this: /// ; /// ; ''ABC${"'"}'' /// ; /// ; ... or concatenate another string, like this: /// ; /// ; ''ABC'' ++ "'" /// ; /// ; If you try to end the string literal with a single quote then you get "'''", /// ; which is interpreted as an escaped pair of single quotes /// single-quote-continue = /// ; Escape two single quotes (i.e. replace this sequence with "''") /// "'''" single-quote-continue /// ; Interpolation /// / "${" complete-expression "}" single-quote-continue /// ; Escape interpolation (i.e. replace this sequence with "${") /// / "''${" single-quote-continue /// / "''" ; End of text literal /// / %x20-10FFFF single-quote-continue /// / tab single-quote-continue /// / end-of-line single-quote-continue /// /// single-quote-literal = "''" end-of-line single-quote-continue /// /// text-literal = (double-quote-literal / single-quote-literal) whitespace /// /// ; RFC 5234 interprets string literals as case-insensitive and recommends using /// ; hex instead for case-sensitive strings /// ; /// ; If you don't feel like reading hex, these are all the same as the rule name, /// ; except without the '-raw' ending, and converting dashes in the rule name /// ; to forward slashes /// if-raw = %x69.66 /// then-raw = %x74.68.65.6e /// else-raw = %x65.6c.73.65 /// let-raw = %x6c.65.74 /// in-raw = %x69.6e /// as-raw = %x61.73 /// using-raw = %x75.73.69.6e.67 /// merge-raw = %x6d.65.72.67.65 /// missing-raw = %x6d.69.73.73.69.6e.67 /// Some-raw = %x53.6f.6d.65 /// constructors-raw = %x63.6f.6e.73.74.72.75.63.74.6f.72.73 /// Natural-fold-raw = %x4e.61.74.75.72.61.6c.2f.66.6f.6c.64 /// Natural-build-raw = %x4e.61.74.75.72.61.6c.2f.62.75.69.6c.64 /// Natural-isZero-raw = %x4e.61.74.75.72.61.6c.2f.69.73.5a.65.72.6f /// Natural-even-raw = %x4e.61.74.75.72.61.6c.2f.65.76.65.6e /// Natural-odd-raw = %x4e.61.74.75.72.61.6c.2f.6f.64.64 /// Natural-toInteger-raw = %x4e.61.74.75.72.61.6c.2f.74.6f.49.6e.74.65.67.65.72 /// Natural-show-raw = %x4e.61.74.75.72.61.6c.2f.73.68.6f.77 /// Integer-toDouble-raw = %x49.6e.74.65.67.65.72.2f.74.6f.44.6f.75.62.6c.65 /// Integer-show-raw = %x49.6e.74.65.67.65.72.2f.73.68.6f.77 /// Double-show-raw = %x44.6f.75.62.6c.65.2f.73.68.6f.77 /// List-build-raw = %x4c.69.73.74.2f.62.75.69.6c.64 /// List-fold-raw = %x4c.69.73.74.2f.66.6f.6c.64 /// List-length-raw = %x4c.69.73.74.2f.6c.65.6e.67.74.68 /// List-head-raw = %x4c.69.73.74.2f.68.65.61.64 /// List-last-raw = %x4c.69.73.74.2f.6c.61.73.74 /// List-indexed-raw = %x4c.69.73.74.2f.69.6e.64.65.78.65.64 /// List-reverse-raw = %x4c.69.73.74.2f.72.65.76.65.72.73.65 /// Optional-fold-raw = %x4f.70.74.69.6f.6e.61.6c.2f.66.6f.6c.64 /// Optional-build-raw = %x4f.70.74.69.6f.6e.61.6c.2f.62.75.69.6c.64 /// Text-show-raw = %x54.65.78.74.2f.73.68.6f.77 /// Bool-raw = %x42.6f.6f.6c /// Optional-raw = %x4f.70.74.69.6f.6e.61.6c /// None-raw = %x4e.6f.6e.65 /// Natural-raw = %x4e.61.74.75.72.61.6c /// Integer-raw = %x49.6e.74.65.67.65.72 /// Double-raw = %x44.6f.75.62.6c.65 /// Text-raw = %x54.65.78.74 /// List-raw = %x4c.69.73.74 /// True-raw = %x54.72.75.65 /// False-raw = %x46.61.6c.73.65 /// NaN-raw = %x4e.61.4e /// Infinity-raw = %x49.6e.66.69.6e.69.74.79 /// Type-raw = %x54.79.70.65 /// Kind-raw = %x4b.69.6e.64 /// Sort-raw = %x53.6f.72.74 /// /// reserved-raw = /// Bool-raw /// / Optional-raw /// / None-raw /// / Natural-raw /// / Integer-raw /// / Double-raw /// / Text-raw /// / List-raw /// / True-raw /// / False-raw /// / NaN-raw /// / Infinity-raw /// / Type-raw /// / Kind-raw /// / Sort-raw /// /// reserved-namespaced-raw = /// Natural-fold-raw /// / Natural-build-raw /// / Natural-isZero-raw /// / Natural-even-raw /// / Natural-odd-raw /// / Natural-toInteger-raw /// / Natural-show-raw /// / Integer-toDouble-raw /// / Integer-show-raw /// / Double-show-raw /// / List-build-raw /// / List-fold-raw /// / List-length-raw /// / List-head-raw /// / List-last-raw /// / List-indexed-raw /// / List-reverse-raw /// / Optional-fold-raw /// / Optional-build-raw /// / Text-show-raw /// /// reserved = reserved-raw whitespace /// reserved-namespaced = reserved-namespaced-raw whitespace /// /// ; Whitespaced rules for reserved words, to be used when matching expressions /// if = if-raw nonempty-whitespace /// then = then-raw nonempty-whitespace /// else = else-raw nonempty-whitespace /// let = let-raw nonempty-whitespace /// in = in-raw nonempty-whitespace /// as = as-raw nonempty-whitespace /// using = using-raw nonempty-whitespace /// merge = merge-raw nonempty-whitespace /// constructors = constructors-raw nonempty-whitespace /// Some = Some-raw nonempty-whitespace /// /// Optional = Optional-raw whitespace /// Text = Text-raw whitespace /// List = List-raw whitespace /// /// equal = "=" whitespace /// or = "||" whitespace /// plus = "+" nonempty-whitespace ; To disambiguate `f +2` plus = _{ "+" ~ nonempty_whitespace } /// text-append = "++" whitespace /// list-append = "#" nonempty-whitespace ; To disambiguate `http://a/a#a` /// and = "&&" whitespace /// times = "*" whitespace times = _{ "*" ~ nonempty_whitespace } /// double-equal = "==" whitespace /// not-equal = "!=" whitespace /// dot = "." whitespace /// open-brace = "{" whitespace /// close-brace = "}" whitespace /// open-bracket = "[" whitespace /// close-bracket = "]" whitespace /// open-angle = "<" whitespace /// close-angle = ">" whitespace /// bar = "|" whitespace /// comma = "," whitespace /// open-parens = "(" whitespace open_parens = _{ "(" ~ whitespace } /// close-parens = ")" whitespace close_parens = _{ ")" ~ whitespace } /// at = "@" whitespace /// colon = ":" nonempty-whitespace ; To disambiguate `env:VARIABLE` from type annotations colon = _{ ":" ~ nonempty_whitespace } /// import-alt = "?" nonempty-whitespace ; To disambiguate `http://a/a?a` /// /// combine = ( %x2227 / "/\" ) whitespace /// combine-types = ( %x2A53 / "//\\" ) whitespace /// prefer = ( %x2AFD / "//" ) whitespace /// lambda = ( %x3BB / "\" ) whitespace /// forall = ( %x2200 / %x66.6f.72.61.6c.6c ) whitespace /// arrow = ( %x2192 / "->" ) whitespace /// /// exponent = "e" [ "+" / "-" ] 1*DIGIT /// /// double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent) whitespace /// /// natural-literal-raw = 1*DIGIT natural_literal_raw = _{ ASCII_DIGIT+ } /// /// integer-literal = ( "+" / "-" ) natural-literal-raw whitespace /// /// natural-literal = natural-literal-raw whitespace natural_literal = { natural_literal_raw ~ whitespace } /// /// identifier = label [ at natural-literal-raw whitespace ] /// /// identifier-reserved-prefix = /// reserved-raw 1*(ALPHA / DIGIT / "-" / "/" / "_") whitespace [ at natural-literal-raw whitespace ] /// /// identifier-reserved-namespaced-prefix = /// reserved-namespaced-raw 1*(ALPHA / DIGIT / "-" / "/" / "_") whitespace [ at natural-literal-raw whitespace ] /// /// missing = missing-raw whitespace /// /// ; Printable characters other than " ()[]{}<>/\," /// ; /// ; Excluding those characters ensures that paths don't have to end with trailing /// ; whitespace most of the time /// path-character = /// ; %x20 = " " /// %x21 /// ; %x22 = "\"" /// ; %x23 = "#" /// / %x24-27 /// ; %x28 = "(" /// ; %x29 = ")" /// / %x2A-2B /// ; %x2C = "," /// / %x2D-2E /// ; %x2F = "/" /// / %x30-3B /// ; %x3C = "<" /// / %x3D /// ; %x3E = ">" /// ; %x3F = "?" /// / %x40-5A /// ; %x5B = "[" /// ; %x5C = "\" /// ; %x5D = "]" /// / %x5E-7A /// ; %x7B = "{" /// / %x7C /// ; %x7D = "}" /// / %x7E /// /// quoted-path-character = /// %x20-21 /// ; %x22 = "\"" /// / %x23-2E /// ; %x2F = "/" /// / %x30-10FFFF /// /// /// path-component = "/" ( 1*path-character / %x22 1*quoted-path-character %x22 ) /// /// directory = *path-component /// /// file = path-component /// /// local-raw = /// ".." directory file ; Relative path /// / "." directory file ; Relative path /// / "~" directory file ; Home-anchored path /// ; NOTE: Backtrack if parsing this alternative fails /// ; /// ; This is because the first character of this alternative will be "/", but /// ; if the second character is "/" or "\" then this should have been parsed /// ; as an operator instead of a path /// / directory file ; Absolute path /// /// local = local-raw whitespace /// /// ; `http[s]` URI grammar based on RFC7230 and RFC 3986 with some differences /// ; noted below /// /// scheme = %x68.74.74.70 [ %x73 ] ; "http" [ "s" ] /// /// ; NOTE: This does not match the official grammar for a URI. Specifically, this /// ; replaces `path-abempty` with `directory file` /// http-raw = scheme "://" authority directory file [ "?" query ] [ "#" fragment ] /// /// ; NOTE: Backtrack if parsing the optional user info prefix fails /// authority = [ userinfo "@" ] host [ ":" port ] /// /// userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) /// /// host = IP-literal / IPv4address / reg-name /// /// port = *DIGIT /// /// IP-literal = "[" ( IPv6address / IPvFuture ) "]" /// /// IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) /// /// ; NOTE: Backtrack when parsing each alternative /// IPv6address = 6( h16 ":" ) ls32 /// / "::" 5( h16 ":" ) ls32 /// / [ h16 ] "::" 4( h16 ":" ) ls32 /// / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 /// / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 /// / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 /// / [ *4( h16 ":" ) h16 ] "::" ls32 /// / [ *5( h16 ":" ) h16 ] "::" h16 /// / [ *6( h16 ":" ) h16 ] "::" /// /// h16 = 1*4HEXDIG /// /// ls32 = ( h16 ":" h16 ) / IPv4address /// /// IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet /// /// ; NOTE: Backtrack when parsing these alternatives and try them in reverse order /// dec-octet = DIGIT ; 0-9 /// / %x31-39 DIGIT ; 10-99 /// / "1" 2DIGIT ; 100-199 /// / "2" %x30-34 DIGIT ; 200-249 /// / "25" %x30-35 ; 250-255 /// /// reg-name = *( unreserved / pct-encoded / sub-delims ) /// /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" /// /// query = *( pchar / "/" / "?" ) /// /// fragment = *( pchar / "/" / "?" ) /// /// pct-encoded = "%" HEXDIG HEXDIG /// /// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" /// /// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" /// /// http = /// http-raw whitespace /// [ using (import-hashed / open-parens import-hashed close-parens) ] /// /// ; Dhall supports unquoted environment variables that are Bash-compliant or /// ; quoted environment variables that are POSIX-compliant /// env = "env:" /// ( bash-environment-variable /// / %x22 posix-environment-variable %x22 /// ) /// whitespace /// /// ; Bash supports a restricted subset of POSIX environment variables. From the /// ; Bash `man` page, an environment variable name is: /// ; /// ; > A word consisting only of alphanumeric characters and under-scores, and /// ; > beginning with an alphabetic character or an under-score /// bash-environment-variable = (ALPHA / "_") *(ALPHA / DIGIT / "_") /// /// ; The POSIX standard is significantly more flexible about legal environment /// ; variable names, which can contain alerts (i.e. '\a'), whitespace, or /// ; punctuation, for example. The POSIX standard says about environment variable /// ; names: /// ; /// ; > The value of an environment variable is a string of characters. For a /// ; > C-language program, an array of strings called the environment shall be made /// ; > available when a process begins. The array is pointed to by the external /// ; > variable environ, which is defined as: /// ; > /// ; > extern char **environ; /// ; > /// ; > These strings have the form name=value; names shall not contain the /// ; > character '='. For values to be portable across systems conforming to IEEE /// ; > Std 1003.1-2001, the value shall be composed of characters from the portable /// ; > character set (except NUL and as indicated below). /// ; /// ; Note that the standard does not explicitly state that the name must have at /// ; least one character, but `env` does not appear to support this and `env` /// ; claims to be POSIX-compliant. To be safe, Dhall requires at least one /// ; character like `env` /// posix-environment-variable = 1*posix-environment-variable-character /// /// ; These are all the characters from the POSIX Portable Character Set except for /// ; '\0' (NUL) and '='. Note that the POSIX standard does not explicitly state /// ; that environment variable names cannot have NUL. However, this is implicit /// ; in the fact that environment variables are passed to the program as /// ; NUL-terminated `name=value` strings, which implies that the `name` portion of /// ; the string cannot have NUL characters /// posix-environment-variable-character = /// %x5C ; '\' Beginning of escape sequence /// ( %x22 ; '"' quotation mark U+0022 /// / %x5C ; '\' reverse solidus U+005C /// / %x61 ; 'a' alert U+0007 /// / %x62 ; 'b' backspace U+0008 /// / %x66 ; 'f' form feed U+000C /// / %x6E ; 'n' line feed U+000A /// / %x72 ; 'r' carriage return U+000D /// / %x74 ; 't' tab U+0009 /// / %x76 ; 'v' vertical tab U+000B /// ) /// ; Printable characters except double quote, backslash and equals /// / %x20-21 /// ; %x22 = '"' /// / %x23-3C /// ; %x3D = '=' /// / %x3E-5B /// ; %x5C = "\" /// / %x5D-7E /// /// import-type = missing / local / http / env /// /// hash = %x73.68.61.32.35.36.3a 64HEXDIG whitespace ; "sha256:XXX...XXX" /// /// import-hashed = import-type [ hash ] /// /// ; "http://example.com" /// ; "./foo/bar" /// ; "env:FOO" /// import = import-hashed [ as Text ] /// /// ; NOTE: Every rule past this point should only reference rules that end with /// ; whitespace. This ensures consistent handling of whitespace in the absence of /// ; a separate lexing step /// /// expression = /// ; "\(x : a) -> b" /// lambda open-parens label colon expression close-parens arrow expression /// /// ; "if a then b else c" /// / if expression then expression else expression /// /// ; "let x : t = e1 in e2" /// ; "let x = e1 in e2" /// ; "let x = e1 let y = e2 in e3" /// / 1*(let label [ colon expression ] equal expression) in expression /// /// ; "forall (x : a) -> b" /// / forall open-parens label colon expression close-parens arrow expression /// /// ; "a -> b" /// ; /// ; NOTE: Backtrack if parsing this alternative fails /// / operator-expression arrow expression /// /// / annotated-expression expression = _{ annotated_expression } /// /// annotated-expression = /// ; "merge e1 e2 : t" /// ; "merge e1 e2" /// merge import-expression import-expression [ colon application-expression ] /// /// ; "[] : List t" /// ; "[] : Optional t" /// ; "[x] : Optional t" /// ; /// ; NOTE: Backtrack if parsing this alternative fails since we can't tell /// ; from the opening bracket whether or not this will be an empty list or /// ; non-empty list /// / open-bracket (empty-collection / non-empty-optional) /// /// ; "x : t" /// / operator-expression (colon expression / "") annotated_expression = _{ operator_expression ~ (colon ~ expression)? } /// /// empty-collection = close-bracket colon (List / Optional) import-expression /// /// non-empty-optional = expression close-bracket colon Optional import-expression /// /// operator-expression = import-alt-expression operator_expression = _{ plus_expression } /// /// import-alt-expression = or-expression *(import-alt or-expression) // import_alt_expression = { application_expression } /// or-expression = plus-expression *(or plus-expression ) /// plus-expression = text-append-expression *(plus text-append-expression ) plus_expression = { times_expression ~ (plus ~ times_expression)* } /// text-append-expression = list-append-expression *(text-append list-append-expression ) /// list-append-expression = and-expression *(list-append and-expression ) /// and-expression = combine-expression *(and combine-expression ) /// combine-expression = prefer-expression *(combine prefer-expression ) /// prefer-expression = combine-types-expression *(prefer combine-types-expression) /// combine-types-expression = times-expression *(combine-types times-expression ) /// times-expression = equal-expression *(times equal-expression ) times_expression = { primitive_expression ~ (times ~ primitive_expression)* } /// equal-expression = not-equal-expression *(double-equal not-equal-expression ) /// not-equal-expression = application-expression *(not-equal application-expression ) /// /// ; Import expressions need to be separated by some whitespace, otherwise there /// ; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`", /// ; or "apply the import `./a` to label `b`" /// application-expression = /// [ constructors / Some ] import-expression *(whitespace-chunk import-expression) /// /// import-expression = import / selector-expression /// /// ; `record.field` extracts one field of a record /// ; /// ; `record.{ field0, field1, field2 }` projects out several fields of a record /// ; /// ; NOTE: Backtrack when parsing the `*(dot ...)`. The reason why is that you /// ; can't tell from parsing just the period whether "foo." will become "foo.bar" /// ; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying /// ; the function `foo` to the relative path `./bar`) /// selector-expression = primitive-expression *(dot ( label / labels )) /// /// ; NOTE: Backtrack when parsing the first three alternatives (i.e. the numeric /// ; literals). This is because they share leading characters in common /// /// ; NOTE: The reason why we have three different types of identifiers (that is: /// ; identifier, identifier-reserved-prefix, identifier-reserved-namespaced-prefix) /// ; is that it's the only way to parse correctly identifiers that start with reserved /// ; words, other than using a lexer and use the longest match rule. /// ; /// ; Since reserved words can include themselves (e.g. 'List/build' includes 'List'), /// ; we have to match the "namespaced" reserved words before the identifiers prefixed /// ; by a reserved word. /// primitive-expression = /// ; "2.0" /// double-literal /// /// ; "2" /// / natural-literal /// /// ; "+2" /// / integer-literal /// /// ; "-Infinity" /// / "-" Infinity-raw whitespace /// /// ; '"ABC"' /// / text-literal /// /// ; "{ foo = 1 , bar = True }" /// ; "{ foo : Integer, bar : Bool }" /// / open-brace record-type-or-literal close-brace /// /// ; "< Foo : Integer | Bar : Bool >" /// ; "< Foo : Integer | Bar = True >" /// / open-angle union-type-or-literal close-angle /// /// ; "[1, 2, 3]" /// / non-empty-list-literal ; `annotated-expression` handles empty lists /// /// ; "List/foldWith" /// / identifier-reserved-namespaced-prefix /// /// ; "List/head" /// / reserved-namespaced /// /// ; "List/map" /// ; "TypeDefinition" /// / identifier-reserved-prefix /// /// ; "List" /// / reserved /// /// ; "x" /// ; "x@2" /// / identifier /// /// ; "( e )" /// / open-parens expression close-parens primitive_expression = _{ natural_literal | open_parens ~ expression ~ close_parens } /// /// labels = open-brace ( label *(comma label) / "" ) close-brace /// /// record-type-or-literal = /// equal ; Empty record literal /// / non-empty-record-type-or-literal /// / "" ; Empty record type /// /// non-empty-record-type-or-literal = /// label (non-empty-record-literal / non-empty-record-type) /// /// non-empty-record-type = colon expression *(comma label colon expression) /// non-empty-record-literal = equal expression *(comma label equal expression) /// /// union-type-or-literal = /// non-empty-union-type-or-literal /// / "" ; Empty union type /// /// non-empty-union-type-or-literal = /// label /// ( equal expression *(bar label colon expression) /// / colon expression (bar non-empty-union-type-or-literal / "") /// ) /// /// non-empty-list-literal = open-bracket expression *(comma expression) close-bracket /// /// ; All expressions end with trailing whitespace. This just adds a final /// ; whitespace prefix for the top-level of the program /// complete-expression = whitespace expression complete_expression = _{ SOI ~ whitespace ~ expression ~ EOI }