From 644659c37cb44429fe64fa1cbcb50068ecb77480 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 23 Mar 2019 16:51:11 +0100 Subject: Rebase latest changes in grammar --- dhall_parser/src/dhall.abnf | 115 ++++++++++++++++++++------------- dhall_parser/src/dhall.pest.visibility | 5 ++ 2 files changed, 75 insertions(+), 45 deletions(-) diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf index 51de689..f9ba91e 100644 --- a/dhall_parser/src/dhall.abnf +++ b/dhall_parser/src/dhall.abnf @@ -152,17 +152,34 @@ HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" ; * as ; * using ; * merge +; * missing +; * Infinity +; * Some simple-label-first-char = ALPHA / "_" simple-label-next-char = ALPHA / DIGIT / "-" / "/" / "_" simple-label = simple-label-first-char *simple-label-next-char -quoted-label = 1*(ALPHA / DIGIT / "-" / "/" / "_" / ":" / "." / "$") +quoted-label-char = + %x20-5F + ; %x60 = '`' + / %x61-7E + +quoted-label = 1*quoted-label-char ; NOTE: Dhall does not support Unicode labels, mainly to minimize the potential ; for code obfuscation +; A label cannot be any of the reserved identifiers for builtins (unless quoted). +; Their list can be found in semantics.md. This is not enforced by the grammar but +; should be checked by implementations. The only place where this restriction applies +; is bound variables. label-raw = ("`" quoted-label "`" / simple-label) label = label-raw whitespace +; An any-label is allowed to be one of the reserved identifiers. +any-label = label +any-label-raw = label-raw + + ; Dhall's double-quoted strings are equivalent to JSON strings except with ; support for string interpolation (and escaping string interpolation) ; @@ -266,6 +283,7 @@ text-literal-raw = (double-quote-literal / single-quote-literal) ; ; If you don't feel like reading hex, these are all the same as the rule name, ; except without the '-raw' ending. 
+; Keywords that should never be parsed as identifiers if-raw = %x69.66 then-raw = %x74.68.65.6e else-raw = %x65.6c.73.65 @@ -275,11 +293,14 @@ as-raw = %x61.73 using-raw = %x75.73.69.6e.67 merge-raw = %x6d.65.72.67.65 missing-raw = %x6d.69.73.73.69.6e.67 +Infinity-raw = %x49.6e.66.69.6e.69.74.79 +; Reserved identifiers, only needed for some special cases of parsing Optional-raw = %x4f.70.74.69.6f.6e.61.6c Text-raw = %x54.65.78.74 List-raw = %x4c.69.73.74 Infinity-raw = %x49.6e.66.69.6e.69.74.79 NaN-raw = %x4e.61.4e +Some-raw = %x53.6f.6d.65 ; Whitespaced rules for reserved words, to be used when matching expressions if = if-raw nonempty-whitespace @@ -290,38 +311,39 @@ in = in-raw nonempty-whitespace as = as-raw nonempty-whitespace using = using-raw nonempty-whitespace merge = merge-raw nonempty-whitespace +Some = Some-raw nonempty-whitespace Optional = Optional-raw whitespace Text = Text-raw whitespace List = List-raw whitespace -equal = "=" whitespace -or = "||" whitespace -plus = "+" nonempty-whitespace ; To disambiguate `f +2` -text-append = "++" whitespace -list-append = "#" whitespace -and = "&&" whitespace -times = "*" whitespace -double-equal = "==" whitespace -not-equal = "!=" whitespace -dot = "." whitespace -bar = "|" whitespace -comma = "," whitespace -at = "@" whitespace -colon = ":" nonempty-whitespace ; To disambiguate `env:VARIABLE` from type annotations -import-alt = "?" nonempty-whitespace ; To disambiguate `http://a/a?a` -open-parens = "(" whitespace + +equal = "=" whitespace +or = "||" whitespace +plus = "+" nonempty-whitespace ; To disambiguate `f +2` +text-append = "++" whitespace +list-append = "#" whitespace +and = "&&" whitespace +times = "*" whitespace +double-equal = "==" whitespace +not-equal = "!=" whitespace +dot = "." 
whitespace +bar = "|" whitespace +comma = "," whitespace +at = "@" whitespace +open-parens = "(" whitespace close-parens-raw = ")" -close-parens = ")" whitespace -open-brace = "{" whitespace +close-parens = ")" whitespace +open-brace = "{" whitespace close-brace-raw = "}" -close-brace = "}" whitespace -open-bracket = "[" whitespace +close-brace = "}" whitespace +open-bracket = "[" whitespace close-bracket-raw = "]" -close-bracket = "]" whitespace -open-angle = "<" whitespace +close-bracket = "]" whitespace +open-angle = "<" whitespace close-angle-raw = ">" -close-angle = ">" whitespace - +close-angle = ">" whitespace +colon = ":" nonempty-whitespace ; To disambiguate `env:VARIABLE` from type annotations +import-alt = "?" nonempty-whitespace ; To disambiguate `http://a/a?a` combine = ( %x2227 / "/\" ) whitespace combine-types = ( %x2A53 / "//\\" ) whitespace prefer = ( %x2AFD / "//" ) whitespace @@ -337,7 +359,9 @@ natural-literal-raw = 1*DIGIT integer-literal-raw = ( "+" / "-" ) natural-literal-raw -identifier-raw = label-raw [ whitespace at natural-literal-raw ] +; The implementation should recognize reserved names for builtins and treat them as special +; values instead of variables. +identifier-raw = any-label-raw [ whitespace at natural-literal-raw ] identifier = identifier-raw whitespace ; Printable characters other than " ()[]{}<>/\," @@ -406,8 +430,12 @@ absolute-path = path ; Absolute path scheme = %x68.74.74.70 [ %x73 ] ; "http" [ "s" ] -; NOTE: This does not match the official grammar for a URI. Specifically, this -; replaces `path-abempty` with `path` +; NOTE: This does not match the official grammar for a URI. Specifically: +; +; * this replaces `path-abempty` with `path`, so an empty path is +; not valid +; * this does not support fragment identifiers, which have no meaning within +; Dhall expressions and do not affect import resolution http-raw = scheme "://" authority path [ "?" 
query ] ; NOTE: Backtrack if parsing the optional user info prefix fails @@ -544,8 +572,6 @@ import-raw = import-hashed-raw [ whitespace as Text-raw ] ; The exception is the rules ending in -raw, which should _not_ end in whitespace. ; This is important to avoid the need for sequential backtracking in application-expression. -; An arbitrary dhall expression. Only use in a context where parentheses or -; keywords prevent possible ambiguity. See also atomic-expression. expression = lambda-expression / ifthenelse-expression @@ -580,14 +606,14 @@ arrow-expression = operator-expression arrow expression ; "merge e1 e2 : t" ; "merge e1 e2" -merge-expression = merge atomic-expression atomic-expression [ colon application-expression ] +merge-expression = merge import-expression import-expression [ colon application-expression ] ; "[] : List t" ; "[] : Optional t" ; "[x] : Optional t" empty-list-or-optional = open-bracket (empty-collection / non-empty-optional) -empty-collection = close-bracket colon (List / Optional) atomic-expression -non-empty-optional = expression close-bracket colon Optional atomic-expression +empty-collection = close-bracket colon (List / Optional) import-expression +non-empty-optional = expression close-bracket colon Optional import-expression ; "x : t" annotated-expression = operator-expression [ colon expression ] @@ -611,12 +637,11 @@ not-equal-expression = application-expression *(not-equal application- ; Import expressions need to be separated by some whitespace, otherwise there ; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`", ; or "apply the import `./a` to label `b`" -; The -raw handling is important for greedy parsers, that can't do sequential backtracking. 
-application-expression = atomic-expression-raw *(nonempty-whitespace atomic-expression-raw) whitespace +application-expression = + import-expression-raw *(nonempty-whitespace import-expression-raw) whitespace -; An expression that does not need to be surrounded by parentheses to disambiguate -atomic-expression = atomic-expression-raw whitespace -atomic-expression-raw = +import-expression = import-expression-raw whitespace +import-expression-raw = import-raw / selector-expression-raw @@ -629,9 +654,9 @@ atomic-expression-raw = ; the function `foo` to the relative path `./bar`) selector-expression-raw = primitive-expression-raw *(whitespace dot selector-raw) -selector-raw = label-raw / labels-raw +selector-raw = any-label-raw / labels-raw -labels-raw = open-brace [ label *(comma label) ] close-brace-raw +labels-raw = open-brace [ any-label *(comma any-label) ] close-brace-raw primitive-expression-raw = @@ -679,11 +704,11 @@ record-type-or-literal = empty-record-literal = equal empty-record-type = "" non-empty-record-type-or-literal = - label (non-empty-record-literal / non-empty-record-type) + any-label (non-empty-record-literal / non-empty-record-type) non-empty-record-type = colon expression *(comma record-type-entry) -record-type-entry = label colon expression +record-type-entry = any-label colon expression non-empty-record-literal = equal expression *(comma record-literal-entry) -record-literal-entry = label equal expression +record-literal-entry = any-label equal expression ; "< Foo : Integer | Bar : Bool >" ; "< Foo : Integer | Bar = True >" @@ -692,12 +717,12 @@ union-type-or-literal = / empty-union-type empty-union-type = "" non-empty-union-type-or-literal = - label + any-label ( equal expression union-type-entries / colon expression [ bar non-empty-union-type-or-literal ] ) union-type-entries = *(bar union-type-entry) -union-type-entry = label colon expression +union-type-entry = any-label colon expression ; "[1, 2, 3]" ; `empty-list-or-optional` handles 
empty lists diff --git a/dhall_parser/src/dhall.pest.visibility b/dhall_parser/src/dhall.pest.visibility index 3d0a80c..da5217a 100644 --- a/dhall_parser/src/dhall.pest.visibility +++ b/dhall_parser/src/dhall.pest.visibility @@ -18,9 +18,12 @@ end_of_line # simple_label_next_char # simple_label_start simple_label +# quoted_label_char quoted_label label_raw # label +# any_label_raw +# any_label double_quote_chunk double_quote_escaped double_quote_literal @@ -151,6 +154,8 @@ not_equal_expression application_expression # atomic_expression # atomic_expression_raw +# import_expression +# import_expression_raw selector_expression_raw selector_raw labels_raw -- cgit v1.2.3