From f385e9dabfe3d923a841dc6d99542683cbbbb77a Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Thu, 4 Apr 2019 19:46:17 +0200 Subject: Import many trivial changes from upstream --- dhall_parser/src/dhall.abnf | 90 ++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 46 deletions(-) (limited to 'dhall_parser/src') diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf index 2fad4d4..ca54215 100644 --- a/dhall_parser/src/dhall.abnf +++ b/dhall_parser/src/dhall.abnf @@ -132,6 +132,7 @@ whitespace-chunk = whsp = *whitespace-chunk +; nonempty whitespace whsp1 = 1*whitespace-chunk ; Uppercase or lowercase ASCII letter @@ -142,19 +143,13 @@ DIGIT = %x30-39 ; 0-9 HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" -; A simple label cannot be one of the following reserved keywords: -; -; * if -; * then -; * else -; * let -; * in -; * as -; * using -; * merge -; * missing -; * Infinity -; * Some +; A simple label cannot be one of the reserved keywords +; listed in the `keyword` rule. +; A PEG parser could use negative lookahead to +; enforce this, e.g. as follows: +; simple-label = +; keyword 1*simple-label-next-char +; / !keyword (simple-label-first-char *simple-label-next-char) simple-label-first-char = ALPHA / "_" simple-label-next-char = ALPHA / DIGIT / "-" / "/" / "_" simple-label = simple-label-first-char *simple-label-next-char @@ -174,6 +169,7 @@ label = ("`" quoted-label "`" / simple-label) ; Their list can be found in semantics.md. This is not enforced by the grammar but ; should be checked by implementations. The only place where this restriction applies ; is bound variables. +; A PEG parser could use negative lookahead to avoid parsing those identifiers. nonreserved-label = label ; An any-label is allowed to be one of the reserved identifiers. @@ -216,8 +212,8 @@ any-label = label ; > "\uD834\uDD1E". double-quote-chunk = interpolation - ; '\' - / %x5C double-quote-escaped + ; '\' Beginning of escape sequence + / %x5C double-quote-escaped / double-quote-char double-quote-escaped = @@ -273,7 +269,6 @@ single-quote-char = single-quote-literal = "''" end-of-line single-quote-continue -; Interpolation interpolation = "${" complete-expression "}" text-literal = (double-quote-literal / single-quote-literal) @@ -281,25 +276,25 @@ text-literal = (double-quote-literal / single-quote-literal) ; RFC 5234 interprets string literals as case-insensitive and recommends using ; hex instead for case-sensitive strings ; -; If you don't feel like reading hex, these are all the same as the rule name, -; except without the '' ending. +; If you don't feel like reading hex, these are all the same as the rule name. ; Keywords that should never be parsed as identifiers -if = %x69.66 -then = %x74.68.65.6e -else = %x65.6c.73.65 -let = %x6c.65.74 -in = %x69.6e -as = %x61.73 -using = %x75.73.69.6e.67 -merge = %x6d.65.72.67.65 -missing = %x6d.69.73.73.69.6e.67 -Infinity = %x49.6e.66.69.6e.69.74.79 +if = %x69.66 +then = %x74.68.65.6e +else = %x65.6c.73.65 +let = %x6c.65.74 +in = %x69.6e +as = %x61.73 +using = %x75.73.69.6e.67 +merge = %x6d.65.72.67.65 +missing = %x6d.69.73.73.69.6e.67 +Infinity = %x49.6e.66.69.6e.69.74.79 +NaN = %x4e.61.4e +Some = %x53.6f.6d.65 + ; Reserved identifiers, only needed for some special cases of parsing Optional = %x4f.70.74.69.6f.6e.61.6c Text = %x54.65.78.74 List = %x4c.69.73.74 -NaN = %x4e.61.4e -Some = %x53.6f.6d.65 combine = %x2227 / "/\" combine-types = %x2A53 / "//\\" @@ -363,6 +358,8 @@ quoted-path-component = 1*quoted-path-character path-component = "/" ( unquoted-path-component / %x22 quoted-path-component %x22 ) +; The last path-component matched by this rule is referred to as "file" in the semantics, +; and the other path-components as "directory". path = 1*path-component local = @@ -513,7 +510,7 @@ posix-environment-variable-character = import-type = missing / local / http / env -hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX" +hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX" import-hashed = import-type [ whsp hash ] @@ -522,12 +519,6 @@ import-hashed = import-type [ whsp hash ] ; "env:FOO" import = import-hashed [ whsp as whsp1 Text ] -; NOTE: Every rule past this point should only reference rules that end with -; whitespace. This ensures consistent handling of whitespace in the absence of -; a separate lexing step. -; The exception is the rules ending in , which should _not_ end in whitespace. -; This is important to avoid the need for sequential backtracking in application-expression. - expression = ; "\(x : a) -> b" lambda whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression @@ -544,6 +535,7 @@ expression = / forall whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression ; "a -> b" + ; ; NOTE: Backtrack if parsing this alternative fails / operator-expression whsp arrow whsp expression @@ -554,6 +546,7 @@ expression = ; "[] : List t" ; "[] : Optional t" ; "[x] : Optional t" + ; ; NOTE: Backtrack if parsing this alternative fails since we can't tell ; from the opening bracket whether or not this will be an empty list or ; a non-empty list @@ -562,11 +555,17 @@ expression = ; "x : t" / annotated-expression -; "x : t" +; Nonempty-whitespace to disambiguate `env:VARIABLE` from type annotations annotated-expression = operator-expression [ whsp ":" whsp1 expression ] +; "let x = e1" let-binding = let whsp1 nonreserved-label whsp [ ":" whsp1 expression whsp ] "=" whsp expression whsp + +; "] : List t" +; "] : Optional t" empty-collection = "]" whsp ":" whsp1 (List / Optional) whsp import-expression + +; "x] : Optional t" non-empty-optional = expression whsp "]" whsp ":" whsp1 Optional whsp import-expression operator-expression = import-alt-expression @@ -592,14 +591,13 @@ not-equal-expression = application-expression *(whsp "!=" whsp application application-expression = import-expression *(whsp1 import-expression) -import-expression = - import - / selector-expression +import-expression = import / selector-expression ; `record.field` extracts one field of a record +; ; `record.{ field0, field1, field2 }` projects out several fields of a record ; -; NOTE: Backtrack when parsing the `*(dot ...)`. The reason why is that you +; NOTE: Backtrack when parsing the `*("." ...)`. The reason why is that you ; can't tell from parsing just the period whether "foo." will become "foo.bar" ; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying ; the function `foo` to the relative path `./bar`) @@ -646,14 +644,13 @@ literal-expression = minus-infinity-literal = "-" Infinity plus-infinity-literal = Infinity -; "{ foo = 1 , bar = True }" -; "{ foo : Integer, bar : Bool }" record-type-or-literal = empty-record-literal / non-empty-record-type-or-literal / empty-record-type empty-record-literal = "=" whsp empty-record-type = "" + non-empty-record-type-or-literal = any-label whsp (non-empty-record-literal / non-empty-record-type) non-empty-record-type = ":" whsp1 expression whsp *("," whsp record-type-entry) @@ -666,7 +663,9 @@ record-literal-entry = any-label whsp "=" whsp expression whsp union-type-or-literal = non-empty-union-type-or-literal / empty-union-type + empty-union-type = "" + non-empty-union-type-or-literal = any-label whsp ( "=" whsp expression whsp union-type-entries @@ -683,6 +682,5 @@ non-empty-list-literal = "[" whsp expression whsp *("," whsp expression whsp) "] parenthesized-expression = "(" whsp expression whsp ")" -; All expressions end with trailing whitespace. This just adds a final -; whitespace prefix for the top-level of the program +; This just adds surrounding whitespace for the top-level of the program complete-expression = whsp expression whsp -- cgit v1.2.3