diff options
author | Nadrieril Feneanar | 2019-04-04 23:51:46 +0200 |
---|---|---|
committer | GitHub | 2019-04-04 23:51:46 +0200 |
commit | a9f4e9b10196d75e674d95160955360d388a772b (patch) | |
tree | 20d869202eadc1193e449d3b3320f9b1d89ae578 /dhall_parser | |
parent | 7336787af41adc4dfe650f7c7f10f9c7c3c011db (diff) | |
parent | 74eb88c42d938672137771fab33ef0118903b5e1 (diff) |
Merge pull request #42 from Nadrieril/move-to-upstream-grammar
Move to upstream grammar
Diffstat (limited to 'dhall_parser')
-rw-r--r-- | dhall_parser/build.rs | 7 | ||||
-rw-r--r-- | dhall_parser/src/dhall.abnf | 312 | ||||
-rw-r--r-- | dhall_parser/src/dhall.pest.visibility | 100 |
3 files changed, 199 insertions, 220 deletions
diff --git a/dhall_parser/build.rs b/dhall_parser/build.rs index 4e75181..615a55c 100644 --- a/dhall_parser/build.rs +++ b/dhall_parser/build.rs @@ -42,13 +42,6 @@ fn main() -> std::io::Result<()> { )?; writeln!( &mut file, - "keyword = _{{ - let_ | in_ | if_ | then - | else_ | Infinity | NaN - }}" - )?; - writeln!( - &mut file, "final_expression = ${{ SOI ~ complete_expression ~ EOI }}" )?; diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf index 02edc84..847da02 100644 --- a/dhall_parser/src/dhall.abnf +++ b/dhall_parser/src/dhall.abnf @@ -100,10 +100,9 @@ ;
; For simplicity this supports Unix and Windows line-endings, which are the most
; common
-end-of-line-silent =
+end-of-line =
%x0A ; "\n"
/ %x0D.0A ; "\r\n"
-end-of-line = end-of-line-silent
tab = %x09 ; "\t"
@@ -113,7 +112,7 @@ block-comment-chunk = block-comment
/ %x20-10FFFF
/ tab
- / end-of-line-silent
+ / end-of-line
block-comment-continue = "-}" / block-comment-chunk block-comment-continue
@@ -121,18 +120,19 @@ not-end-of-line = %x20-10FFFF / tab ; NOTE: Slightly different from Haskell-style single-line comments because this
; does not require a space after the dashes
-line-comment = "--" *not-end-of-line end-of-line-silent
+line-comment = "--" *not-end-of-line end-of-line
whitespace-chunk =
" "
/ tab
- / end-of-line-silent
+ / end-of-line
/ line-comment
/ block-comment
-whitespace = *whitespace-chunk
+whsp = *whitespace-chunk
-nonempty-whitespace = 1*whitespace-chunk
+; nonempty whitespace
+whsp1 = 1*whitespace-chunk
; Uppercase or lowercase ASCII letter
ALPHA = %x41-5A / %x61-7A
@@ -142,19 +142,13 @@ DIGIT = %x30-39 ; 0-9 HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-; A simple label cannot be one of the following reserved keywords:
-;
-; * if
-; * then
-; * else
-; * let
-; * in
-; * as
-; * using
-; * merge
-; * missing
-; * Infinity
-; * Some
+; A simple label cannot be one of the reserved keywords
+; listed in the `keyword` rule.
+; A PEG parser could use negative lookahead to
+; enforce this, e.g. as follows:
+; simple-label =
+; keyword 1*simple-label-next-char
+; / !keyword (simple-label-first-char *simple-label-next-char)
simple-label-first-char = ALPHA / "_"
simple-label-next-char = ALPHA / DIGIT / "-" / "/" / "_"
simple-label = simple-label-first-char *simple-label-next-char
@@ -170,11 +164,12 @@ quoted-label = 1*quoted-label-char ; for code obfuscation
label = ("`" quoted-label "`" / simple-label)
-; An unreserved-label cannot not be any of the reserved identifiers for builtins (unless quoted).
+; A nonreserved-label cannot not be any of the reserved identifiers for builtins (unless quoted).
; Their list can be found in semantics.md. This is not enforced by the grammar but
; should be checked by implementations. The only place where this restriction applies
; is bound variables.
-unreserved-label = label
+; A PEG parser could use negative lookahead to avoid parsing those identifiers.
+nonreserved-label = label
; An any-label is allowed to be one of the reserved identifiers.
any-label = label
@@ -216,8 +211,8 @@ any-label = label ; > "\uD834\uDD1E".
double-quote-chunk =
interpolation
- ; '\'
- / %x5C double-quote-escaped
+ ; '\' Beginning of escape sequence
+ / %x5C double-quote-escaped
/ double-quote-char
double-quote-escaped =
@@ -273,7 +268,6 @@ single-quote-char = single-quote-literal = "''" end-of-line single-quote-continue
-; Interpolation
interpolation = "${" complete-expression "}"
text-literal = (double-quote-literal / single-quote-literal)
@@ -281,25 +275,34 @@ text-literal = (double-quote-literal / single-quote-literal) ; RFC 5234 interprets string literals as case-insensitive and recommends using
; hex instead for case-sensitive strings
;
-; If you don't feel like reading hex, these are all the same as the rule name,
-; except without the '' ending.
+; If you don't feel like reading hex, these are all the same as the rule name.
; Keywords that should never be parsed as identifiers
-if = %x69.66
-then = %x74.68.65.6e
-else = %x65.6c.73.65
-let = %x6c.65.74
-in = %x69.6e
-as = %x61.73
-using = %x75.73.69.6e.67
-merge = %x6d.65.72.67.65
-missing = %x6d.69.73.73.69.6e.67
-Infinity = %x49.6e.66.69.6e.69.74.79
+if = %x69.66
+then = %x74.68.65.6e
+else = %x65.6c.73.65
+let = %x6c.65.74
+in = %x69.6e
+as = %x61.73
+using = %x75.73.69.6e.67
+merge = %x6d.65.72.67.65
+missing = %x6d.69.73.73.69.6e.67
+Infinity = %x49.6e.66.69.6e.69.74.79
+NaN = %x4e.61.4e
+Some = %x53.6f.6d.65
+
+; Unused rule that could be used as negative lookahead in the
+; `simple-label` rule for parsers that support this.
+keyword =
+ if / then / else
+ / let / in
+ / using / missing / as
+ / Infinity / NaN
+ / merge / Some
+
; Reserved identifiers, only needed for some special cases of parsing
Optional = %x4f.70.74.69.6f.6e.61.6c
Text = %x54.65.78.74
List = %x4c.69.73.74
-NaN = %x4e.61.4e
-Some = %x53.6f.6d.65
combine = %x2227 / "/\"
combine-types = %x2A53 / "//\\"
@@ -310,7 +313,20 @@ arrow = %x2192 / "->" exponent = "e" [ "+" / "-" ] 1*DIGIT
-double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)
+numeric-double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)
+
+minus-infinity-literal = "-" Infinity
+plus-infinity-literal = Infinity
+
+double-literal =
+ ; "2.0"
+ numeric-double-literal
+ ; "-Infinity"
+ / minus-infinity-literal
+ ; "Infinity"
+ / plus-infinity-literal
+ ; "NaN"
+ / NaN
natural-literal = 1*DIGIT
@@ -318,7 +334,7 @@ integer-literal = ( "+" / "-" ) natural-literal ; The implementation should recognize reserved names for builtins and treat them as special
; values instead of variables.
-identifier = any-label [ whitespace "@" whitespace natural-literal ]
+identifier = any-label [ whsp "@" whsp natural-literal ]
; Printable characters other than " ()[]{}<>/\,"
;
@@ -363,6 +379,8 @@ quoted-path-component = 1*quoted-path-character path-component = "/" ( unquoted-path-component / %x22 quoted-path-component %x22 )
+; The last path-component matched by this rule is referred to as "file" in the semantics,
+; and the other path-components as "directory".
path = 1*path-component
local =
@@ -445,7 +463,7 @@ sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" http =
http-raw
- [ whitespace using nonempty-whitespace (import-hashed / "(" whitespace import-hashed whitespace ")") ]
+ [ whsp using whsp1 (import-hashed / "(" whsp import-hashed whsp ")") ]
; Dhall supports unquoted environment variables that are Bash-compliant or
; quoted environment variables that are POSIX-compliant
@@ -513,117 +531,106 @@ posix-environment-variable-character = import-type = missing / local / http / env
-hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"
+hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"
-import-hashed = import-type [ whitespace hash ]
+import-hashed = import-type [ whsp hash ]
; "http://example.com"
; "./foo/bar"
; "env:FOO"
-import = import-hashed [ whitespace as nonempty-whitespace Text ]
-
-; NOTE: Every rule past this point should only reference rules that end with
-; whitespace. This ensures consistent handling of whitespace in the absence of
-; a separate lexing step.
-; The exception is the rules ending in , which should _not_ end in whitespace.
-; This is important to avoid the need for sequential backtracking in application-expression.
+import = import-hashed [ whsp as whsp1 Text ]
expression =
- lambda-expression
- / ifthenelse-expression
- / let-expression
- / forall-expression
+ ; "\(x : a) -> b"
+ lambda whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
+
+ ; "if a then b else c"
+ / if whsp1 expression whsp then whsp1 expression whsp else whsp1 expression
+
+ ; "let x : t = e1 in e2"
+ ; "let x = e1 in e2"
+ ; "let x = e1 let y = e2 in e3"
+ / 1*let-binding in whsp1 expression
+
+ ; "forall (x : a) -> b"
+ / forall whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
+
+ ; "a -> b"
+ ;
; NOTE: Backtrack if parsing this alternative fails
- / arrow-expression
- / merge-expression
+ / operator-expression whsp arrow whsp expression
+
+ ; "merge e1 e2 : t"
+ ; "merge e1 e2"
+ / merge whsp1 import-expression whsp import-expression [ whsp ":" whsp1 application-expression ]
+
+ ; "[] : List t"
+ ; "[] : Optional t"
+ ; "[x] : Optional t"
+ ;
; NOTE: Backtrack if parsing this alternative fails since we can't tell
; from the opening bracket whether or not this will be an empty list or
; a non-empty list
- / empty-list-or-optional
+ / "[" whsp (empty-collection / non-empty-optional)
+
+ ; "x : t"
/ annotated-expression
-; "\(x : a) -> b"
-lambda-expression = lambda whitespace "(" whitespace unreserved-label whitespace ":" nonempty-whitespace expression ")" whitespace arrow whitespace expression
-
-; "if a then b else c"
-ifthenelse-expression = if nonempty-whitespace expression then nonempty-whitespace expression else nonempty-whitespace expression
-
-; "let x : t = e1 in e2"
-; "let x = e1 in e2"
-; "let x = e1 let y = e2 in e3"
-let-expression = 1*let-binding in nonempty-whitespace expression
-let-binding = let nonempty-whitespace unreserved-label whitespace [ ":" nonempty-whitespace expression ] "=" whitespace expression
-
-; "forall (x : a) -> b"
-forall-expression = forall whitespace "(" whitespace unreserved-label whitespace ":" nonempty-whitespace expression ")" whitespace arrow whitespace expression
+; Nonempty-whitespace to disambiguate `env:VARIABLE` from type annotations
+annotated-expression = operator-expression [ whsp ":" whsp1 expression ]
-; "a -> b"
-arrow-expression = operator-expression arrow whitespace expression
+; "let x = e1"
+let-binding = let whsp1 nonreserved-label whsp [ ":" whsp1 expression whsp ] "=" whsp expression whsp
-; "merge e1 e2 : t"
-; "merge e1 e2"
-merge-expression = merge nonempty-whitespace import-expression whitespace import-expression whitespace [ ":" nonempty-whitespace application-expression ]
-
-; "[] : List t"
-; "[] : Optional t"
-; "[x] : Optional t"
-empty-list-or-optional = "[" whitespace (empty-collection / non-empty-optional)
-empty-collection = "]" whitespace ":" nonempty-whitespace (List whitespace / Optional whitespace) import-expression whitespace
-non-empty-optional = expression "]" whitespace ":" nonempty-whitespace Optional whitespace import-expression whitespace
-
-; "x : t"
-annotated-expression = operator-expression [ ":" nonempty-whitespace expression ]
+; "] : List t"
+; "] : Optional t"
+empty-collection = "]" whsp ":" whsp1 (List / Optional) whsp import-expression
+; "x] : Optional t"
+non-empty-optional = expression whsp "]" whsp ":" whsp1 Optional whsp import-expression
operator-expression = import-alt-expression
-import-alt-expression = or-expression *("?" nonempty-whitespace or-expression)
-or-expression = plus-expression *("||" whitespace plus-expression )
-plus-expression = text-append-expression *("+" nonempty-whitespace text-append-expression )
-text-append-expression = list-append-expression *("++" whitespace list-append-expression )
-list-append-expression = and-expression *("#" whitespace and-expression )
-and-expression = combine-expression *("&&" whitespace combine-expression )
-combine-expression = prefer-expression *(combine whitespace prefer-expression )
-prefer-expression = combine-types-expression *(prefer whitespace combine-types-expression)
-combine-types-expression = times-expression *(combine-types whitespace times-expression )
-times-expression = equal-expression *("*" whitespace equal-expression )
-equal-expression = not-equal-expression *("==" whitespace not-equal-expression )
-not-equal-expression = application-expression *("!=" whitespace application-expression )
+; Nonempty-whitespace to disambiguate `http://a/a?a`
+import-alt-expression = or-expression *(whsp "?" whsp1 or-expression)
+or-expression = plus-expression *(whsp "||" whsp plus-expression)
+; Nonempty-whitespace to disambiguate `f +2`
+plus-expression = text-append-expression *(whsp "+" whsp1 text-append-expression)
+text-append-expression = list-append-expression *(whsp "++" whsp list-append-expression)
+list-append-expression = and-expression *(whsp "#" whsp and-expression)
+and-expression = combine-expression *(whsp "&&" whsp combine-expression)
+combine-expression = prefer-expression *(whsp combine whsp prefer-expression)
+prefer-expression = combine-types-expression *(whsp prefer whsp combine-types-expression)
+combine-types-expression = times-expression *(whsp combine-types whsp times-expression)
+times-expression = equal-expression *(whsp "*" whsp equal-expression)
+equal-expression = not-equal-expression *(whsp "==" whsp not-equal-expression)
+not-equal-expression = application-expression *(whsp "!=" whsp application-expression)
; Import expressions need to be separated by some whitespace, otherwise there
; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`",
; or "apply the import `./a` to label `b`"
application-expression =
- import-expression *(nonempty-whitespace import-expression) whitespace
+ [ Some whsp1 ] import-expression *(whsp1 import-expression)
-import-expression =
- import
- / selector-expression
+import-expression = import / selector-expression
; `record.field` extracts one field of a record
+;
; `record.{ field0, field1, field2 }` projects out several fields of a record
;
-; NOTE: Backtrack when parsing the `*(dot ...)`. The reason why is that you
+; NOTE: Backtrack when parsing the `*("." ...)`. The reason why is that you
; can't tell from parsing just the period whether "foo." will become "foo.bar"
; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying
; the function `foo` to the relative path `./bar`)
-selector-expression = primitive-expression *(whitespace "." whitespace selector)
+selector-expression = primitive-expression *(whsp "." whsp selector)
selector = any-label / labels
-labels = "{" whitespace [ any-label whitespace *("," whitespace any-label whitespace) ] "}"
-
-
-primitive-expression =
- literal-expression
- / "{" whitespace record-type-or-literal "}"
- / "<" whitespace union-type-or-literal ">"
- / non-empty-list-literal
- / parenthesized-expression
+labels = "{" whsp [ any-label whsp *("," whsp any-label whsp) ] "}"
; NOTE: Backtrack when parsing the first three alternatives (i.e. the numeric
; literals). This is because they share leading characters in common
-literal-expression =
+primitive-expression =
; "2.0"
double-literal
@@ -633,60 +640,65 @@ literal-expression = ; "+2"
/ integer-literal
- ; "-Infinity"
- / minus-infinity-literal
- ; "Infinity"
- / plus-infinity-literal
- ; "NaN"
- / NaN
-
; '"ABC"'
/ text-literal
+ ; "{ foo = 1 , bar = True }"
+ ; "{ foo : Integer, bar : Bool }"
+ / "{" whsp record-type-or-literal whsp "}"
+
+ ; "< Foo : Integer | Bar : Bool >"
+ ; "< Foo : Integer | Bar = True | Baz : Bool >"
+ ; "< Foo | Bar : Bool >"
+ / "<" whsp union-type-or-literal whsp ">"
+
+ ; "[1, 2, 3]"
+ ; `empty-collection` handles empty lists
+ / non-empty-list-literal
+
; "x"
; "x@2"
/ identifier
+
+ ; "( e )"
+ / "(" complete-expression ")"
-minus-infinity-literal = "-" Infinity
-plus-infinity-literal = Infinity
-
-; "{ foo = 1 , bar = True }"
-; "{ foo : Integer, bar : Bool }"
record-type-or-literal =
empty-record-literal
/ non-empty-record-type-or-literal
/ empty-record-type
-empty-record-literal = "=" whitespace
+
+empty-record-literal = "="
empty-record-type = ""
+
non-empty-record-type-or-literal =
- any-label whitespace (non-empty-record-literal / non-empty-record-type)
-non-empty-record-type = ":" nonempty-whitespace expression *("," whitespace record-type-entry)
-record-type-entry = any-label whitespace ":" nonempty-whitespace expression
-non-empty-record-literal = "=" whitespace expression *("," whitespace record-literal-entry)
-record-literal-entry = any-label whitespace "=" whitespace expression
-
-; "< Foo : Integer | Bar : Bool >"
-; "< Foo : Integer | Bar = True >"
+ any-label whsp (non-empty-record-literal / non-empty-record-type)
+
+non-empty-record-type = ":" whsp1 expression *(whsp "," whsp record-type-entry)
+record-type-entry = any-label whsp ":" whsp1 expression
+
+non-empty-record-literal = "=" whsp expression *(whsp "," whsp record-literal-entry)
+record-literal-entry = any-label whsp "=" whsp expression
+
union-type-or-literal =
non-empty-union-type-or-literal
/ empty-union-type
+
empty-union-type = ""
+
non-empty-union-type-or-literal =
- any-label whitespace
- ( "=" whitespace expression union-type-entries
- / ":" nonempty-whitespace expression [ "|" whitespace non-empty-union-type-or-literal ]
- )
-union-type-entries = *("|" whitespace union-type-entry)
-union-type-entry = any-label whitespace ":" nonempty-whitespace expression
+ any-label [ whsp ( union-literal-variant-value / union-type-or-literal-variant-type) ]
+
+; = True | ...
+union-literal-variant-value = "=" whsp expression *(whsp "|" whsp union-type-entry)
+union-type-entry = any-label whsp ":" whsp1 expression
-; "[1, 2, 3]"
-; `empty-list-or-optional` handles empty lists
-non-empty-list-literal = "[" whitespace expression *("," whitespace expression) "]"
+; : Integer | ...
+; | ...
+union-type-or-literal-variant-type = [ ":" whsp1 expression ] [ whsp "|" whsp non-empty-union-type-or-literal ]
-; "( e )"
-parenthesized-expression = "(" whitespace expression ")"
+non-empty-list-literal = "[" whsp expression whsp *("," whsp expression whsp) "]"
-; All expressions end with trailing whitespace. This just adds a final
-; whitespace prefix for the top-level of the program
-complete-expression = whitespace expression
+; This just adds surrounding whitespace for the top-level of the program
+complete-expression = whsp expression whsp
diff --git a/dhall_parser/src/dhall.pest.visibility b/dhall_parser/src/dhall.pest.visibility index ee5ea2b..f881a50 100644 --- a/dhall_parser/src/dhall.pest.visibility +++ b/dhall_parser/src/dhall.pest.visibility @@ -1,5 +1,4 @@ -end_of_line -# end_of_line_silent +# end_of_line # tab # block_comment # block_comment_chunk @@ -7,91 +6,71 @@ end_of_line # not_end_of_line # line_comment # whitespace_chunk -# whitespace -# whitespace_ -# nonempty_whitespace +# whsp +# whsp1 # ALPHA # DIGIT # HEXDIG # simple_label_first_char -# simple_label_next_other_char # simple_label_next_char -# simple_label_start simple_label # quoted_label_char quoted_label label +nonreserved_label # any_label double_quote_chunk double_quote_escaped +double_quote_char double_quote_literal single_quote_continue +escaped_quote_pair +escaped_interpolation +single_quote_char single_quote_literal +interpolation # text_literal -# if -# then -# else -# let -# in -# as -# using -# merge -missing -# if_ +if_ # then # else_ # let_ -# in_ +in_ # as_ # using -# merge +merge +missing # Infinity +NaN +Some +# keyword Optional Text List -# equal -# or -# plus -# text_append -# list_append -# and -# times -# double_equal -# not_equal -# dot -# bar -# comma -# at -# colon -# import_alt -# open_parens -# close_parens -# close_parens -# open_brace -# close_brace -# close_brace -# open_bracket -# close_bracket -# close_bracket -# open_angle -# close_angle -# close_angle # combine # combine_types # prefer -# lambda -# forall -# arrow +lambda +forall +arrow # exponent +numeric_double_literal +minus_infinity_literal +plus_infinity_literal double_literal natural_literal integer_literal identifier # path_character # quoted_path_character +unquoted_path_component +quoted_path_component path_component path # local +parent_path +here_path +home_path +absolute_path scheme http_raw authority @@ -108,7 +87,6 @@ authority # reg_name # pchar query -fragment # pct_encoded # unreserved # sub_delims @@ -121,17 +99,11 @@ import_type hash import_hashed import -# expression -lambda_expression -ifthenelse_expression -let_expression -forall_expression -arrow_expression -merge_expression -# empty_list_or_optional +expression +annotated_expression +let_binding empty_collection non_empty_optional -annotated_expression # operator_expression import_alt_expression or_expression @@ -146,22 +118,24 @@ times_expression equal_expression not_equal_expression application_expression -# atomic_expression # import_expression selector_expression selector labels -# primitive_expression -literal_expression +primitive_expression # record_type_or_literal empty_record_literal empty_record_type non_empty_record_type_or_literal non_empty_record_type +record_type_entry non_empty_record_literal +record_literal_entry union_type_or_literal empty_union_type non_empty_union_type_or_literal +union_literal_variant_value +union_type_entry +union_type_or_literal_variant_type non_empty_list_literal -# parenthesized_expression # complete_expression |