summary | refs | log | tree | commit | diff
path: root/dhall_parser
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- dhall_parser/build.rs 7
-rw-r--r-- dhall_parser/src/dhall.abnf 312
-rw-r--r-- dhall_parser/src/dhall.pest.visibility 100
3 files changed, 199 insertions, 220 deletions
diff --git a/dhall_parser/build.rs b/dhall_parser/build.rs
index 4e75181..615a55c 100644
--- a/dhall_parser/build.rs
+++ b/dhall_parser/build.rs
@@ -42,13 +42,6 @@ fn main() -> std::io::Result<()> {
)?;
writeln!(
&mut file,
- "keyword = _{{
- let_ | in_ | if_ | then
- | else_ | Infinity | NaN
- }}"
- )?;
- writeln!(
- &mut file,
"final_expression = ${{ SOI ~ complete_expression ~ EOI }}"
)?;
diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf
index 02edc84..847da02 100644
--- a/dhall_parser/src/dhall.abnf
+++ b/dhall_parser/src/dhall.abnf
@@ -100,10 +100,9 @@
;
; For simplicity this supports Unix and Windows line-endings, which are the most
; common
-end-of-line-silent =
+end-of-line =
%x0A ; "\n"
/ %x0D.0A ; "\r\n"
-end-of-line = end-of-line-silent
tab = %x09 ; "\t"
@@ -113,7 +112,7 @@ block-comment-chunk =
block-comment
/ %x20-10FFFF
/ tab
- / end-of-line-silent
+ / end-of-line
block-comment-continue = "-}" / block-comment-chunk block-comment-continue
@@ -121,18 +120,19 @@ not-end-of-line = %x20-10FFFF / tab
; NOTE: Slightly different from Haskell-style single-line comments because this
; does not require a space after the dashes
-line-comment = "--" *not-end-of-line end-of-line-silent
+line-comment = "--" *not-end-of-line end-of-line
whitespace-chunk =
" "
/ tab
- / end-of-line-silent
+ / end-of-line
/ line-comment
/ block-comment
-whitespace = *whitespace-chunk
+whsp = *whitespace-chunk
-nonempty-whitespace = 1*whitespace-chunk
+; nonempty whitespace
+whsp1 = 1*whitespace-chunk
; Uppercase or lowercase ASCII letter
ALPHA = %x41-5A / %x61-7A
@@ -142,19 +142,13 @@ DIGIT = %x30-39 ; 0-9
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-; A simple label cannot be one of the following reserved keywords:
-;
-; * if
-; * then
-; * else
-; * let
-; * in
-; * as
-; * using
-; * merge
-; * missing
-; * Infinity
-; * Some
+; A simple label cannot be one of the reserved keywords
+; listed in the `keyword` rule.
+; A PEG parser could use negative lookahead to
+; enforce this, e.g. as follows:
+; simple-label =
+; keyword 1*simple-label-next-char
+; / !keyword (simple-label-first-char *simple-label-next-char)
simple-label-first-char = ALPHA / "_"
simple-label-next-char = ALPHA / DIGIT / "-" / "/" / "_"
simple-label = simple-label-first-char *simple-label-next-char
@@ -170,11 +164,12 @@ quoted-label = 1*quoted-label-char
; for code obfuscation
label = ("`" quoted-label "`" / simple-label)
-; An unreserved-label cannot not be any of the reserved identifiers for builtins (unless quoted).
+; A nonreserved-label cannot be any of the reserved identifiers for builtins (unless quoted).
; Their list can be found in semantics.md. This is not enforced by the grammar but
; should be checked by implementations. The only place where this restriction applies
; is bound variables.
-unreserved-label = label
+; A PEG parser could use negative lookahead to avoid parsing those identifiers.
+nonreserved-label = label
; An any-label is allowed to be one of the reserved identifiers.
any-label = label
@@ -216,8 +211,8 @@ any-label = label
; > "\uD834\uDD1E".
double-quote-chunk =
interpolation
- ; '\'
- / %x5C double-quote-escaped
+ ; '\' Beginning of escape sequence
+ / %x5C double-quote-escaped
/ double-quote-char
double-quote-escaped =
@@ -273,7 +268,6 @@ single-quote-char =
single-quote-literal = "''" end-of-line single-quote-continue
-; Interpolation
interpolation = "${" complete-expression "}"
text-literal = (double-quote-literal / single-quote-literal)
@@ -281,25 +275,34 @@ text-literal = (double-quote-literal / single-quote-literal)
; RFC 5234 interprets string literals as case-insensitive and recommends using
; hex instead for case-sensitive strings
;
-; If you don't feel like reading hex, these are all the same as the rule name,
-; except without the '' ending.
+; If you don't feel like reading hex, these are all the same as the rule name.
; Keywords that should never be parsed as identifiers
-if = %x69.66
-then = %x74.68.65.6e
-else = %x65.6c.73.65
-let = %x6c.65.74
-in = %x69.6e
-as = %x61.73
-using = %x75.73.69.6e.67
-merge = %x6d.65.72.67.65
-missing = %x6d.69.73.73.69.6e.67
-Infinity = %x49.6e.66.69.6e.69.74.79
+if = %x69.66
+then = %x74.68.65.6e
+else = %x65.6c.73.65
+let = %x6c.65.74
+in = %x69.6e
+as = %x61.73
+using = %x75.73.69.6e.67
+merge = %x6d.65.72.67.65
+missing = %x6d.69.73.73.69.6e.67
+Infinity = %x49.6e.66.69.6e.69.74.79
+NaN = %x4e.61.4e
+Some = %x53.6f.6d.65
+
+; Unused rule that could be used as negative lookahead in the
+; `simple-label` rule for parsers that support this.
+keyword =
+ if / then / else
+ / let / in
+ / using / missing / as
+ / Infinity / NaN
+ / merge / Some
+
; Reserved identifiers, only needed for some special cases of parsing
Optional = %x4f.70.74.69.6f.6e.61.6c
Text = %x54.65.78.74
List = %x4c.69.73.74
-NaN = %x4e.61.4e
-Some = %x53.6f.6d.65
combine = %x2227 / "/\"
combine-types = %x2A53 / "//\\"
@@ -310,7 +313,20 @@ arrow = %x2192 / "->"
exponent = "e" [ "+" / "-" ] 1*DIGIT
-double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)
+numeric-double-literal = [ "+" / "-" ] 1*DIGIT ( "." 1*DIGIT [ exponent ] / exponent)
+
+minus-infinity-literal = "-" Infinity
+plus-infinity-literal = Infinity
+
+double-literal =
+ ; "2.0"
+ numeric-double-literal
+ ; "-Infinity"
+ / minus-infinity-literal
+ ; "Infinity"
+ / plus-infinity-literal
+ ; "NaN"
+ / NaN
natural-literal = 1*DIGIT
@@ -318,7 +334,7 @@ integer-literal = ( "+" / "-" ) natural-literal
; The implementation should recognize reserved names for builtins and treat them as special
; values instead of variables.
-identifier = any-label [ whitespace "@" whitespace natural-literal ]
+identifier = any-label [ whsp "@" whsp natural-literal ]
; Printable characters other than " ()[]{}<>/\,"
;
@@ -363,6 +379,8 @@ quoted-path-component = 1*quoted-path-character
path-component = "/" ( unquoted-path-component / %x22 quoted-path-component %x22 )
+; The last path-component matched by this rule is referred to as "file" in the semantics,
+; and the other path-components as "directory".
path = 1*path-component
local =
@@ -445,7 +463,7 @@ sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
http =
http-raw
- [ whitespace using nonempty-whitespace (import-hashed / "(" whitespace import-hashed whitespace ")") ]
+ [ whsp using whsp1 (import-hashed / "(" whsp import-hashed whsp ")") ]
; Dhall supports unquoted environment variables that are Bash-compliant or
; quoted environment variables that are POSIX-compliant
@@ -513,117 +531,106 @@ posix-environment-variable-character =
import-type = missing / local / http / env
-hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"
+hash = %x73.68.61.32.35.36.3a 64HEXDIG ; "sha256:XXX...XXX"
-import-hashed = import-type [ whitespace hash ]
+import-hashed = import-type [ whsp hash ]
; "http://example.com"
; "./foo/bar"
; "env:FOO"
-import = import-hashed [ whitespace as nonempty-whitespace Text ]
-
-; NOTE: Every rule past this point should only reference rules that end with
-; whitespace. This ensures consistent handling of whitespace in the absence of
-; a separate lexing step.
-; The exception is the rules ending in , which should _not_ end in whitespace.
-; This is important to avoid the need for sequential backtracking in application-expression.
+import = import-hashed [ whsp as whsp1 Text ]
expression =
- lambda-expression
- / ifthenelse-expression
- / let-expression
- / forall-expression
+ ; "\(x : a) -> b"
+ lambda whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
+
+ ; "if a then b else c"
+ / if whsp1 expression whsp then whsp1 expression whsp else whsp1 expression
+
+ ; "let x : t = e1 in e2"
+ ; "let x = e1 in e2"
+ ; "let x = e1 let y = e2 in e3"
+ / 1*let-binding in whsp1 expression
+
+ ; "forall (x : a) -> b"
+ / forall whsp "(" whsp nonreserved-label whsp ":" whsp1 expression whsp ")" whsp arrow whsp expression
+
+ ; "a -> b"
+ ;
; NOTE: Backtrack if parsing this alternative fails
- / arrow-expression
- / merge-expression
+ / operator-expression whsp arrow whsp expression
+
+ ; "merge e1 e2 : t"
+ ; "merge e1 e2"
+ / merge whsp1 import-expression whsp import-expression [ whsp ":" whsp1 application-expression ]
+
+ ; "[] : List t"
+ ; "[] : Optional t"
+ ; "[x] : Optional t"
+ ;
; NOTE: Backtrack if parsing this alternative fails since we can't tell
; from the opening bracket whether or not this will be an empty list or
; a non-empty list
- / empty-list-or-optional
+ / "[" whsp (empty-collection / non-empty-optional)
+
+ ; "x : t"
/ annotated-expression
-; "\(x : a) -> b"
-lambda-expression = lambda whitespace "(" whitespace unreserved-label whitespace ":" nonempty-whitespace expression ")" whitespace arrow whitespace expression
-
-; "if a then b else c"
-ifthenelse-expression = if nonempty-whitespace expression then nonempty-whitespace expression else nonempty-whitespace expression
-
-; "let x : t = e1 in e2"
-; "let x = e1 in e2"
-; "let x = e1 let y = e2 in e3"
-let-expression = 1*let-binding in nonempty-whitespace expression
-let-binding = let nonempty-whitespace unreserved-label whitespace [ ":" nonempty-whitespace expression ] "=" whitespace expression
-
-; "forall (x : a) -> b"
-forall-expression = forall whitespace "(" whitespace unreserved-label whitespace ":" nonempty-whitespace expression ")" whitespace arrow whitespace expression
+; Nonempty-whitespace to disambiguate `env:VARIABLE` from type annotations
+annotated-expression = operator-expression [ whsp ":" whsp1 expression ]
-; "a -> b"
-arrow-expression = operator-expression arrow whitespace expression
+; "let x = e1"
+let-binding = let whsp1 nonreserved-label whsp [ ":" whsp1 expression whsp ] "=" whsp expression whsp
-; "merge e1 e2 : t"
-; "merge e1 e2"
-merge-expression = merge nonempty-whitespace import-expression whitespace import-expression whitespace [ ":" nonempty-whitespace application-expression ]
-
-; "[] : List t"
-; "[] : Optional t"
-; "[x] : Optional t"
-empty-list-or-optional = "[" whitespace (empty-collection / non-empty-optional)
-empty-collection = "]" whitespace ":" nonempty-whitespace (List whitespace / Optional whitespace) import-expression whitespace
-non-empty-optional = expression "]" whitespace ":" nonempty-whitespace Optional whitespace import-expression whitespace
-
-; "x : t"
-annotated-expression = operator-expression [ ":" nonempty-whitespace expression ]
+; "] : List t"
+; "] : Optional t"
+empty-collection = "]" whsp ":" whsp1 (List / Optional) whsp import-expression
+; "x] : Optional t"
+non-empty-optional = expression whsp "]" whsp ":" whsp1 Optional whsp import-expression
operator-expression = import-alt-expression
-import-alt-expression = or-expression *("?" nonempty-whitespace or-expression)
-or-expression = plus-expression *("||" whitespace plus-expression )
-plus-expression = text-append-expression *("+" nonempty-whitespace text-append-expression )
-text-append-expression = list-append-expression *("++" whitespace list-append-expression )
-list-append-expression = and-expression *("#" whitespace and-expression )
-and-expression = combine-expression *("&&" whitespace combine-expression )
-combine-expression = prefer-expression *(combine whitespace prefer-expression )
-prefer-expression = combine-types-expression *(prefer whitespace combine-types-expression)
-combine-types-expression = times-expression *(combine-types whitespace times-expression )
-times-expression = equal-expression *("*" whitespace equal-expression )
-equal-expression = not-equal-expression *("==" whitespace not-equal-expression )
-not-equal-expression = application-expression *("!=" whitespace application-expression )
+; Nonempty-whitespace to disambiguate `http://a/a?a`
+import-alt-expression = or-expression *(whsp "?" whsp1 or-expression)
+or-expression = plus-expression *(whsp "||" whsp plus-expression)
+; Nonempty-whitespace to disambiguate `f +2`
+plus-expression = text-append-expression *(whsp "+" whsp1 text-append-expression)
+text-append-expression = list-append-expression *(whsp "++" whsp list-append-expression)
+list-append-expression = and-expression *(whsp "#" whsp and-expression)
+and-expression = combine-expression *(whsp "&&" whsp combine-expression)
+combine-expression = prefer-expression *(whsp combine whsp prefer-expression)
+prefer-expression = combine-types-expression *(whsp prefer whsp combine-types-expression)
+combine-types-expression = times-expression *(whsp combine-types whsp times-expression)
+times-expression = equal-expression *(whsp "*" whsp equal-expression)
+equal-expression = not-equal-expression *(whsp "==" whsp not-equal-expression)
+not-equal-expression = application-expression *(whsp "!=" whsp application-expression)
; Import expressions need to be separated by some whitespace, otherwise there
; would be ambiguity: `./ab` could be interpreted as "import the file `./ab`",
; or "apply the import `./a` to label `b`"
application-expression =
- import-expression *(nonempty-whitespace import-expression) whitespace
+ [ Some whsp1 ] import-expression *(whsp1 import-expression)
-import-expression =
- import
- / selector-expression
+import-expression = import / selector-expression
; `record.field` extracts one field of a record
+;
; `record.{ field0, field1, field2 }` projects out several fields of a record
;
-; NOTE: Backtrack when parsing the `*(dot ...)`. The reason why is that you
+; NOTE: Backtrack when parsing the `*("." ...)`. The reason why is that you
; can't tell from parsing just the period whether "foo." will become "foo.bar"
; (i.e. accessing field `bar` of the record `foo`) or `foo./bar` (i.e. applying
; the function `foo` to the relative path `./bar`)
-selector-expression = primitive-expression *(whitespace "." whitespace selector)
+selector-expression = primitive-expression *(whsp "." whsp selector)
selector = any-label / labels
-labels = "{" whitespace [ any-label whitespace *("," whitespace any-label whitespace) ] "}"
-
-
-primitive-expression =
- literal-expression
- / "{" whitespace record-type-or-literal "}"
- / "<" whitespace union-type-or-literal ">"
- / non-empty-list-literal
- / parenthesized-expression
+labels = "{" whsp [ any-label whsp *("," whsp any-label whsp) ] "}"
; NOTE: Backtrack when parsing the first three alternatives (i.e. the numeric
; literals). This is because they share leading characters in common
-literal-expression =
+primitive-expression =
; "2.0"
double-literal
@@ -633,60 +640,65 @@ literal-expression =
; "+2"
/ integer-literal
- ; "-Infinity"
- / minus-infinity-literal
- ; "Infinity"
- / plus-infinity-literal
- ; "NaN"
- / NaN
-
; '"ABC"'
/ text-literal
+ ; "{ foo = 1 , bar = True }"
+ ; "{ foo : Integer, bar : Bool }"
+ / "{" whsp record-type-or-literal whsp "}"
+
+ ; "< Foo : Integer | Bar : Bool >"
+ ; "< Foo : Integer | Bar = True | Baz : Bool >"
+ ; "< Foo | Bar : Bool >"
+ / "<" whsp union-type-or-literal whsp ">"
+
+ ; "[1, 2, 3]"
+ ; `empty-collection` handles empty lists
+ / non-empty-list-literal
+
; "x"
; "x@2"
/ identifier
+
+ ; "( e )"
+ / "(" complete-expression ")"
-minus-infinity-literal = "-" Infinity
-plus-infinity-literal = Infinity
-
-; "{ foo = 1 , bar = True }"
-; "{ foo : Integer, bar : Bool }"
record-type-or-literal =
empty-record-literal
/ non-empty-record-type-or-literal
/ empty-record-type
-empty-record-literal = "=" whitespace
+
+empty-record-literal = "="
empty-record-type = ""
+
non-empty-record-type-or-literal =
- any-label whitespace (non-empty-record-literal / non-empty-record-type)
-non-empty-record-type = ":" nonempty-whitespace expression *("," whitespace record-type-entry)
-record-type-entry = any-label whitespace ":" nonempty-whitespace expression
-non-empty-record-literal = "=" whitespace expression *("," whitespace record-literal-entry)
-record-literal-entry = any-label whitespace "=" whitespace expression
-
-; "< Foo : Integer | Bar : Bool >"
-; "< Foo : Integer | Bar = True >"
+ any-label whsp (non-empty-record-literal / non-empty-record-type)
+
+non-empty-record-type = ":" whsp1 expression *(whsp "," whsp record-type-entry)
+record-type-entry = any-label whsp ":" whsp1 expression
+
+non-empty-record-literal = "=" whsp expression *(whsp "," whsp record-literal-entry)
+record-literal-entry = any-label whsp "=" whsp expression
+
union-type-or-literal =
non-empty-union-type-or-literal
/ empty-union-type
+
empty-union-type = ""
+
non-empty-union-type-or-literal =
- any-label whitespace
- ( "=" whitespace expression union-type-entries
- / ":" nonempty-whitespace expression [ "|" whitespace non-empty-union-type-or-literal ]
- )
-union-type-entries = *("|" whitespace union-type-entry)
-union-type-entry = any-label whitespace ":" nonempty-whitespace expression
+ any-label [ whsp ( union-literal-variant-value / union-type-or-literal-variant-type) ]
+
+; = True | ...
+union-literal-variant-value = "=" whsp expression *(whsp "|" whsp union-type-entry)
+union-type-entry = any-label whsp ":" whsp1 expression
-; "[1, 2, 3]"
-; `empty-list-or-optional` handles empty lists
-non-empty-list-literal = "[" whitespace expression *("," whitespace expression) "]"
+; : Integer | ...
+; | ...
+union-type-or-literal-variant-type = [ ":" whsp1 expression ] [ whsp "|" whsp non-empty-union-type-or-literal ]
-; "( e )"
-parenthesized-expression = "(" whitespace expression ")"
+non-empty-list-literal = "[" whsp expression whsp *("," whsp expression whsp) "]"
-; All expressions end with trailing whitespace. This just adds a final
-; whitespace prefix for the top-level of the program
-complete-expression = whitespace expression
+; This just adds surrounding whitespace for the top-level of the program
+complete-expression = whsp expression whsp
diff --git a/dhall_parser/src/dhall.pest.visibility b/dhall_parser/src/dhall.pest.visibility
index ee5ea2b..f881a50 100644
--- a/dhall_parser/src/dhall.pest.visibility
+++ b/dhall_parser/src/dhall.pest.visibility
@@ -1,5 +1,4 @@
-end_of_line
-# end_of_line_silent
+# end_of_line
# tab
# block_comment
# block_comment_chunk
@@ -7,91 +6,71 @@ end_of_line
# not_end_of_line
# line_comment
# whitespace_chunk
-# whitespace
-# whitespace_
-# nonempty_whitespace
+# whsp
+# whsp1
# ALPHA
# DIGIT
# HEXDIG
# simple_label_first_char
-# simple_label_next_other_char
# simple_label_next_char
-# simple_label_start
simple_label
# quoted_label_char
quoted_label
label
+nonreserved_label
# any_label
double_quote_chunk
double_quote_escaped
+double_quote_char
double_quote_literal
single_quote_continue
+escaped_quote_pair
+escaped_interpolation
+single_quote_char
single_quote_literal
+interpolation
# text_literal
-# if
-# then
-# else
-# let
-# in
-# as
-# using
-# merge
-missing
-# if_
+if_
# then
# else_
# let_
-# in_
+in_
# as_
# using
-# merge
+merge
+missing
# Infinity
+NaN
+Some
+# keyword
Optional
Text
List
-# equal
-# or
-# plus
-# text_append
-# list_append
-# and
-# times
-# double_equal
-# not_equal
-# dot
-# bar
-# comma
-# at
-# colon
-# import_alt
-# open_parens
-# close_parens
-# close_parens
-# open_brace
-# close_brace
-# close_brace
-# open_bracket
-# close_bracket
-# close_bracket
-# open_angle
-# close_angle
-# close_angle
# combine
# combine_types
# prefer
-# lambda
-# forall
-# arrow
+lambda
+forall
+arrow
# exponent
+numeric_double_literal
+minus_infinity_literal
+plus_infinity_literal
double_literal
natural_literal
integer_literal
identifier
# path_character
# quoted_path_character
+unquoted_path_component
+quoted_path_component
path_component
path
# local
+parent_path
+here_path
+home_path
+absolute_path
scheme
http_raw
authority
@@ -108,7 +87,6 @@ authority
# reg_name
# pchar
query
-fragment
# pct_encoded
# unreserved
# sub_delims
@@ -121,17 +99,11 @@ import_type
hash
import_hashed
import
-# expression
-lambda_expression
-ifthenelse_expression
-let_expression
-forall_expression
-arrow_expression
-merge_expression
-# empty_list_or_optional
+expression
+annotated_expression
+let_binding
empty_collection
non_empty_optional
-annotated_expression
# operator_expression
import_alt_expression
or_expression
@@ -146,22 +118,24 @@ times_expression
equal_expression
not_equal_expression
application_expression
-# atomic_expression
# import_expression
selector_expression
selector
labels
-# primitive_expression
-literal_expression
+primitive_expression
# record_type_or_literal
empty_record_literal
empty_record_type
non_empty_record_type_or_literal
non_empty_record_type
+record_type_entry
non_empty_record_literal
+record_literal_entry
union_type_or_literal
empty_union_type
non_empty_union_type_or_literal
+union_literal_variant_value
+union_type_entry
+union_type_or_literal_variant_type
non_empty_list_literal
-# parenthesized_expression
# complete_expression