diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core.rs | 222 | ||||
-rw-r--r-- | src/grammar.lalrpop | 94 | ||||
-rw-r--r-- | src/grammar_util.rs | 8 | ||||
-rw-r--r-- | src/lexer.rs | 240 | ||||
-rw-r--r-- | src/main.rs | 31 | ||||
-rw-r--r-- | src/parser.rs | 34 |
6 files changed, 629 insertions, 0 deletions
diff --git a/src/core.rs b/src/core.rs new file mode 100644 index 0000000..8e59141 --- /dev/null +++ b/src/core.rs @@ -0,0 +1,222 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +/* +module Dhall.Core ( + -- * Syntax + Const(..) + , Path(..) + , Var(..) + , Expr(..) + + -- * Normalization + , normalize + , subst + , shift + + -- * Pretty-printing + , pretty + + -- * Miscellaneous + , internalError + ) where +*/ + +/// Constants for a pure type system +/// +/// The only axiom is: +/// +/// ```c +/// ⊦ Type : Kind +/// ``` +/// +/// ... and the valid rule pairs are: +/// +/// ```c +/// ⊦ Type ↝ Type : Type -- Functions from terms to terms (ordinary functions) +/// ⊦ Kind ↝ Type : Type -- Functions from types to terms (polymorphic functions) +/// ⊦ Kind ↝ Kind : Kind -- Functions from types to types (type constructors) +/// ``` +/// +/// These are the same rule pairs as System Fω +/// +/// Note that Dhall does not support functions from terms to types and therefore +/// Dhall is not a dependently typed language +/// +#[derive(Debug, PartialEq, Eq)] // (Show, Bounded, Enum) +pub enum Const { + Type, + Kind, +} + + +/// Path to an external resource +#[derive(Debug, PartialEq, Eq)] // (Eq, Ord, Show) +pub enum Path { + File(PathBuf), + URL(String), +} + +/// Label for a bound variable +/// +/// The `String` field is the variable's name (i.e. \"`x`\"). +/// +/// The `Int` field disambiguates variables with the same name if there are +/// multiple bound variables of the same name in scope. Zero refers to the +/// nearest bound variable and the index increases by one for each bound +/// variable of the same name going outward. The following diagram may help: +/// +/// ```c +/// +---refers to--+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@0 +/// +/// +------------------refers to-----------------+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@1 +/// ``` +/// +/// This `Int` behaves like a De Bruijn index in the special case where all +/// variables have the same name. +/// +/// You can optionally omit the index if it is `0`: +/// +/// ```c +/// +refers to+ +/// | | +/// v | +/// \(x : *) -> \(y : *) -> \(x : *) -> x +/// ``` +/// +/// Zero indices are omitted when pretty-printing `Var`s and non-zero indices +/// appear as a numeric suffix. +/// +#[derive(Debug, PartialEq, Eq)] // (Eq, Show) +pub struct Var(pub String, pub Int); + +/* +instance IsString Var where + fromString str = V (fromString str) 0 + +instance Buildable Var where + build = buildVar +*/ + +/// Syntax tree for expressions +#[derive(Debug, PartialEq)] // (Functor, Foldable, Traversable, Show) +pub enum Expr<S, A> { + /// `Const c ~ c` + Const(Const), + /// `Var (V x 0) ~ x`<br> + /// `Var (V x n) ~ x@n` + Var(Var), + /// `Lam x A b ~ λ(x : A) -> b` + Lam(String, Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Pi "_" A B ~ A -> B` + /// `Pi x A B ~ ∀(x : A) -> B` + Pi(String, Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `App f A ~ f A` + App(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Let x Nothing r e ~ let x = r in e` + /// `Let x (Just t) r e ~ let x : t = r in e` + Let(String, Option<Box<Expr<S, A>>>, Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Annot x t ~ x : t` + Annot(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Bool ~ Bool` + Bool, + /// `BoolLit b ~ b` + BoolLit(bool), + /// `BoolAnd x y ~ x && y` + BoolAnd(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `BoolOr x y ~ x || y` + BoolOr(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `BoolEQ x y ~ x == y` + BoolEQ(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `BoolNE x y ~ x != y` + BoolNE(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `BoolIf x y z ~ if x then y else z` + BoolIf(Box<Expr<S, A>>, Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Natural ~ Natural` + Natural, + /// `NaturalLit n ~ +n` + NaturalLit(Natural), + /// `NaturalFold ~ Natural/fold` + NaturalFold, + /// `NaturalBuild ~ Natural/build` + NaturalBuild, + /// `NaturalIsZero ~ Natural/isZero` + NaturalIsZero, + /// `NaturalEven ~ Natural/even` + NaturalEven, + /// `NaturalOdd ~ Natural/odd` + NaturalOdd, + /// `NaturalPlus x y ~ x + y` + NaturalPlus(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `NaturalTimes x y ~ x * y` + NaturalTimes(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Integer ~ Integer` + Integer, + /// `IntegerLit n ~ n` + IntegerLit(Integer), + /// `Double ~ Double` + Double, + /// `DoubleLit n ~ n` + DoubleLit(Double), + /// `Text ~ Text` + Text, + /// `TextLit t ~ t` + TextLit(Builder), + /// `TextAppend x y ~ x ++ y` + TextAppend(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `List ~ List` + List, + /// `ListLit t [x, y, z] ~ [x, y, z] : List t` + ListLit(Box<Expr<S, A>>, Vec<Expr<S, A>>), + /// `ListBuild ~ List/build` + ListBuild, + /// `ListFold ~ List/fold` + ListFold, + /// `ListLength ~ List/length` + ListLength, + /// `ListHead ~ List/head` + ListHead, + /// `ListLast ~ List/last` + ListLast, + /// `ListIndexed ~ List/indexed` + ListIndexed, + /// `ListReverse ~ List/reverse` + ListReverse, + /// `Optional ~ Optional` + Optional, + /// `OptionalLit t [e] ~ [e] : Optional t` + /// `OptionalLit t [] ~ [] : Optional t` + OptionalLit(Box<Expr<S, A>>, Vec<Expr<S, A>>), + /// `OptionalFold ~ Optional/fold` + OptionalFold, + /// `Record [(k1, t1), (k2, t2)] ~ { k1 : t1, k2 : t1 }` + Record(HashMap<String, Expr<S, A>>), + /// `RecordLit [(k1, v1), (k2, v2)] ~ { k1 = v1, k2 = v2 }` + RecordLit(HashMap<String, Expr<S, A>>), + /// `Union [(k1, t1), (k2, t2)] ~ < k1 : t1, k2 : t2 >` + Union(HashMap<String, Expr<S, A>>), + /// `UnionLit (k1, v1) [(k2, t2), (k3, t3)] ~ < k1 = t1, k2 : t2, k3 : t3 >` + UnionLit(String, Box<Expr<S, A>>, HashMap<String, Expr<S, A>>), + /// `Combine x y ~ x ∧ y` + Combine(Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Merge x y t ~ merge x y : t` + Merge(Box<Expr<S, A>>, Box<Expr<S, A>>, Box<Expr<S, A>>), + /// `Field e x ~ e.x` + Field(Box<Expr<S, A>>, String), + /// `Note S x ~ e` + Note(S, Box<Expr<S, A>>), + /// `Embed path ~ path` + Embed(A), +} + +pub type Builder = String; +pub type Double = f64; +pub type Int = isize; +pub type Integer = isize; +pub type Natural = usize; diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop new file mode 100644 index 0000000..e940da9 --- /dev/null +++ b/src/grammar.lalrpop @@ -0,0 +1,94 @@ +use core; +use core::Expr::*; +use grammar_util::*; +use lexer::{Keyword, LexicalError, Tok}; + +grammar; + +extern { + type Location = usize; + type Error = LexicalError; + + enum Tok { + Pi => Tok::Pi, + Lambda => Tok::Lambda, + Combine => Tok::Combine, + "->" => Tok::Arrow, + + Int => Tok::Integer(<isize>), + Nat => Tok::Natural(<usize>), + Bool => Tok::Bool(<bool>), + Label => Tok::Identifier(<String>), + Reserved => Tok::Reserved(<Keyword>), + + "(" => Tok::ParenL, + ")" => Tok::ParenR, + "&&" => Tok::BoolAnd, + "||" => Tok::BoolOr, + "==" => Tok::CompareEQ, + "!=" => Tok::CompareNE, + "++" => Tok::Append, + "*" => Tok::Times, + "+" => Tok::Plus, + "." => Tok::Dot, + ":" => Tok::Ascription, + "=" => Tok::Equals, + } +} + +pub Expr: BoxExpr = { // exprA + <ExprB> ":" <Expr> => bx(Annot(<>)), + ExprB, +}; + +ExprB: BoxExpr = { + Lambda "(" <Label> ":" <Expr> ")" "->" <ExprB> => bx(Lam(<>)), + ExprC0, +}; + +BoolOr: ExprOpFn = { "||" => BoolOr }; +NaturalPlus: ExprOpFn = { "+" => NaturalPlus }; +TextAppend: ExprOpFn = { "++" => TextAppend }; +BoolAnd: ExprOpFn = { "&&" => BoolAnd }; +CombineOp: ExprOpFn = { Combine => Combine }; +NaturalTimes: ExprOpFn = { "*" => NaturalTimes }; +BoolEQ: ExprOpFn = { "==" => BoolEQ }; +BoolNE: ExprOpFn = { "!=" => BoolNE }; + +Tier<NextTier, Op>: BoxExpr = { + <a:NextTier> <f:Op> <b:Tier<NextTier, Op>> => bx(f(a, b)), + // <b:Tier<NextTier, Op>> <f:Op> <a:NextTier> => bx(f(a, b)), + NextTier, +}; + +ExprC0 = Tier<ExprC1, BoolOr>; +ExprC1 = Tier<ExprC2, NaturalPlus>; +ExprC2 = Tier<ExprC3, TextAppend>; +ExprC3 = Tier<ExprC4, BoolAnd>; +ExprC4 = Tier<ExprC5, CombineOp>; +ExprC5 = Tier<ExprC6, NaturalTimes>; +ExprC6 = Tier<ExprC7, BoolEQ>; +ExprC7 = Tier<ExprD, BoolNE>; + +ExprD: BoxExpr = { + <v:(ExprE)+> => { + let mut it = v.into_iter(); + let f = it.next().unwrap(); + it.fold(f, |f, x| bx(App(f, x))) + } +}; + +ExprE: BoxExpr = { + <a:ExprF> <fields:("." <Label>)*> => { + fields.into_iter().fold(a, |x, f| bx(Field(x, f))) + }, +}; + +ExprF: BoxExpr = { + Nat => bx(NaturalLit(<>)), + Int => bx(IntegerLit(<>)), + Label => bx(Var(core::Var(<>, 0))), // FIXME support var@n syntax + Reserved => bx(Bool), // FIXME + Bool => bx(BoolLit(<>)), + "(" <Expr> ")", +}; diff --git a/src/grammar_util.rs b/src/grammar_util.rs new file mode 100644 index 0000000..cbd1e59 --- /dev/null +++ b/src/grammar_util.rs @@ -0,0 +1,8 @@ +use core::Expr; + +pub type BoxExpr = Box<Expr<(), ()>>; +pub type ExprOpFn = fn(BoxExpr, BoxExpr) -> Expr<(), ()>; + +pub fn bx<T>(x: T) -> Box<T> { + Box::new(x) +} diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..f141f81 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,240 @@ +use nom; + +use std::str::FromStr; + +#[derive(Debug, PartialEq, Eq)] +pub enum Keyword { + Natural, + NaturalFold, + NaturalBuild, + NaturalIsZero, + NaturalEven, + NaturalOdd, + Integer, + Double, + Text, + List, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + Optional, + OptionalFold, + Bool, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Tok { + Identifier(String), + Reserved(Keyword), + Bool(bool), + Integer(isize), + Natural(usize), + + // Symbols + ParenL, + ParenR, + Arrow, + Lambda, + Pi, + Combine, + BoolAnd, + BoolOr, + CompareEQ, + CompareNE, + Append, + Times, + Plus, + Dot, + Ascription, + Equals, +} + +#[derive(Debug)] +pub enum LexicalError { + Error(nom::simple_errors::Err<u32>), + Incomplete(nom::Needed), +} + +pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>; + +/* +macro_rules! one_of_chars { + ($c:expr, [$($cs:pat),*]) => { + match $c { + $($cs => true),*, + _ => false, + } + } +} + +fn is_symbol(c: char) -> bool { + one_of_chars!(c, [ + '!', + '&', + '(', + ')', + '*', + '+', + '-', + '/', + ':', + '=', + '>', + '\\', + '|', + '∧', + 'λ' + ]) +} +named!(symbol<&str, &str>, take_while1_s!(is_symbol)); +*/ + +fn is_decimal(c: char) -> bool { + c.is_digit(10) +} + +named!(identifier<&str, &str>, take_while1_s!(char::is_alphabetic)); // FIXME [A-Za-z_][A-Za-z0-9_/]* +named!(natural<&str, &str>, preceded!(tag!("+"), take_while1_s!(is_decimal))); +named!(integral<&str, isize>, map_res!(take_while1_s!(is_decimal), |s| isize::from_str(s))); +named!(integer<&str, isize>, alt!( + preceded!(tag!("-"), map!(integral, |i: isize| -i)) | + integral +)); +named!(boolean<&str, bool>, alt!( + value!(true, tag!("True")) | + value!(false, tag!("False")) +)); + +named!(keyword<&str, Keyword>, alt!( + value!(Keyword::Natural, tag!("Natural")) | + value!(Keyword::NaturalFold, tag!("Natural/fold")) | + value!(Keyword::NaturalBuild, tag!("Natural/build")) | + value!(Keyword::NaturalIsZero, tag!("Natural/isZero")) | + value!(Keyword::NaturalEven, tag!("Natural/even")) | + value!(Keyword::NaturalOdd, tag!("Natural/odd")) | + value!(Keyword::Integer, tag!("Integer")) | + value!(Keyword::Double, tag!("Double")) | + value!(Keyword::Text, tag!("Text")) | + value!(Keyword::List, tag!("List")) | + value!(Keyword::ListBuild, tag!("List/build")) | + value!(Keyword::ListFold, tag!("List/fold")) | + value!(Keyword::ListLength, tag!("List/length")) | + value!(Keyword::ListHead, tag!("List/head")) | + value!(Keyword::ListLast, tag!("List/last")) | + value!(Keyword::ListIndexed, tag!("List/indexed")) | + value!(Keyword::ListReverse, tag!("List/reverse")) | + value!(Keyword::Optional, tag!("Optional")) | + value!(Keyword::OptionalFold, tag!("Optional/fold")) | + value!(Keyword::Bool, tag!("Bool")) +)); + +named!(token<&str, Tok>, alt!( + value!(Tok::Pi, tag!("forall")) | + value!(Tok::Pi, tag!("∀")) | + value!(Tok::Lambda, tag!("\\")) | + value!(Tok::Lambda, tag!("λ")) | + value!(Tok::Combine, tag!("/\\")) | + value!(Tok::Combine, tag!("∧")) | + value!(Tok::Arrow, tag!("->")) | + value!(Tok::Arrow, tag!("→")) | + + map!(boolean, Tok::Bool) | + map!(keyword, Tok::Reserved) | + map_opt!(natural, |s| usize::from_str(s).ok().map(|n| Tok::Natural(n))) | + map!(integer, Tok::Integer) | + map!(identifier, |s: &str| Tok::Identifier(s.to_owned())) | + + value!(Tok::ParenL, tag!("(")) | + value!(Tok::ParenR, tag!(")")) | + value!(Tok::BoolAnd, tag!("&&")) | + value!(Tok::BoolOr, tag!("||")) | + value!(Tok::CompareEQ, tag!("==")) | + value!(Tok::CompareNE, tag!("!=")) | + value!(Tok::Append, tag!("++")) | + value!(Tok::Times, tag!("*")) | + value!(Tok::Plus, tag!("+")) | + value!(Tok::Dot, tag!(".")) | + value!(Tok::Ascription, tag!(":")) | + value!(Tok::Equals, tag!("=")) +)); + +pub struct Lexer<'input> { + input: &'input str, + offset: usize, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + Lexer { + input: input, + offset: 0, + } + } + + fn current_input(&mut self) -> &'input str { + &self.input[self.offset..] + } + + fn skip_whitespace(&mut self) { + let input = self.current_input(); + let trimmed = input.trim_left(); + let whitespace_len = input.len() - trimmed.len(); + if whitespace_len > 0 { + //println!("skipped {} whitespace bytes", whitespace_len); + self.offset += whitespace_len; + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned<Tok, usize, LexicalError>; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.input.len() { + return None; + } + + use nom::IResult::*; + self.skip_whitespace(); + let input = self.current_input(); + match token(input) { + Done(rest, t) => { + let parsed_len = input.len() - rest.len(); + //println!("parsed {} bytes => {:?}", parsed_len, t); + let start = self.offset; + self.offset += parsed_len; + Some(Ok((start, t, self.offset))) + } + Error(e) => { + self.offset = self.input.len(); + Some(Err(LexicalError::Error(e))) + } + Incomplete(needed) => { + Some(Err(LexicalError::Incomplete(needed))) + } + } + } +} + +#[test] +fn test_lex() { + use self::Tok::*; + let s = "λ(b : Bool) → b == False"; + let expected = [Lambda, + ParenL, + Identifier("b".to_owned()), + Ascription, + Reserved(Keyword::Bool), + ParenR, + Arrow, + Identifier("b".to_owned()), + CompareEQ, + Bool(false)]; + let lexer = Lexer::new(s); + let tokens = lexer.map(|r| r.unwrap().1).collect::<Vec<_>>(); + assert_eq!(&tokens, &expected); +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..aebbad9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,31 @@ +extern crate lalrpop_util; +#[macro_use] +extern crate nom; + +mod core; +pub use core::*; +pub mod grammar; +mod grammar_util; +pub mod lexer; +pub mod parser; + +fn main() { + println!("Hello, world!"); + + /* + inText <- Data.Text.Lazy.IO.getContents + + expr <- case exprFromText (Directed "(stdin)" 0 0 0 0) inText of + Left err -> Control.Exception.throwIO err + Right expr -> return expr + + expr' <- load expr + + typeExpr <- case Dhall.TypeCheck.typeOf expr' of + Left err -> Control.Exception.throwIO err + Right typeExpr -> return typeExpr + Data.Text.Lazy.IO.hPutStrLn stderr (pretty (normalize typeExpr)) + Data.Text.Lazy.IO.hPutStrLn stderr mempty + Data.Text.Lazy.IO.putStrLn (pretty (normalize expr')) ) + */ +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..1561230 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,34 @@ +use lalrpop_util; + +use grammar; +use grammar_util::BoxExpr; +use lexer::{Lexer, LexicalError, Tok}; + +pub type ParseError = lalrpop_util::ParseError<usize, Tok, LexicalError>; + +fn parse_expr(s: &str) -> Result<BoxExpr, ParseError> { + grammar::parse_Expr(Lexer::new(s)) +} + +#[test] +fn test_parse() { + use core::Expr::*; + println!("test {:?}", parse_expr("+3 + +5 * +10")); + assert!(parse_expr("22").is_ok()); + assert!(parse_expr("(22)").is_ok()); + assert_eq!(parse_expr("+3 + +5 * +10").ok(), + Some(Box::new(NaturalPlus(Box::new(NaturalLit(3)), + Box::new(NaturalTimes(Box::new(NaturalLit(5)), + Box::new(NaturalLit(10)))))))); + // The original parser is apparently right-associative + assert_eq!(parse_expr("+2 * +3 * +4").ok(), + Some(Box::new(NaturalTimes(Box::new(NaturalLit(2)), + Box::new(NaturalTimes(Box::new(NaturalLit(3)), + Box::new(NaturalLit(4)))))))); + assert!(parse_expr("((((22))))").is_ok()); + assert!(parse_expr("((22)").is_err()); + println!("{:?}", parse_expr("\\(b : Bool) -> b == False")); + assert!(parse_expr("\\(b : Bool) -> b == False").is_ok()); + println!("{:?}", parse_expr("foo.bar")); + assert!(parse_expr("foo.bar").is_ok()); +} |