use nom;
use std::str::FromStr;

#[derive(Debug, PartialEq, Eq)]
pub enum Keyword {
    Let,
    In,
    If,
    Then,
    Else,
}

#[derive(Debug, PartialEq, Eq)]
pub enum Builtin {
    Natural,
    NaturalFold,
    NaturalBuild,
    NaturalIsZero,
    NaturalEven,
    NaturalOdd,
    Integer,
    Double,
    Text,
    List,
    ListBuild,
    ListFold,
    ListLength,
    ListHead,
    ListLast,
    ListIndexed,
    ListReverse,
    Optional,
    OptionalFold,
    Bool,
}

#[derive(Debug, PartialEq, Eq)]
pub enum Tok {
    Identifier(String),
    Keyword(Keyword),
    Builtin(Builtin),
    Bool(bool),
    Integer(isize),
    Natural(usize),

    // Symbols
    ParenL,
    ParenR,
    Arrow,
    Lambda,
    Pi,
    Combine,
    BoolAnd,
    BoolOr,
    CompareEQ,
    CompareNE,
    Append,
    Times,
    Plus,
    Dot,
    Ascription,
    Equals,
}

#[derive(Debug)]
pub enum LexicalError {
    Error(usize, nom::simple_errors::Err),
    Incomplete(nom::Needed),
}

/// Byte offset into the source string.
pub type Loc = usize;
pub type Spanned = Result<(Loc, Tok, Loc), LexicalError>;

/*
macro_rules! one_of_chars {
    ($c:expr, [$($cs:pat),*]) => {
        match $c {
            $($cs => true),*,
            _ => false,
        }
    }
}

fn is_symbol(c: char) -> bool {
    one_of_chars!(c, [
        '!', '&', '(', ')', '*', '+', '-', '/', ':',
        '=', '>', '\\', '|', '∧', 'λ'
    ])
}

named!(symbol<&str, &str>, take_while1_s!(is_symbol));
*/

#[allow(dead_code)]
fn is_identifier_first_char(c: char) -> bool {
    c.is_alphabetic() || c == '_'
}

fn is_identifier_rest_char(c: char) -> bool {
    c.is_alphabetic() || is_decimal(c) || c == '_' || c == '/'
}

fn is_decimal(c: char) -> bool {
    c.is_digit(10)
}

// FIXME use is_identifier_first_char for the first character
named!(identifier<&str, &str>, take_while1_s!(is_identifier_rest_char));

// Natural literals carry a mandatory "+" sign.
named!(natural<&str, &str>, preceded!(tag!("+"), take_while1_s!(is_decimal)));

named!(integral<&str, isize>,
       map_res!(take_while1_s!(is_decimal), |s| isize::from_str(s)));

named!(integer<&str, isize>, alt!(
    preceded!(tag!("-"), map!(integral, |i: isize| -i)) |
    integral
));

named!(boolean<&str, bool>, alt!(
    value!(true, tag!("True")) |
    value!(false, tag!("False"))
));

named!(keyword<&str, Keyword>, alt!(
    value!(Keyword::Let, tag!("let")) |
    value!(Keyword::In, tag!("in")) |
    value!(Keyword::If, tag!("if")) |
    value!(Keyword::Then, tag!("then")) |
    value!(Keyword::Else, tag!("else"))
));

// alt! commits to the first matching branch, so compound names must come
// before their prefixes ("Natural/fold" before "Natural", and so on).
named!(builtin<&str, Builtin>, alt!(
    value!(Builtin::NaturalFold, tag!("Natural/fold")) |
    value!(Builtin::NaturalBuild, tag!("Natural/build")) |
    value!(Builtin::NaturalIsZero, tag!("Natural/isZero")) |
    value!(Builtin::NaturalEven, tag!("Natural/even")) |
    value!(Builtin::NaturalOdd, tag!("Natural/odd")) |
    value!(Builtin::Natural, tag!("Natural")) |
    value!(Builtin::Integer, tag!("Integer")) |
    value!(Builtin::Double, tag!("Double")) |
    value!(Builtin::Text, tag!("Text")) |
    value!(Builtin::ListBuild, tag!("List/build")) |
    value!(Builtin::ListFold, tag!("List/fold")) |
    value!(Builtin::ListLength, tag!("List/length")) |
    value!(Builtin::ListHead, tag!("List/head")) |
    value!(Builtin::ListLast, tag!("List/last")) |
    value!(Builtin::ListIndexed, tag!("List/indexed")) |
    value!(Builtin::ListReverse, tag!("List/reverse")) |
    value!(Builtin::List, tag!("List")) |
    value!(Builtin::OptionalFold, tag!("Optional/fold")) |
    value!(Builtin::Optional, tag!("Optional")) |
    value!(Builtin::Bool, tag!("Bool"))
));

// The same prefix rule applies here: "==" before "=", "++" before "+", etc.
named!(token<&str, Tok>, alt!(
    value!(Tok::Pi, tag!("forall")) |
    value!(Tok::Pi, tag!("∀")) |
    value!(Tok::Lambda, tag!("\\")) |
    value!(Tok::Lambda, tag!("λ")) |
    value!(Tok::Combine, tag!("/\\")) |
    value!(Tok::Combine, tag!("∧")) |
    value!(Tok::Arrow, tag!("->")) |
    value!(Tok::Arrow, tag!("→")) |

    map!(boolean, Tok::Bool) |
    map!(keyword, Tok::Keyword) |
    map!(builtin, Tok::Builtin) |
    map_opt!(natural, |s| usize::from_str(s).ok().map(Tok::Natural)) |
    map!(integer, Tok::Integer) |
    map!(identifier, |s: &str| Tok::Identifier(s.to_owned())) |

    value!(Tok::ParenL, tag!("(")) |
    value!(Tok::ParenR, tag!(")")) |
    value!(Tok::BoolAnd, tag!("&&")) |
    value!(Tok::BoolOr, tag!("||")) |
    value!(Tok::CompareEQ, tag!("==")) |
    value!(Tok::CompareNE, tag!("!=")) |
    value!(Tok::Append, tag!("++")) |
    value!(Tok::Times, tag!("*")) |
    value!(Tok::Plus, tag!("+")) |
    value!(Tok::Dot, tag!(".")) |
    value!(Tok::Ascription, tag!(":")) |
    value!(Tok::Equals, tag!("="))
));
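// Illustrative sanity check (added as a sketch, not from the original
// source): verifies the branch-ordering note above, i.e. that a compound
// builtin name lexes as one token rather than as its bare-name prefix.
#[test]
fn test_builtin_ordering() {
    use nom::IResult::*;
    match token("Natural/fold ") {
        Done(rest, t) => {
            assert_eq!(t, Tok::Builtin(Builtin::NaturalFold));
            assert_eq!(rest, " ");
        }
        r => panic!("unexpected lex result: {:?}", r),
    }
}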
pub struct Lexer<'input> {
    input: &'input str,
    offset: usize,
}

impl<'input> Lexer<'input> {
    pub fn new(input: &'input str) -> Self {
        Lexer {
            input: input,
            offset: 0,
        }
    }

    fn current_input(&mut self) -> &'input str {
        &self.input[self.offset..]
    }

    fn skip_whitespace(&mut self) -> bool {
        let input = self.current_input();
        let trimmed = input.trim_left();
        let whitespace_len = input.len() - trimmed.len();
        let skipped = whitespace_len > 0;
        if skipped {
            // println!("skipped {} whitespace bytes in {}..{}",
            //          whitespace_len, self.offset, self.offset + whitespace_len);
            self.offset += whitespace_len;
        }
        skipped
    }

    fn skip_comments(&mut self) -> bool {
        let input = self.current_input();
        // Bail out if the input is too short for a comment opener, or if
        // byte 2 falls inside a multi-byte character (the slice below would
        // panic). Index 0 is always a char boundary, so only 2 needs checking.
        if !input.is_char_boundary(2) {
            return false;
        }
        let skip = match &input[0..2] {
            "{-" => {
                if let Some(i) = input.find("-}") {
                    // println!("skipped {} bytes of block comment", i + 2);
                    i + 2
                } else {
                    0
                }
            }
            "--" => {
                if let Some(i) = input.find("\n") { // FIXME Find CRLF too
                    // println!("skipped {} bytes of line comment", i + 1);
                    i + 1
                } else {
                    0
                }
            }
            _ => 0,
        };
        self.offset += skip;
        skip != 0
    }

    fn skip_comments_and_whitespace(&mut self) {
        while self.skip_whitespace() || self.skip_comments() {}
    }
}

impl<'input> Iterator for Lexer<'input> {
    type Item = Spanned;

    fn next(&mut self) -> Option<Self::Item> {
        use nom::IResult::*;
        self.skip_comments_and_whitespace();
        let input = self.current_input();
        if input.is_empty() {
            return None;
        }
        match token(input) {
            Done(rest, t) => {
                let parsed_len = input.len() - rest.len();
                // println!("parsed {} bytes => {:?}", parsed_len, t);
                let start = self.offset;
                self.offset += parsed_len;
                Some(Ok((start, t, self.offset)))
            }
            Error(e) => {
                // Give up on the rest of the input after a lexical error.
                let offset = self.offset;
                self.offset = self.input.len();
                Some(Err(LexicalError::Error(offset, e)))
            }
            Incomplete(needed) => Some(Err(LexicalError::Incomplete(needed))),
        }
    }
}

#[test]
fn test_lex() {
    use self::Tok::*;
    let s = "λ(b : Bool) → b == False";
    let expected = [Lambda,
                    ParenL,
                    Identifier("b".to_owned()),
                    Ascription,
                    Builtin(self::Builtin::Bool),
                    ParenR,
                    Arrow,
                    Identifier("b".to_owned()),
                    CompareEQ,
                    Bool(false)];
    let lexer = Lexer::new(s);
    let tokens = lexer.map(|r| r.unwrap().1).collect::<Vec<_>>();
    assert_eq!(&tokens, &expected);
}
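// Illustrative sketch (added, not from the original source): exercises
// skip_comments_and_whitespace on both comment forms the lexer knows,
// "{- ... -}" block comments and "--" line comments.
#[test]
fn test_lex_comments() {
    use self::Tok::*;
    let s = "{- block -} True -- line\nFalse";
    let tokens = Lexer::new(s).map(|r| r.unwrap().1).collect::<Vec<_>>();
    assert_eq!(&tokens, &[Bool(true), Bool(false)]);
}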