From d6240a4b184cac1fbf61ab35f9f3813efb780d31 Mon Sep 17 00:00:00 2001 From: NanoTech Date: Tue, 6 Dec 2016 09:10:15 +0000 Subject: Initial commit --- src/lexer.rs | 240 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 src/lexer.rs (limited to 'src/lexer.rs') diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..f141f81 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,240 @@ +use nom; + +use std::str::FromStr; + +#[derive(Debug, PartialEq, Eq)] +pub enum Keyword { + Natural, + NaturalFold, + NaturalBuild, + NaturalIsZero, + NaturalEven, + NaturalOdd, + Integer, + Double, + Text, + List, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + Optional, + OptionalFold, + Bool, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Tok { + Identifier(String), + Reserved(Keyword), + Bool(bool), + Integer(isize), + Natural(usize), + + // Symbols + ParenL, + ParenR, + Arrow, + Lambda, + Pi, + Combine, + BoolAnd, + BoolOr, + CompareEQ, + CompareNE, + Append, + Times, + Plus, + Dot, + Ascription, + Equals, +} + +#[derive(Debug)] +pub enum LexicalError { + Error(nom::simple_errors::Err), + Incomplete(nom::Needed), +} + +pub type Spanned = Result<(Loc, Tok, Loc), Error>; + +/* +macro_rules! one_of_chars { + ($c:expr, [$($cs:pat),*]) => { + match $c { + $($cs => true),*, + _ => false, + } + } +} + +fn is_symbol(c: char) -> bool { + one_of_chars!(c, [ + '!', + '&', + '(', + ')', + '*', + '+', + '-', + '/', + ':', + '=', + '>', + '\\', + '|', + '∧', + 'λ' + ]) +} +named!(symbol<&str, &str>, take_while1_s!(is_symbol)); +*/ + +fn is_decimal(c: char) -> bool { + c.is_digit(10) +} + +named!(identifier<&str, &str>, take_while1_s!(char::is_alphabetic)); // FIXME [A-Za-z_][A-Za-z0-9_/]* +named!(natural<&str, &str>, preceded!(tag!("+"), take_while1_s!(is_decimal))); +named!(integral<&str, isize>, map_res!(take_while1_s!(is_decimal), |s| isize::from_str(s))); +named!(integer<&str, isize>, alt!( + preceded!(tag!("-"), map!(integral, |i: isize| -i)) | + integral +)); +named!(boolean<&str, bool>, alt!( + value!(true, tag!("True")) | + value!(false, tag!("False")) +)); + +named!(keyword<&str, Keyword>, alt!( + value!(Keyword::Natural, tag!("Natural")) | + value!(Keyword::NaturalFold, tag!("Natural/fold")) | + value!(Keyword::NaturalBuild, tag!("Natural/build")) | + value!(Keyword::NaturalIsZero, tag!("Natural/isZero")) | + value!(Keyword::NaturalEven, tag!("Natural/even")) | + value!(Keyword::NaturalOdd, tag!("Natural/odd")) | + value!(Keyword::Integer, tag!("Integer")) | + value!(Keyword::Double, tag!("Double")) | + value!(Keyword::Text, tag!("Text")) | + value!(Keyword::List, tag!("List")) | + value!(Keyword::ListBuild, tag!("List/build")) | + value!(Keyword::ListFold, tag!("List/fold")) | + value!(Keyword::ListLength, tag!("List/length")) | + value!(Keyword::ListHead, tag!("List/head")) | + value!(Keyword::ListLast, tag!("List/last")) | + value!(Keyword::ListIndexed, tag!("List/indexed")) | + value!(Keyword::ListReverse, tag!("List/reverse")) | + value!(Keyword::Optional, tag!("Optional")) | + value!(Keyword::OptionalFold, tag!("Optional/fold")) | + value!(Keyword::Bool, tag!("Bool")) +)); + +named!(token<&str, Tok>, alt!( + value!(Tok::Pi, tag!("forall")) | + value!(Tok::Pi, tag!("∀")) | + value!(Tok::Lambda, tag!("\\")) | + value!(Tok::Lambda, tag!("λ")) | + value!(Tok::Combine, tag!("/\\")) | + value!(Tok::Combine, tag!("∧")) | + value!(Tok::Arrow, tag!("->")) | + value!(Tok::Arrow, tag!("→")) | + + map!(boolean, Tok::Bool) | + map!(keyword, Tok::Reserved) | + map_opt!(natural, |s| usize::from_str(s).ok().map(|n| Tok::Natural(n))) | + map!(integer, Tok::Integer) | + map!(identifier, |s: &str| Tok::Identifier(s.to_owned())) | + + value!(Tok::ParenL, tag!("(")) | + value!(Tok::ParenR, tag!(")")) | + value!(Tok::BoolAnd, tag!("&&")) | + value!(Tok::BoolOr, tag!("||")) | + value!(Tok::CompareEQ, tag!("==")) | + value!(Tok::CompareNE, tag!("!=")) | + value!(Tok::Append, tag!("++")) | + value!(Tok::Times, tag!("*")) | + value!(Tok::Plus, tag!("+")) | + value!(Tok::Dot, tag!(".")) | + value!(Tok::Ascription, tag!(":")) | + value!(Tok::Equals, tag!("=")) +)); + +pub struct Lexer<'input> { + input: &'input str, + offset: usize, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + Lexer { + input: input, + offset: 0, + } + } + + fn current_input(&mut self) -> &'input str { + &self.input[self.offset..] + } + + fn skip_whitespace(&mut self) { + let input = self.current_input(); + let trimmed = input.trim_left(); + let whitespace_len = input.len() - trimmed.len(); + if whitespace_len > 0 { + //println!("skipped {} whitespace bytes", whitespace_len); + self.offset += whitespace_len; + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned; + + fn next(&mut self) -> Option { + if self.offset >= self.input.len() { + return None; + } + + use nom::IResult::*; + self.skip_whitespace(); + let input = self.current_input(); + match token(input) { + Done(rest, t) => { + let parsed_len = input.len() - rest.len(); + //println!("parsed {} bytes => {:?}", parsed_len, t); + let start = self.offset; + self.offset += parsed_len; + Some(Ok((start, t, self.offset))) + } + Error(e) => { + self.offset = self.input.len(); + Some(Err(LexicalError::Error(e))) + } + Incomplete(needed) => { + Some(Err(LexicalError::Incomplete(needed))) + } + } + } +} + +#[test] +fn test_lex() { + use self::Tok::*; + let s = "λ(b : Bool) → b == False"; + let expected = [Lambda, + ParenL, + Identifier("b".to_owned()), + Ascription, + Reserved(Keyword::Bool), + ParenR, + Arrow, + Identifier("b".to_owned()), + CompareEQ, + Bool(false)]; + let lexer = Lexer::new(s); + let tokens = lexer.map(|r| r.unwrap().1).collect::>(); + assert_eq!(&tokens, &expected); +} -- cgit v1.2.3