From e5d9aee00b0c775df1d8e2d8819aeb80dffa73c2 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 1 Mar 2019 17:28:19 +0100 Subject: Split abnf_to_pest and dhall into their own crates --- src/lexer.rs | 378 ----------------------------------------------------------- 1 file changed, 378 deletions(-) delete mode 100644 src/lexer.rs (limited to 'src/lexer.rs') diff --git a/src/lexer.rs b/src/lexer.rs deleted file mode 100644 index 5b4dcaa..0000000 --- a/src/lexer.rs +++ /dev/null @@ -1,378 +0,0 @@ -use nom::*; - -use crate::core::Const; -use crate::core::BuiltinType; -use crate::core::BuiltinType::*; -use crate::core::BuiltinValue; -use crate::core::BuiltinValue::*; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Keyword { - Let, - In, - If, - Then, - Else, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ListLike { - List, - Optional, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Builtin { - Type(BuiltinType), - Value(BuiltinValue), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Tok<'i> { - Identifier(&'i str), - Keyword(Keyword), - Builtin(Builtin), - ListLike(ListLike), - Const(Const), - Bool(bool), - Integer(isize), - Natural(usize), - Text(String), - - // Symbols - BraceL, - BraceR, - BracketL, - BracketR, - ParenL, - ParenR, - Arrow, - Lambda, - Pi, - Combine, - BoolAnd, - BoolOr, - CompareEQ, - CompareNE, - Append, - Times, - Plus, - Comma, - Dot, - Ascription, - Equals, -} - -#[derive(Debug)] -pub enum LexicalError { - Error(usize, nom::simple_errors::Err), - Incomplete(nom::Needed), -} - -pub type Spanned = Result<(Loc, Tok, Loc), Error>; - -#[allow(dead_code)] -fn is_identifier_first_char(c: char) -> bool { - c.is_alphabetic() || c == '_' -} - -fn is_identifier_rest_char(c: char) -> bool { - is_identifier_first_char(c) || c.is_digit(10) || c == '/' -} - -macro_rules! digits { - ($i:expr, $t:tt, $radix:expr) => {{ - let r: nom::IResult<&str, $t> = - map_res!($i, take_while1_s!(call!(|c: char| c.is_digit($radix))), - |s| $t::from_str_radix(s, $radix)); - r - }} -} - -named!(natural<&str, usize>, digits!(usize, 10)); -named!(integral<&str, isize>, digits!(isize, 10)); -named!(integer<&str, isize>, alt!( - preceded!(tag!("-"), map!(integral, |i: isize| -i)) | - preceded!(tag!("+"), integral) -)); -named!(boolean<&str, bool>, alt!( - value!(true, tag!("True")) | - value!(false, tag!("False")) -)); - -named!(identifier<&str, &str>, recognize!(preceded!( - take_while1_s!(is_identifier_first_char), - take_while_s!(is_identifier_rest_char)) -)); - -/// Parse an identifier, ensuring a whole identifier is parsed and not just a prefix. -macro_rules! ident_tag { - ($i:expr, $tag:expr) => { - match identifier($i) { - nom::IResult::Done(i, s) => { - if s == $tag { - nom::IResult::Done(i, s) - } else { - nom::IResult::Error(error_position!(nom::ErrorKind::Tag, $i)) - } - } - r => r, - } - } -} - -fn string_escape_single(c: char) -> Option<&'static str> { - match c { - 'n' => Some("\n"), - 'r' => Some("\r"), - 't' => Some("\t"), - '"' => Some("\""), - '\'' => Some("'"), - '\\' => Some("\\"), - '0' => Some("\0"), - 'a' => Some("\x07"), - 'b' => Some("\x08"), - 'f' => Some("\x0c"), - 'v' => Some("\x0b"), - '&' => Some(""), - _ => None, - } -} - -named!(string_escape_numeric<&str, char>, map_opt!(alt!( - preceded!(tag!("x"), digits!(u32, 16)) | - preceded!(tag!("o"), digits!(u32, 8)) | - digits!(u32, 10) -), ::std::char::from_u32)); - -fn string_lit_inner(input: &str) -> nom::IResult<&str, String> { - use nom::IResult::*;; - use nom::ErrorKind; - let mut s = String::new(); - let mut cs = input.char_indices().peekable(); - while let Some((i, c)) = cs.next() { - match c { - '"' => return nom::IResult::Done(&input[i..], s), - '\\' => match cs.next() { - Some((_, s)) if s.is_whitespace() => { - while cs.peek().map(|&(_, s)| s.is_whitespace()) == Some(true) { - let _ = cs.next(); - } - if cs.next().map(|p| p.1) != Some('\\') { - return Error(error_position!(ErrorKind::Custom(4 /* FIXME */), input)); - } - } - Some((j, ec)) => { - if let Some(esc) = string_escape_single(ec) { - s.push_str(esc); - // FIXME Named ASCII escapes and control character escapes - } else { - match string_escape_numeric(&input[j..]) { - Done(rest, esc) => { - let &(k, _) = cs.peek().unwrap(); - // digits are always single byte ASCII characters - let consumed = input[k..].len() - rest.len(); - for _ in 0..consumed { let _ = cs.next(); } - s.push(esc); - } - Incomplete(s) => return Incomplete(s), - Error(e) => return Error(e), - } - } - }, - _ => return Error(error_position!(ErrorKind::Custom(5 /* FIXME */), input)), - }, - _ => s.push(c), - } - } - Error(error_position!(ErrorKind::Custom(3 /* FIXME */), input)) -} - -named!(string_lit<&str, String>, delimited!(tag!("\""), string_lit_inner, tag!("\""))); - -named!(keyword<&str, Keyword>, alt!( - value!(Keyword::Let, ident_tag!("let")) | - value!(Keyword::In, ident_tag!("in")) | - value!(Keyword::If, ident_tag!("if")) | - value!(Keyword::Then, ident_tag!("then")) | - value!(Keyword::Else, ident_tag!("else")) -)); - -named!(type_const<&str, Const>, alt!( - value!(Const::Type, ident_tag!("Type")) | - value!(Const::Kind, ident_tag!("Kind")) -)); - -named!(list_like<&str, ListLike>, alt!( - value!(ListLike::List, ident_tag!("List")) | - value!(ListLike::Optional, ident_tag!("Optional")) -)); - -named!(builtin<&str, Builtin>, alt!( - value!(Builtin::Value(NaturalFold), ident_tag!("Natural/fold")) | - value!(Builtin::Value(NaturalBuild), ident_tag!("Natural/build")) | - value!(Builtin::Value(NaturalIsZero), ident_tag!("Natural/isZero")) | - value!(Builtin::Value(NaturalEven), ident_tag!("Natural/even")) | - value!(Builtin::Value(NaturalOdd), ident_tag!("Natural/odd")) | - value!(Builtin::Value(NaturalShow), ident_tag!("Natural/show")) | - value!(Builtin::Type(Natural), ident_tag!("Natural")) | - value!(Builtin::Type(Integer), ident_tag!("Integer")) | - value!(Builtin::Type(Double), ident_tag!("Double")) | - value!(Builtin::Type(Text), ident_tag!("Text")) | - value!(Builtin::Value(ListBuild), ident_tag!("List/build")) | - value!(Builtin::Value(ListFold), ident_tag!("List/fold")) | - value!(Builtin::Value(ListLength), ident_tag!("List/length")) | - value!(Builtin::Value(ListHead), ident_tag!("List/head")) | - value!(Builtin::Value(ListLast), ident_tag!("List/last")) | - value!(Builtin::Value(ListIndexed), ident_tag!("List/indexed")) | - value!(Builtin::Value(ListReverse), ident_tag!("List/reverse")) | - value!(Builtin::Value(OptionalFold), ident_tag!("Optional/fold")) | - value!(Builtin::Type(Bool), ident_tag!("Bool")) -)); - -named!(token<&str, Tok>, alt!( - value!(Tok::Pi, ident_tag!("forall")) | - value!(Tok::Pi, tag!("∀")) | - value!(Tok::Lambda, tag!("\\")) | - value!(Tok::Lambda, tag!("λ")) | - value!(Tok::Combine, tag!("/\\")) | - value!(Tok::Combine, tag!("∧")) | - value!(Tok::Arrow, tag!("->")) | - value!(Tok::Arrow, tag!("→")) | - - map!(type_const, Tok::Const) | - map!(boolean, Tok::Bool) | - map!(keyword, Tok::Keyword) | - map!(builtin, Tok::Builtin) | - map!(list_like, Tok::ListLike) | - map!(natural, Tok::Natural) | - map!(integer, Tok::Integer) | - map!(identifier, Tok::Identifier) | - map!(string_lit, Tok::Text) | - - value!(Tok::BraceL, tag!("{")) | - value!(Tok::BraceR, tag!("}")) | - value!(Tok::BracketL, tag!("[")) | - value!(Tok::BracketR, tag!("]")) | - value!(Tok::ParenL, tag!("(")) | - value!(Tok::ParenR, tag!(")")) | - value!(Tok::BoolAnd, tag!("&&")) | - value!(Tok::BoolOr, tag!("||")) | - value!(Tok::CompareEQ, tag!("==")) | - value!(Tok::CompareNE, tag!("!=")) | - value!(Tok::Append, tag!("++")) | - value!(Tok::Times, tag!("*")) | - value!(Tok::Plus, tag!("+")) | - value!(Tok::Comma, tag!(",")) | - value!(Tok::Dot, tag!(".")) | - value!(Tok::Ascription, tag!(":")) | - value!(Tok::Equals, tag!("=")) -)); - -fn find_end(input: &str, ending: &str) -> Option { - input.find(ending).map(|i| i + ending.len()) -} - -pub struct Lexer<'input> { - input: &'input str, - offset: usize, -} - -impl<'input> Lexer<'input> { - pub fn new(input: &'input str) -> Self { - Lexer { - input: input, - offset: 0, - } - } - - fn current_input(&mut self) -> &'input str { - &self.input[self.offset..] - } - - fn skip_whitespace(&mut self) -> bool { - let input = self.current_input(); - let trimmed = input.trim_start(); - let whitespace_len = input.len() - trimmed.len(); - let skipped = whitespace_len > 0; - if skipped { - // println!("skipped {} whitespace bytes in {}..{}", whitespace_len, self.offset, self.offset + whitespace_len); - self.offset += whitespace_len; - } - skipped - } - - fn skip_comments(&mut self) -> bool { - let input = self.current_input(); - if !input.is_char_boundary(0) || !input.is_char_boundary(2) { - return false; - } - let skip = match &input[0..2] { - "{-" => find_end(input, "-}"), - "--" => find_end(input, "\n"), // Also skips past \r\n (CRLF) - _ => None, - }.unwrap_or(0); - // println!("skipped {} bytes of comment", skip); - self.offset += skip; - skip != 0 - } - - fn skip_comments_and_whitespace(&mut self) { - while self.skip_whitespace() || self.skip_comments() {} - } -} - -impl<'input> Iterator for Lexer<'input> { - type Item = Spanned, usize, LexicalError>; - - fn next(&mut self) -> Option { - use nom::IResult::*; - self.skip_comments_and_whitespace(); - let input = self.current_input(); - if input.is_empty() { - return None; - } - match token(input) { - Done(rest, t) => { - let parsed_len = input.len() - rest.len(); - //println!("parsed {} bytes => {:?}", parsed_len, t); - let start = self.offset; - self.offset += parsed_len; - Some(Ok((start, t, self.offset))) - } - Error(e) => { - let offset = self.offset; - self.offset = self.input.len(); - Some(Err(LexicalError::Error(offset, e))) - } - Incomplete(needed) => { - Some(Err(LexicalError::Incomplete(needed))) - } - } - } -} - -#[test] -fn test_lex() { - use self::Tok::*; - let s = "λ(b : Bool) → b == False"; - let expected = [Lambda, - ParenL, - Identifier("b"), - Ascription, - Builtin(self::Builtin::Type(crate::core::BuiltinType::Bool)), - ParenR, - Arrow, - Identifier("b"), - CompareEQ, - Bool(false)]; - let lexer = Lexer::new(s); - let tokens = lexer.map(|r| r.unwrap().1).collect::>(); - assert_eq!(&tokens, &expected); - - assert_eq!(string_lit(r#""a\&b""#).to_result(), Ok("ab".to_owned())); - assert_eq!(string_lit(r#""a\ \b""#).to_result(), Ok("ab".to_owned())); - assert!(string_lit(r#""a\ b""#).is_err()); - assert_eq!(string_lit(r#""a\nb""#).to_result(), Ok("a\nb".to_owned())); - assert_eq!(string_lit(r#""\o141\x62\99""#).to_result(), Ok("abc".to_owned())); -} -- cgit v1.2.3