Diffstat (limited to 'src/lexer.rs')
-rw-r--r--  src/lexer.rs  240
1 file changed, 240 insertions, 0 deletions
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..f141f81
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,240 @@
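+//! A lexer built from nom sub-parsers. `Lexer` walks the input string and
+//! yields one spanned token (or a lexical error) per call to `next`.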
+use nom;
+
+use std::str::FromStr;
+
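+/// Reserved words: built-in type names (`Natural`, `List`, `Bool`, ...) and
+/// the built-in functions on them (`Natural/fold`, `List/build`, ...).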
+#[derive(Debug, PartialEq, Eq)]
+pub enum Keyword {
+ Natural,
+ NaturalFold,
+ NaturalBuild,
+ NaturalIsZero,
+ NaturalEven,
+ NaturalOdd,
+ Integer,
+ Double,
+ Text,
+ List,
+ ListBuild,
+ ListFold,
+ ListLength,
+ ListHead,
+ ListLast,
+ ListIndexed,
+ ListReverse,
+ Optional,
+ OptionalFold,
+ Bool,
+}
+
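+/// A single lexical token: identifiers, reserved words, literals, and the
+/// punctuation/operator symbols listed below.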
+#[derive(Debug, PartialEq, Eq)]
+pub enum Tok {
+ Identifier(String),
+ Reserved(Keyword),
+ Bool(bool),
+ Integer(isize),
+ Natural(usize),
+
+ // Symbols
+ ParenL,
+ ParenR,
+ Arrow,
+ Lambda,
+ Pi,
+ Combine,
+ BoolAnd,
+ BoolOr,
+ CompareEQ,
+ CompareNE,
+ Append,
+ Times,
+ Plus,
+ Dot,
+ Ascription,
+ Equals,
+}
+
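+/// Errors produced while lexing: either a hard nom parse error, or a report
+/// that more input would be needed to decide on a token.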
+#[derive(Debug)]
+pub enum LexicalError {
+ Error(nom::simple_errors::Err<u32>),
+ Incomplete(nom::Needed),
+}
+
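+/// The item yielded by the lexer: a `(start offset, token, end offset)`
+/// triple on success, or a lexical error.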
+pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
+
+/*
+macro_rules! one_of_chars {
+ ($c:expr, [$($cs:pat),*]) => {
+ match $c {
+ $($cs => true),*,
+ _ => false,
+ }
+ }
+}
+
+fn is_symbol(c: char) -> bool {
+ one_of_chars!(c, [
+ '!',
+ '&',
+ '(',
+ ')',
+ '*',
+ '+',
+ '-',
+ '/',
+ ':',
+ '=',
+ '>',
+ '\\',
+ '|',
+ '∧',
+ 'λ'
+ ])
+}
+named!(symbol<&str, &str>, take_while1_s!(is_symbol));
+*/
+
+fn is_decimal(c: char) -> bool {
+ c.is_digit(10)
+}
+
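+// Low-level nom sub-parsers for identifiers and numeric literals. Naturals
+// are written with a leading `+`, integers with an optional `-`.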
+named!(identifier<&str, &str>, take_while1_s!(char::is_alphabetic)); // FIXME [A-Za-z_][A-Za-z0-9_/]*
+named!(natural<&str, &str>, preceded!(tag!("+"), take_while1_s!(is_decimal)));
+named!(integral<&str, isize>, map_res!(take_while1_s!(is_decimal), |s| isize::from_str(s)));
+named!(integer<&str, isize>, alt!(
+ preceded!(tag!("-"), map!(integral, |i: isize| -i)) |
+ integral
+));
+named!(boolean<&str, bool>, alt!(
+ value!(true, tag!("True")) |
+ value!(false, tag!("False"))
+));
+
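+// `alt!` commits to the first alternative that matches, so keywords that are
+// prefixes of others (`Natural`, `List`, `Optional`) must come after the
+// longer `Foo/bar` forms.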
+named!(keyword<&str, Keyword>, alt!(
+ value!(Keyword::NaturalFold, tag!("Natural/fold")) |
+ value!(Keyword::NaturalBuild, tag!("Natural/build")) |
+ value!(Keyword::NaturalIsZero, tag!("Natural/isZero")) |
+ value!(Keyword::NaturalEven, tag!("Natural/even")) |
+ value!(Keyword::NaturalOdd, tag!("Natural/odd")) |
+ value!(Keyword::Natural, tag!("Natural")) |
+ value!(Keyword::Integer, tag!("Integer")) |
+ value!(Keyword::Double, tag!("Double")) |
+ value!(Keyword::Text, tag!("Text")) |
+ value!(Keyword::ListBuild, tag!("List/build")) |
+ value!(Keyword::ListFold, tag!("List/fold")) |
+ value!(Keyword::ListLength, tag!("List/length")) |
+ value!(Keyword::ListHead, tag!("List/head")) |
+ value!(Keyword::ListLast, tag!("List/last")) |
+ value!(Keyword::ListIndexed, tag!("List/indexed")) |
+ value!(Keyword::ListReverse, tag!("List/reverse")) |
+ value!(Keyword::List, tag!("List")) |
+ value!(Keyword::OptionalFold, tag!("Optional/fold")) |
+ value!(Keyword::Optional, tag!("Optional")) |
+ value!(Keyword::Bool, tag!("Bool"))
+));
+
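+// The main token parser. Reserved words are tried before the identifier rule,
+// and multi-character operators before any single-character prefix of them
+// (`==` before `=`, `++` before `+`).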
+named!(token<&str, Tok>, alt!(
+ value!(Tok::Pi, tag!("forall")) |
+ value!(Tok::Pi, tag!("∀")) |
+ value!(Tok::Lambda, tag!("\\")) |
+ value!(Tok::Lambda, tag!("λ")) |
+ value!(Tok::Combine, tag!("/\\")) |
+ value!(Tok::Combine, tag!("∧")) |
+ value!(Tok::Arrow, tag!("->")) |
+ value!(Tok::Arrow, tag!("→")) |
+
+ map!(boolean, Tok::Bool) |
+ map!(keyword, Tok::Reserved) |
+ map_opt!(natural, |s| usize::from_str(s).ok().map(|n| Tok::Natural(n))) |
+ map!(integer, Tok::Integer) |
+ map!(identifier, |s: &str| Tok::Identifier(s.to_owned())) |
+
+ value!(Tok::ParenL, tag!("(")) |
+ value!(Tok::ParenR, tag!(")")) |
+ value!(Tok::BoolAnd, tag!("&&")) |
+ value!(Tok::BoolOr, tag!("||")) |
+ value!(Tok::CompareEQ, tag!("==")) |
+ value!(Tok::CompareNE, tag!("!=")) |
+ value!(Tok::Append, tag!("++")) |
+ value!(Tok::Times, tag!("*")) |
+ value!(Tok::Plus, tag!("+")) |
+ value!(Tok::Dot, tag!(".")) |
+ value!(Tok::Ascription, tag!(":")) |
+ value!(Tok::Equals, tag!("="))
+));
+
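+/// An iterator over the tokens of `input`, tracking the current byte offset
+/// so each token can be reported with its span.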
+pub struct Lexer<'input> {
+ input: &'input str,
+ offset: usize,
+}
+
+impl<'input> Lexer<'input> {
+ pub fn new(input: &'input str) -> Self {
+ Lexer {
+ input: input,
+ offset: 0,
+ }
+ }
+
+ fn current_input(&mut self) -> &'input str {
+ &self.input[self.offset..]
+ }
+
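+ // Advance `offset` past any leading whitespace in the remaining input.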
+ fn skip_whitespace(&mut self) {
+ let input = self.current_input();
+ let trimmed = input.trim_left();
+ let whitespace_len = input.len() - trimmed.len();
+ if whitespace_len > 0 {
+ //println!("skipped {} whitespace bytes", whitespace_len);
+ self.offset += whitespace_len;
+ }
+ }
+}
+
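+// Yields one `(start, token, end)` result per token; on a hard error the
+// offset is pushed to the end of the input so iteration stops afterwards.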
+impl<'input> Iterator for Lexer<'input> {
+ type Item = Spanned<Tok, usize, LexicalError>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ use nom::IResult::*;
+
+ // Skip whitespace before the end-of-input check, so trailing whitespace
+ // yields `None` instead of an error on an empty token parse.
+ self.skip_whitespace();
+ if self.offset >= self.input.len() {
+ return None;
+ }
+
+ let input = self.current_input();
+ match token(input) {
+ Done(rest, t) => {
+ let parsed_len = input.len() - rest.len();
+ //println!("parsed {} bytes => {:?}", parsed_len, t);
+ let start = self.offset;
+ self.offset += parsed_len;
+ Some(Ok((start, t, self.offset)))
+ }
+ Error(e) => {
+ self.offset = self.input.len();
+ Some(Err(LexicalError::Error(e)))
+ }
+ Incomplete(needed) => {
+ // Give up here as well, so the iterator terminates instead of
+ // reporting the same incomplete input forever.
+ self.offset = self.input.len();
+ Some(Err(LexicalError::Incomplete(needed)))
+ }
+ }
+ }
+}
+
+#[test]
+fn test_lex() {
+ use self::Tok::*;
+ let s = "λ(b : Bool) → b == False";
+ let expected = [Lambda,
+ ParenL,
+ Identifier("b".to_owned()),
+ Ascription,
+ Reserved(Keyword::Bool),
+ ParenR,
+ Arrow,
+ Identifier("b".to_owned()),
+ CompareEQ,
+ Bool(false)];
+ let lexer = Lexer::new(s);
+ let tokens = lexer.map(|r| r.unwrap().1).collect::<Vec<_>>();
+ assert_eq!(&tokens, &expected);
+}