// this is mostly based on the s-exp tutorial
// https://github.com/rust-analyzer/rowan/blob/master/examples/s_expressions.rs

use rowan::{GreenNode, GreenNodeBuilder};

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)]
#[repr(u16)]
pub enum SyntaxKind {
    L_BRACKET = 0, // '['
    R_BRACKET,     // ']'
    WORD,          // 'Attrset', 'meta', '.', '>', ...
    WHITESPACE,    // whitespaces is explicit
    ERROR,         // as well as errors

    // composite nodes
    LIST, // `[..]`
    ATOM, // wraps WORD
    ROOT, // top-level (a complete query)
}
use SyntaxKind::*;

impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        Self(kind as u16)
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QueryLang {}
impl rowan::Language for QueryLang {
    type Kind = SyntaxKind;
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        assert!(raw.0 <= ROOT as u16);
        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
    }
    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        kind.into()
    }
}

fn lex(text: &str) -> Vec<(SyntaxKind, String)> {
    fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
        m_lexer::TokenKind(rowan::SyntaxKind::from(t).0)
    }
    fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
        match t.0 {
            0 => L_BRACKET,
            1 => R_BRACKET,
            2 => WORD,
            3 => WHITESPACE,
            4 => ERROR,
            _ => unreachable!(),
        }
    }

    let lexer = m_lexer::LexerBuilder::new()
        .error_token(tok(ERROR))
        .tokens(&[
            (tok(L_BRACKET), r"\["),
            (tok(R_BRACKET), r"\]"),
            (tok(WORD), r"[^\s\[\]]+"),
            (tok(WHITESPACE), r"\s+"),
        ])
        .build();

    lexer
        .tokenize(text)
        .into_iter()
        .map(|t| (t.len, kind(t.kind)))
        .scan(0usize, |start_offset, (len, kind)| {
            let s: String = text[*start_offset..*start_offset + len].into();
            *start_offset += len;
            Some((kind, s))
        })
        .collect()
}


#[derive(Clone)]
pub struct Parse {
    pub green_node: GreenNode,
    pub errors: Vec<String>,
}

pub fn parse(text: &str) -> Parse {
    struct Parser {
        /// input tokens, including whitespace,
        /// in *reverse* order.
        tokens: Vec<(SyntaxKind, String)>,
        /// the in-progress tree.
        builder: GreenNodeBuilder<'static>,
        /// the list of syntax errors we've accumulated
        /// so far.
        errors: Vec<String>,
    }

    #[derive(Debug)]
    enum QexpRes {
        Ok,
        Eof,
        RBracket,
        LBracket
    }

    impl Parser {
        fn parse(mut self) -> Parse {
            // Make sure that the root node covers all source
            self.builder.start_node(ROOT.into());
            // Parse zero or more S-expressions
            loop {
                match self.word() {
                    QexpRes::Eof => break,
                    QexpRes::Ok => (),
                    unmatched_bracket => {
                        self.builder.start_node(ERROR.into());
                        self.errors.push(format!("lone `{:?}`", unmatched_bracket));
                        self.bump(); // be sure to chug along in case of error
                        self.builder.finish_node();
                    }
                }
            }
            // eat remaining whitespace
            self.skip_ws();
            self.builder.finish_node();

            Parse { green_node: self.builder.finish(), errors: self.errors }
        }
        fn list(&mut self) {
            assert_eq!(self.current(), Some(L_BRACKET));
            // Start the list node
            self.builder.start_node(LIST.into());
            self.bump(); // '['
            loop {
                match self.word() {
                    QexpRes::Eof => {
                        self.errors.push("expected `]`".to_string());
                        break;
                    }
                    QexpRes::RBracket => {
                        self.bump();
                        break;
                    }
                    QexpRes::LBracket => {
                        self.builder.start_node(ERROR.into());
                        self.errors.push("unexpected list".to_string());
                        self.bump();
                        self.builder.finish_node();
                    }
                    QexpRes::Ok => (),
                }
            }
            // close the list node
            self.builder.finish_node();
        }
        fn word(&mut self) -> QexpRes {
            // Eat leading whitespace
            self.skip_ws();
            // Either a list, an atom, a closing paren,
            // or an eof.
            let t = match self.current() {
                None => return QexpRes::Eof,
                Some(R_BRACKET) => return QexpRes::RBracket,
                Some(L_BRACKET) => return QexpRes::LBracket,
                Some(t) => t,
            };
            match t {
                WORD => {
                    self.builder.start_node(ATOM.into());
                    self.bump();
                    self.skip_ws();
                    if Some(L_BRACKET) == self.current() {
                        self.list();
                    }
                    self.builder.finish_node();
                }
                ERROR => self.bump(),
                _ => unreachable!(),
            }
            QexpRes::Ok
        }
        /// Advance one token, adding it to the current branch of the tree builder.
        fn bump(&mut self) {
            let (kind, text) = self.tokens.pop().unwrap();
            self.builder.token(kind.into(), text.as_str());
        }
        /// Peek at the first unprocessed token
        fn current(&self) -> Option<SyntaxKind> {
            self.tokens.last().map(|(kind, _)| *kind)
        }
        fn skip_ws(&mut self) {
            while self.current() == Some(WHITESPACE) {
                self.bump()
            }
        }
    }

    let mut tokens = lex(text);
    tokens.reverse();
    Parser { tokens, builder: GreenNodeBuilder::new(), errors: Vec::new() }.parse()
}

/// To work with the parse results we need a view into the
/// green tree - the Syntax tree.
/// It is also immutable, like a GreenNode,
/// but it contains parent pointers, offsets, and
/// has identity semantics.

pub type SyntaxNode = rowan::SyntaxNode<QueryLang>;
#[allow(unused)]
type SyntaxToken = rowan::SyntaxToken<QueryLang>;
#[allow(unused)]
type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;

impl Parse {
    pub fn syntax(&self) -> SyntaxNode {
        SyntaxNode::new_root(self.green_node.clone())
    }
}

/// Let's check that the parser works as expected
#[test]
fn test_parser() {
    let text = "Inherit > mdDoc[something]";
    let node = parse(text).syntax();
    assert_eq!(
        format!("{:?}", node),
        "ROOT@0..26"
    );
    assert_eq!(node.children().count(), 3);
    let children = node
        .descendants_with_tokens()
        .map(|child| format!("{:?}@{:?}", child.kind(), child.text_range()))
        .collect::<Vec<_>>();

    assert_eq!(
        children,
        vec![
            "ROOT@0..26".to_string(),
             "ATOM@0..8".to_string(),
              "WORD@0..7".to_string(),
              "WHITESPACE@7..8".to_string(), // note, explicit whitespace!
             "ATOM@8..10".to_string(),
              "WORD@8..9".to_string(),
              "WHITESPACE@9..10".to_string(),
             "ATOM@10..26".to_string(),
              "WORD@10..15".to_string(),
              "LIST@15..26".to_string(),
               "L_BRACKET@15..16".to_string(),
                "ATOM@16..25".to_string(),
                 "WORD@16..25".to_string(),
               "R_BRACKET@25..26".to_string()
        ]
    );
}