From 0567f916d4365c8dc0be99d194fe6d157befbc81 Mon Sep 17 00:00:00 2001 From: stuebinm Date: Wed, 3 Apr 2024 22:22:38 +0200 Subject: very basic query language --- Cargo.lock | 42 +++++ Cargo.toml | 12 +- src/batchmode.rs | 61 ++++++++ src/main.rs | 408 ++++++++++-------------------------------------- src/queries.rs | 411 +++++++++++++++++++++++++++++++++++++++++++++++++ src/status_reporter.rs | 93 +++++++++++ src/util.rs | 154 ++++++++++++++++++ 7 files changed, 853 insertions(+), 328 deletions(-) create mode 100644 src/batchmode.rs create mode 100644 src/queries.rs create mode 100644 src/status_reporter.rs create mode 100644 src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 5dc3db8..bf58e1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + [[package]] name = "anyhow" version = "1.0.81" @@ -23,6 +29,31 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "clap" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + [[package]] name = "countme" version = "3.0.1" @@ -68,6 +99,15 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "m_lexer" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7e51ebf91162d585a5bae05e4779efc4a276171cb880d61dd6fab11c98467a7" +dependencies = [ + "regex", +] + [[package]] name = "memchr" version = "2.7.1" @@ -88,7 +128,9 @@ name = "nixq" version = "0.1.0" dependencies = [ "anyhow", + "clap", "itertools", + "m_lexer", "regex", "rnix", "rowan", diff --git a/Cargo.toml b/Cargo.toml index 7c3c8d6..4705207 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,12 @@ authors = [ "stuebinm " ] [dependencies] rnix = "0.11" -anyhow = "1.0.57" +anyhow = "1.0" regex = "1.6" -threadpool = "1.8.1" -rowan = "0.15.15" -serde_json = "1.0.114" -serde = { version = "1.0.197", features = ["derive"] } +threadpool = "1.8" +rowan = "0.15" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } itertools = "0.12.1" +m_lexer = "0.0.4" +clap = { version = "4.5", features = [ "cargo", "help", "std" ], default-features = false } diff --git a/src/batchmode.rs b/src/batchmode.rs new file mode 100644 index 0000000..8398bce --- /dev/null +++ b/src/batchmode.rs @@ -0,0 +1,61 @@ +use std::{path::PathBuf, fs, sync::{Arc, Mutex}}; +use threadpool::ThreadPool; + +use crate::status_reporter::*; + +// TODO: make this usable +// (this module just here to keep old code around for a bit) +pub enum Task {} + +#[allow(unreachable_code, unused)] +pub fn batchmode(tasks: Vec<(PathBuf, Task)>) { + + let pool = ThreadPool::new(16); + + let results = Arc::new(Mutex::new(vec![])); + let printer = Arc::new(StatusReport::new(tasks.len(), tasks.len())); + + for (path, task) in tasks { + pool.execute({ + let results = Arc::clone(&results); + let printer = Arc::clone(&printer); + + move || { + printer.enter_file(&format!("{path:?}")); + + let result: anyhow::Result<(PathBuf, String)> = todo!(); + + results.lock().unwrap().push(result); + } + }); + } + + pool.join(); + + println!("\n\nSummary:"); + let mut c_errors = 0; + let mut c_total = 0; + for r in results.lock().unwrap().iter() { + match r { + Err(e) => { + println!(" {}", e); + c_errors += 1; + }, + _ => () + } + c_total += 1; + } + + println!("\n ({c_total} sites total, {c_errors} errors, generated {} edits)", c_total - c_errors); + + let edits: Vec<_> = Arc::into_inner(results).unwrap().into_inner().unwrap() + .into_iter() + .filter_map(|r| r.ok()) + .collect(); + + println!("applying changes ..."); + for (filename, content) in edits { + fs::write(&filename, content.as_bytes()).unwrap(); + // println!("{}", content); + } +} diff --git a/src/main.rs b/src/main.rs index 5e86c89..a09f4ef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,356 +1,118 @@ -use std::{path::{Path, PathBuf}, fs, str::FromStr, sync::{Arc, Mutex}}; -use anyhow::{Result, Context, anyhow}; -use itertools::Itertools; -use rnix::{SyntaxKind, ast::{AttrpathValue, AttrSet, HasEntry, Entry::*}, SyntaxNode}; +use std::{path::PathBuf, fs, process::exit}; use rowan::{ast::AstNode, TextSize}; -use serde::Deserialize; -use threadpool::ThreadPool; +use clap::{arg, command, value_parser}; -struct StatusReportData { - files: usize, - items: usize, - total_files: usize, - total_items: usize, - changed_items: usize, - last_file: String, - last_item: String, -} - -impl StatusReportData { - fn print(&self, clear: bool) { - if clear { - print!("\x1b[1F\x1b[2K\x1b[1F\x1b[2K"); - } - println!("{}/{} files ({})", self.files, self.total_files, self.last_file); - println!("{}/{} ({}) items ({})", self.items, self.total_items, - self.changed_items, self.last_item); - } -} - -struct StatusReport(Mutex); - -impl StatusReport { - fn new(total_files: usize, total_items: usize) -> Self { - Self(Mutex::new(StatusReportData { - files: 0, - items: 0, - total_files, - total_items, - changed_items: 0, - last_file: "".to_string(), - last_item: "".to_string(), - })) - } - - fn enter_file(&self, f: &str) { - let mut m = self.0.lock().unwrap(); - m.files += 1; - m.last_file = f.to_string(); - m.print(m.files > 1 || m.items >= 1); - } - - fn enter_item(&self, i: String) { - let mut m = self.0.lock().unwrap(); - m.items += 1; - m.last_item = i; - m.print(m.files >= 1 || m.items > 1); - } - - fn update_item(&self, i: String) { - let mut m = self.0.lock().unwrap(); - m.last_item = i; - m.print(true); - } - - fn changed_item(&self) { - let mut m = self.0.lock().unwrap(); - m.changed_items += 1; - m.print(true); - } - - fn skip_items(&self, i: usize) { - let mut m = self.0.lock().unwrap(); - m.items += i; - m.print(m.files >= 1 || m.items >= 1); - } -} - -struct StatusPart<'a>(&'a StatusReport, usize); - -impl<'a> StatusPart<'a> { - fn enter_item(&mut self, i: String) { - self.0.enter_item(i); - self.1 -= 1; - } - - fn update_item(&mut self, i: String) { - self.0.update_item(i); - } - - fn changed_item(&mut self) { - self.0.changed_item(); - } -} - -impl<'a> Drop for StatusPart<'a> { - fn drop(&mut self) { - self.0.skip_items(self.1); - } -} - - - -fn textsize_at_line(s: &str, line: usize) -> TextSize { - s - .split('\n') - .map(|l| - TextSize::of(l) + TextSize::new(1) - ) - .take(line-1) - .sum() -} - - -fn dig_to_kind(kind: SyntaxKind, node: &SyntaxNode) -> Option { - if node.kind() == kind { - return Some(node.clone()); - } - - node.descendants() - .filter(|node| node.kind() == kind) - .next() -} - -fn add_to_meta_block(rough_pos: SyntaxNode, content: &str, main_program: &str, file: &Path, line: usize) -> Result<(String, usize)> { - let meta_node = dig_to_kind(SyntaxKind::NODE_ATTR_SET, &rough_pos).unwrap(); - let meta_set = AttrSet::cast(meta_node.clone()).unwrap(); +#[allow(dead_code)] +mod queries; +#[allow(dead_code)] +mod status_reporter; +mod batchmode; +#[allow(dead_code)] +mod util; - let description_entry = meta_set.entries() - .filter(|entry| { - match &entry { - Inherit(it) => it.attrs().any(|c| c.to_string() == "description"), - AttrpathValue(it) => it.attrpath().unwrap().to_string() == "description", - } - }) - .exactly_one().ok() - .with_context(|| format!("meta node has no description attribute in {:?} at {}", file, line))?; - let description = description_entry.syntax(); - let pos = description.text_range(); - let indent = content[..pos.start().into()].chars().rev().position(|c| c == '\n').unwrap(); +fn parse_nexp(path: &PathBuf) -> anyhow::Result<(String, rnix::Root)> { + let content = fs::read_to_string(path)?; - let patch = String::new() - + "\n" - + &" ".repeat(indent) - + "mainProgram = \"" + main_program + "\";"; - - Ok((patch, pos.end().into())) -} - -fn edit_one(file: &Path, line: usize, main_program: &str, p: &StatusReport) -> Result { - let mut content = fs::read_to_string(file)?; - let searchpos = textsize_at_line(&content, line); - - p.update_item(format!("doing {:?}", file)); let parse = rnix::Root::parse(&content); if !parse.errors().is_empty() { anyhow::bail!("error: {:?}", parse.errors()); } - let tree = parse.tree(); - - let pos_node = tree.syntax().descendants() - .filter(|node| { - if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { - let value = AttrpathValue::cast(node.clone()).unwrap(); - node.text_range().contains(searchpos) && value.attrpath().unwrap().to_string() == "meta" - } else { false } - }) - .exactly_one().ok(); - - // do we have a meta attrset already? - let (patch, insert_offset) = match pos_node { - None => { - let version_node = tree - .syntax() - .descendants() - .filter(|node| { - if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { - let value = AttrpathValue::cast(node.clone()).unwrap(); - let name = value.attrpath().unwrap().to_string(); - node.text_range().contains(searchpos + TextSize::new(5)) - && (name == "version" || name == "pname" || name == "name") - } else { false } - }) - .exactly_one().ok() - .with_context(|| format!("neither meta nor version node found for {:?} at {}", file, line))?; + Ok((content, parse.tree())) +} - let attrset = version_node.parent().unwrap(); - if attrset.kind() != SyntaxKind::NODE_ATTR_SET { - anyhow::bail!("name not in an attrset in {:?} at {}", file, line) - } +fn main() { - // does a meta block already exist? - let maybe_meta_block = attrset - .descendants() - .filter(|node| { - if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { - let value = AttrpathValue::cast(node.clone()).unwrap(); - let name = value.attrpath().unwrap().to_string(); - name == "meta" - } else { false } - }) - .exactly_one(); + let matches = command!() + .arg(arg!(--batchmode "run in batch mode") + .required(false) + ) + .arg(arg!([query] "query to run") + .required(true)) + .arg(arg!([file] "file to operate on") + .required(true) + .value_parser(value_parser!(PathBuf)) + ) + .arg(arg!(--edit "what to do") + .required(false)) + .get_matches(); + + let query_string = matches.get_one::("query").unwrap(); + let files = matches.get_one::("file").unwrap(); + + let parse = queries::parse(query_string); + if parse.errors.len() != 0 { + eprintln!( + "syntax {}: \n {}", + if parse.errors.len() == 1 { "error" } else { "errors" }, + parse.errors.join(" \n") + ); + exit(1); + } + + let (content, nexp) = match parse_nexp(files) { + Err(e) => { + eprintln!("could not parse file: {e}"); + exit(2); + }, + Ok(exp) => exp + }; - if let Ok(meta) = maybe_meta_block { - add_to_meta_block(meta.clone(), &content, main_program, file, line)? - } else { - let before_attrset_end = Into::::into(attrset.text_range().end()) - - 1 - - content[..attrset.text_range().end().into()] - .chars().rev().position(|c| c == '\n').unwrap(); + // println!("{nexp:#?}"); - let indent = content[..version_node.text_range().start().into()] - .chars().rev().position(|c| c == '\n').unwrap(); + let results = parse.apply(&content, nexp.syntax().clone()).unwrap(); - // some language specific build systems don't use meta as its own attrset - // there's no good way to recognise these, but this seems to work fine - let weird_nonstandard_meta = attrset - .descendants() - .any(|node| { - if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { - let value = AttrpathValue::cast(node.clone()).unwrap(); - let name = value.attrpath().unwrap().to_string(); - name == "description" || name == "homepage" || name == "license" - } else { false } - }); - let patch = String::new() - + "\n" - + &" ".repeat(indent) - + if weird_nonstandard_meta { "mainProgram = \"" } else { "meta.mainProgram = \"" } - + main_program + "\";"; - (patch, before_attrset_end) + if let Some(op) = matches.get_one::("edit") { + match &op[..] { + "remove" => { + let new = remove_nodes(content, &results); + println!("{new}"); } - }, - Some(pos) => { - add_to_meta_block(pos.clone(), &content, main_program, file, line)? + _ => () } - }; - - - content = String::new() - + &content[..insert_offset] - + &patch - + &content[insert_offset..]; - - p.changed_item(); - Ok(content) -} + } else { + for result in &results { + println!("{result}"); + } + } -#[derive(Deserialize, Clone)] -struct TrivialProgram { - pos: Option, - name: String } -fn main() { - - let raw_inputs = fs::read_to_string("trivials.json").unwrap(); - let inputs: Vec = serde_json::de::from_str::>(&raw_inputs).unwrap(); - // .into_iter().take(200).collect(); +fn remove_nodes(content: String, results: &Vec) -> String { + assert!(results.len() == 1); - // TODO: group edits in the same file - let pool = ThreadPool::new(16); + let span = &results[0]; - let mut tasks: Vec<(TrivialProgram, PathBuf, usize)> = inputs.into_iter() - .filter_map(|i| { - if i.pos.is_none() { - println!("no position for name {}", i.name); - None + let (before, after) = match (span.prev_sibling_or_token(), span.next_sibling_or_token()) { + (Some(prev), Some(next)) + if prev.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE + && next.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE + => { + if prev.to_string().lines().count() < next.to_string().lines().count() { + (prev.text_range().len(), TextSize::new(0)) } else { - let pos = i.pos.as_ref().unwrap(); - let (filename, line) = { - let l = pos.split(':').collect::>(); - assert!(l.len() == 2); - - (PathBuf::from_str(l[0]).unwrap(), l[1].parse().unwrap()) - }; - Some((i, filename, line)) - } - }) - .collect(); - - tasks.sort_by_key(|(_ ,filename, _)| filename.clone()); - - let grouped_tasks: Vec<(TrivialProgram, PathBuf, Vec<(usize, String)>)> = - tasks.into_iter() - .map(|(i, f, l)| (i.clone(), f, vec![(l, i.name)])) - .coalesce(|(i1, f1, l1), (i2, f2, l2)| { - if f1 == f2 { - if l1 == l2 && i1.name == i2.name { - Ok((i1, f1, l1)) - } else { - Ok((i1, f1, l1.into_iter().chain(l2.into_iter()).collect())) - } - } else { - Err(((i1,f1,l1),(i2,f2,l2))) - } - }).collect(); - - let results = Arc::new(Mutex::new(vec![])); - let printer = Arc::new(StatusReport::new(grouped_tasks.len(), grouped_tasks.len())); - - for (i, filename, sites) in grouped_tasks { - pool.execute({ - let results = Arc::clone(&results); - let printer = Arc::clone(&printer); - - move || { - let pos = i.pos.unwrap(); - printer.enter_file(&pos); - if sites.len() == 1 { - let result = edit_one(&filename, sites[0].0, &sites[0].1, &printer) - .map(|ok| (filename, ok)); - results.lock().unwrap().push(result); - } else { - results.lock().unwrap().push(Err(anyhow!("skipped {:?} as it has multiple edits", filename))); - } + (TextSize::new(0), next.text_range().len()) } - }); - } - - pool.join(); - - println!("\n\nSummary:"); - let mut c_errors = 0; - let mut c_total = 0; - for r in results.lock().unwrap().iter() { - match r { - Err(e) => { - println!(" {}", e); - c_errors += 1; - }, - _ => () } - c_total += 1; - } + _ => (TextSize::default(),TextSize::default()) + }; + + String::new() + + &content[..Into::::into(span.text_range().start() - before) - previous_indentation(span).unwrap_or(0)] + + &content[(span.text_range().end() + after).into()..] +} - println!("\n ({c_total} sites total, {c_errors} errors, generated {} edits)", c_total - c_errors); - let edits: Vec<_> = Arc::into_inner(results).unwrap().into_inner().unwrap() - .into_iter() - .filter_map(|r| r.ok()) - .collect(); - // check we didn't miss any duplicate edits - let duplicates = edits.iter().duplicates_by(|(filename, _)| filename).count(); - println!("{duplicates} edits were not the only one in their file"); +fn previous_indentation(node: &rnix::SyntaxNode) -> Option { + let whitespace_token = node.prev_sibling_or_token()?; - println!("applying changes ..."); - for (filename, content) in edits { - fs::write(&filename, content.as_bytes()).unwrap(); - // println!("{}", content); + if whitespace_token.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE { + Some(whitespace_token.to_string().lines().last().unwrap().len()) + } else { + None } } diff --git a/src/queries.rs b/src/queries.rs new file mode 100644 index 0000000..b07224a --- /dev/null +++ b/src/queries.rs @@ -0,0 +1,411 @@ +// this is mostly based on the s-exp tutorial +// https://github.com/rust-analyzer/rowan/blob/master/examples/s_expressions.rs + +use rnix::{match_ast, ast}; +use rowan::{GreenNode, GreenNodeBuilder, ast::AstNode}; + + +fn lex(text: &str) -> Vec<(SyntaxKind, String)> { + fn tok(t: SyntaxKind) -> m_lexer::TokenKind { + m_lexer::TokenKind(rowan::SyntaxKind::from(t).0) + } + fn kind(t: m_lexer::TokenKind) -> SyntaxKind { + match t.0 { + 0 => L_BRACKET, + 1 => R_BRACKET, + 2 => WORD, + 3 => WHITESPACE, + 4 => ERROR, + _ => unreachable!(), + } + } + + let lexer = m_lexer::LexerBuilder::new() + .error_token(tok(ERROR)) + .tokens(&[ + (tok(L_BRACKET), r"\["), + (tok(R_BRACKET), r"\]"), + (tok(WORD), r"[^\s\[\]]+"), + (tok(WHITESPACE), r"\s+"), + ]) + .build(); + + lexer + .tokenize(text) + .into_iter() + .map(|t| (t.len, kind(t.kind))) + .scan(0usize, |start_offset, (len, kind)| { + let s: String = text[*start_offset..*start_offset + len].into(); + *start_offset += len; + Some((kind, s)) + }) + .collect() +} + + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(non_camel_case_types)] +#[repr(u16)] +enum SyntaxKind { + L_BRACKET = 0, // '[' + R_BRACKET, // ']' + WORD, // 'Attrset', 'meta', '.', '>', ... + WHITESPACE, // whitespaces is explicit + ERROR, // as well as errors + + // composite nodes + LIST, // `[..]` + ATOM, // wraps WORD + ROOT, // top-level (a complete query) +} +use SyntaxKind::*; + +impl From for rowan::SyntaxKind { + fn from(kind: SyntaxKind) -> Self { + Self(kind as u16) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum Lang {} +impl rowan::Language for Lang { + type Kind = SyntaxKind; + fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { + assert!(raw.0 <= ROOT as u16); + unsafe { std::mem::transmute::(raw.0) } + } + fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { + kind.into() + } +} + +pub struct Parse { + pub green_node: GreenNode, + pub errors: Vec, +} + +pub fn parse(text: &str) -> Parse { + struct Parser { + /// input tokens, including whitespace, + /// in *reverse* order. + tokens: Vec<(SyntaxKind, String)>, + /// the in-progress tree. + builder: GreenNodeBuilder<'static>, + /// the list of syntax errors we've accumulated + /// so far. + errors: Vec, + } + + #[derive(Debug)] + enum QexpRes { + Ok, + Eof, + RBracket, + LBracket + } + + impl Parser { + fn parse(mut self) -> Parse { + // Make sure that the root node covers all source + self.builder.start_node(ROOT.into()); + // Parse zero or more S-expressions + loop { + match self.word() { + QexpRes::Eof => break, + QexpRes::Ok => (), + unmatched_bracket => { + self.builder.start_node(ERROR.into()); + self.errors.push(format!("lone `{:?}`", unmatched_bracket)); + self.bump(); // be sure to chug along in case of error + self.builder.finish_node(); + } + } + } + // eat remaining whitespace + self.skip_ws(); + self.builder.finish_node(); + + Parse { green_node: self.builder.finish(), errors: self.errors } + } + fn list(&mut self) { + assert_eq!(self.current(), Some(L_BRACKET)); + // Start the list node + self.builder.start_node(LIST.into()); + self.bump(); // '[' + loop { + match self.word() { + QexpRes::Eof => { + self.errors.push("expected `]`".to_string()); + break; + } + QexpRes::RBracket => { + self.bump(); + break; + } + QexpRes::LBracket => { + self.builder.start_node(ERROR.into()); + self.errors.push("unexpected list".to_string()); + self.bump(); + self.builder.finish_node(); + } + QexpRes::Ok => (), + } + } + // close the list node + self.builder.finish_node(); + } + fn word(&mut self) -> QexpRes { + // Eat leading whitespace + self.skip_ws(); + // Either a list, an atom, a closing paren, + // or an eof. + let t = match self.current() { + None => return QexpRes::Eof, + Some(R_BRACKET) => return QexpRes::RBracket, + Some(L_BRACKET) => return QexpRes::LBracket, + Some(t) => t, + }; + match t { + WORD => { + self.builder.start_node(ATOM.into()); + self.bump(); + self.skip_ws(); + if Some(L_BRACKET) == self.current() { + self.list(); + } + self.builder.finish_node(); + } + ERROR => self.bump(), + _ => unreachable!(), + } + QexpRes::Ok + } + /// Advance one token, adding it to the current branch of the tree builder. + fn bump(&mut self) { + let (kind, text) = self.tokens.pop().unwrap(); + self.builder.token(kind.into(), text.as_str()); + } + /// Peek at the first unprocessed token + fn current(&self) -> Option { + self.tokens.last().map(|(kind, _)| *kind) + } + fn skip_ws(&mut self) { + while self.current() == Some(WHITESPACE) { + self.bump() + } + } + } + + let mut tokens = lex(text); + tokens.reverse(); + Parser { tokens, builder: GreenNodeBuilder::new(), errors: Vec::new() }.parse() +} + +/// To work with the parse results we need a view into the +/// green tree - the Syntax tree. +/// It is also immutable, like a GreenNode, +/// but it contains parent pointers, offsets, and +/// has identity semantics. + +type SyntaxNode = rowan::SyntaxNode; +#[allow(unused)] +type SyntaxToken = rowan::SyntaxToken; +#[allow(unused)] +type SyntaxElement = rowan::NodeOrToken; + +impl Parse { + fn syntax(&self) -> SyntaxNode { + SyntaxNode::new_root(self.green_node.clone()) + } +} + +/// Let's check that the parser works as expected +#[test] +fn test_parser() { + let text = "Inherit > mdDoc[something]"; + let node = parse(text).syntax(); + assert_eq!( + format!("{:?}", node), + "ROOT@0..26" + ); + assert_eq!(node.children().count(), 3); + let children = node + .descendants_with_tokens() + .map(|child| format!("{:?}@{:?}", child.kind(), child.text_range())) + .collect::>(); + + assert_eq!( + children, + vec![ + "ROOT@0..26".to_string(), + "ATOM@0..8".to_string(), + "WORD@0..7".to_string(), + "WHITESPACE@7..8".to_string(), // note, explicit whitespace! + "ATOM@8..10".to_string(), + "WORD@8..9".to_string(), + "WHITESPACE@9..10".to_string(), + "ATOM@10..26".to_string(), + "WORD@10..15".to_string(), + "LIST@15..26".to_string(), + "L_BRACKET@15..16".to_string(), + "ATOM@16..25".to_string(), + "WORD@16..25".to_string(), + "R_BRACKET@25..26".to_string() + ] + ); +} + + + +type NixExprs = Box>; + +macro_rules! ast_node { + ($ast:ident, $kind:ident) => { + #[derive(PartialEq, Eq, Hash)] + #[repr(transparent)] + struct $ast(SyntaxNode); + impl $ast { + #[allow(unused)] + fn cast(node: SyntaxNode) -> Option { + if node.kind() == $kind { + Some(Self(node)) + } else { + None + } + } + } + }; +} + +ast_node!(Root, ROOT); +ast_node!(Atom, ATOM); +ast_node!(List, LIST); + +// Sexp is slightly different, so let's do it by hand. +#[derive(PartialEq, Eq, Hash, Debug)] +#[repr(transparent)] +struct Qexp(SyntaxNode); + +enum QexpKind { + Atom(Atom), + List(List), +} + +impl Qexp { + fn cast(node: SyntaxNode) -> Option { + if Atom::cast(node.clone()).is_some() || List::cast(node.clone()).is_some() { + Some(Qexp(node)) + } else { + None + } + } + + fn kind(&self) -> QexpKind { + Atom::cast(self.0.clone()) + .map(QexpKind::Atom) + .or_else(|| List::cast(self.0.clone()).map(QexpKind::List)) + .unwrap() + } + + fn apply(&self, _acc: NixExprs) -> NixExprs { + todo!() + } +} + +// Let's enhance AST nodes with ancillary functions and +// eval. +impl Root { + fn qexps(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Qexp::cast) + } +} + +enum Op { + Down, + DownRecursive, + Up, + UpRecursive, + Named(String) +} + +impl Atom { + fn eval(&self) -> Option { + self.text().parse().ok() + } + fn as_op(&self) -> Option { + let op = match self.text().as_str() { + ">" => Op::Down, + ">>" => Op::DownRecursive, + "<" => Op::Up, + "<<" => Op::UpRecursive, + name => Op::Named(name.to_owned()), + }; + Some(op) + } + fn text(&self) -> String { + match self.0.green().children().next() { + Some(rowan::NodeOrToken::Token(token)) => token.text().to_string(), + _ => unreachable!(), + } + } + fn apply(&self, acc: NixExprs) -> NixExprs { + match self.as_op() { + Some(Op::Down) => Box::new(acc.map(|s| s.children()).flatten()), + Some(Op::DownRecursive) => Box::new(acc.map(|s| s.descendants()).flatten()), + Some(Op::Up) => Box::new(acc.filter_map(|s| s.parent())), + Some(Op::UpRecursive) => Box::new(acc.map(|s| s.ancestors()).flatten()), + Some(Op::Named(name)) => + Box::new(acc + .filter(move |node| match_ast! { match node { + ast::AttrpathValue(value) => { + name == value.attrpath().unwrap().to_string() + }, + ast::Apply(value) => { + // TODO: special case lambda = NODE_SELECT here? + name == value.lambda().unwrap().to_string() + }, + // TODO: this is difficult — I want to use free-form names + // to select things below, too, but that might not always be + // possible + ast::Ident(value) => { + name == value.to_string() + }, + _ => false + }})), + _ => todo!() + } + } +} + +impl List { + fn sexps(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Qexp::cast) + } +} + + +impl Parse { + fn root(&self) -> Root { + Root::cast(self.syntax()).unwrap() + } + + pub fn apply(&self, _content: &str, nexp: rnix::SyntaxNode) -> anyhow::Result> { + + let mut acc: NixExprs = Box::new(std::iter::once(nexp)); + + for qexp in self.root().qexps() { + match qexp.kind() { + QexpKind::Atom(filter) => { + acc = filter.apply(acc); + } + _ => panic!("???") + } + } + + // let results = + // acc.map(|node| content[node.text_range().start().into()..node.text_range().end().into()].to_owned()) + // .collect(); + + Ok(acc.collect()) + } +} diff --git a/src/status_reporter.rs b/src/status_reporter.rs new file mode 100644 index 0000000..c8faee4 --- /dev/null +++ b/src/status_reporter.rs @@ -0,0 +1,93 @@ +use std::sync::Mutex; + +struct StatusReportData { + files: usize, + items: usize, + total_files: usize, + total_items: usize, + changed_items: usize, + last_file: String, + last_item: String, +} + +impl StatusReportData { + fn print(&self, clear: bool) { + if clear { + print!("\x1b[1F\x1b[2K\x1b[1F\x1b[2K"); + } + println!("{}/{} files ({})", self.files, self.total_files, self.last_file); + println!("{}/{} ({}) items ({})", self.items, self.total_items, + self.changed_items, self.last_item); + } +} + +pub struct StatusReport(Mutex); + +impl StatusReport { + pub fn new(total_files: usize, total_items: usize) -> Self { + Self(Mutex::new(StatusReportData { + files: 0, + items: 0, + total_files, + total_items, + changed_items: 0, + last_file: "".to_string(), + last_item: "".to_string(), + })) + } + + pub fn enter_file(&self, f: &str) { + let mut m = self.0.lock().unwrap(); + m.files += 1; + m.last_file = f.to_string(); + m.print(m.files > 1 || m.items >= 1); + } + + fn enter_item(&self, i: String) { + let mut m = self.0.lock().unwrap(); + m.items += 1; + m.last_item = i; + m.print(m.files >= 1 || m.items > 1); + } + + pub fn update_item(&self, i: String) { + let mut m = self.0.lock().unwrap(); + m.last_item = i; + m.print(true); + } + + pub fn changed_item(&self) { + let mut m = self.0.lock().unwrap(); + m.changed_items += 1; + m.print(true); + } + + fn skip_items(&self, i: usize) { + let mut m = self.0.lock().unwrap(); + m.items += i; + m.print(m.files >= 1 || m.items >= 1); + } +} + +struct StatusPart<'a>(&'a StatusReport, usize); + +impl<'a> StatusPart<'a> { + fn enter_item(&mut self, i: String) { + self.0.enter_item(i); + self.1 -= 1; + } + + fn update_item(&mut self, i: String) { + self.0.update_item(i); + } + + fn changed_item(&mut self) { + self.0.changed_item(); + } +} + +impl<'a> Drop for StatusPart<'a> { + fn drop(&mut self) { + self.0.skip_items(self.1); + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..d49d043 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,154 @@ +use std::{path::Path, fs}; +use anyhow::{Result, Context}; +use itertools::Itertools; +use rnix::{SyntaxKind, ast::{AttrpathValue, AttrSet, HasEntry, Entry::*}, SyntaxNode}; +use rowan::{ast::AstNode, TextSize}; + +use crate::status_reporter::StatusReport; + +fn textsize_at_line(s: &str, line: usize) -> TextSize { + s + .split('\n') + .map(|l| + TextSize::of(l) + TextSize::new(1) + ) + .take(line-1) + .sum() +} + + +fn dig_to_kind(kind: SyntaxKind, node: &SyntaxNode) -> Option { + if node.kind() == kind { + return Some(node.clone()); + } + + node.descendants() + .filter(|node| node.kind() == kind) + .next() +} + +fn add_to_meta_block(rough_pos: SyntaxNode, content: &str, main_program: &str, file: &Path, line: usize) -> Result<(String, usize)> { + let meta_node = dig_to_kind(SyntaxKind::NODE_ATTR_SET, &rough_pos).unwrap(); + let meta_set = AttrSet::cast(meta_node.clone()).unwrap(); + + let description_entry = meta_set.entries() + .filter(|entry| { + match &entry { + Inherit(it) => it.attrs().any(|c| c.to_string() == "description"), + AttrpathValue(it) => it.attrpath().unwrap().to_string() == "description", + } + }) + .exactly_one().ok() + .with_context(|| format!("meta node has no description attribute in {:?} at {}", file, line))?; + let description = description_entry.syntax(); + + let pos = description.text_range(); + let indent = content[..pos.start().into()].chars().rev().position(|c| c == '\n').unwrap(); + + let patch = String::new() + + "\n" + + &" ".repeat(indent) + + "mainProgram = \"" + main_program + "\";"; + + Ok((patch, pos.end().into())) +} + +fn edit_one(file: &Path, line: usize, main_program: &str, p: &StatusReport) -> Result { + let mut content = fs::read_to_string(file)?; + let searchpos = textsize_at_line(&content, line); + + p.update_item(format!("doing {:?}", file)); + let parse = rnix::Root::parse(&content); + if !parse.errors().is_empty() { + anyhow::bail!("error: {:?}", parse.errors()); + } + let tree = parse.tree(); + + let pos_node = tree.syntax().descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + node.text_range().contains(searchpos) && value.attrpath().unwrap().to_string() == "meta" + } else { false } + }) + .exactly_one().ok(); + + // do we have a meta attrset already? + let (patch, insert_offset) = match pos_node { + None => { + let version_node = tree + .syntax() + .descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + node.text_range().contains(searchpos + TextSize::new(5)) + && (name == "version" || name == "pname" || name == "name") + } else { false } + }) + .exactly_one().ok() + .with_context(|| format!("neither meta nor version node found for {:?} at {}", file, line))?; + + let attrset = version_node.parent().unwrap(); + if attrset.kind() != SyntaxKind::NODE_ATTR_SET { + anyhow::bail!("name not in an attrset in {:?} at {}", file, line) + } + + // does a meta block already exist? + let maybe_meta_block = attrset + .descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + name == "meta" + } else { false } + }) + .exactly_one(); + + if let Ok(meta) = maybe_meta_block { + add_to_meta_block(meta.clone(), &content, main_program, file, line)? + } else { + let before_attrset_end = Into::::into(attrset.text_range().end()) + - 1 + - content[..attrset.text_range().end().into()] + .chars().rev().position(|c| c == '\n').unwrap(); + + let indent = content[..version_node.text_range().start().into()] + .chars().rev().position(|c| c == '\n').unwrap(); + + // some language specific build systems don't use meta as its own attrset + // there's no good way to recognise these, but this seems to work fine + let weird_nonstandard_meta = attrset + .descendants() + .any(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + name == "description" || name == "homepage" || name == "license" + } else { false } + }); + let patch = String::new() + + "\n" + + &" ".repeat(indent) + + if weird_nonstandard_meta { "mainProgram = \"" } else { "meta.mainProgram = \"" } + + main_program + "\";"; + + (patch, before_attrset_end) + } + }, + Some(pos) => { + add_to_meta_block(pos.clone(), &content, main_program, file, line)? + } + }; + + + content = String::new() + + &content[..insert_offset] + + &patch + + &content[insert_offset..]; + + p.changed_item(); + Ok(content) +} -- cgit v1.2.3