very basic query language

author: stuebinm 2024-04-03 22:22:38 +0200
committer: stuebinm 2024-04-03 23:36:33 +0200
commit: 0567f916d4365c8dc0be99d194fe6d157befbc81 (patch)
tree: 8e1123ae8112abab0f3726da75bec2c08787ce0e
parent: 48534f8c321cb33190a3cc80a9c364ffbf68c878 (diff)
7 files changed, 853 insertions, 328 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 5dc3db8..bf58e1f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -12,6 +12,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "anstyle"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
+
+[[package]]
 name = "anyhow"
 version = "1.0.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -24,6 +30,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
 [[package]]
+name = "clap"
+version = "4.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
+
+[[package]]
 name = "countme"
 version = "3.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -69,6 +100,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
 
 [[package]]
+name = "m_lexer"
+version = "0.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7e51ebf91162d585a5bae05e4779efc4a276171cb880d61dd6fab11c98467a7"
+dependencies = [
+ "regex",
+]
+
+[[package]]
 name = "memchr"
 version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -88,7 +128,9 @@ name = "nixq"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "clap",
  "itertools",
+ "m_lexer",
  "regex",
  "rnix",
  "rowan",
diff --git a/Cargo.toml b/Cargo.toml
index 7c3c8d6..4705207 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,10 +8,12 @@ authors = [ "stuebinm <stuebinm@disroot.org>" ]
 
 [dependencies]
 rnix = "0.11"
-anyhow = "1.0.57"
+anyhow = "1.0"
 regex = "1.6"
-threadpool = "1.8.1"
-rowan = "0.15.15"
-serde_json = "1.0.114"
-serde = { version = "1.0.197", features = ["derive"] }
+threadpool = "1.8"
+rowan = "0.15"
+serde_json = "1.0"
+serde = { version = "1.0", features = ["derive"] }
 itertools = "0.12.1"
+m_lexer = "0.0.4"
+clap = { version = "4.5", features = [ "cargo", "help", "std" ], default-features = false }
diff --git a/src/batchmode.rs b/src/batchmode.rs
new file mode 100644
index 0000000..8398bce
--- /dev/null
+++ b/src/batchmode.rs
@@ -0,0 +1,61 @@
+use std::{path::PathBuf, fs, sync::{Arc, Mutex}};
+use threadpool::ThreadPool;
+
+use crate::status_reporter::*;
+
+// TODO: make this usable
+// (this module is only here to keep the old code around for a bit)
+pub enum Task {}
+
+/// Runs every task on a 16-thread pool, prints a summary of the failures and
+/// writes each successful result back to its file.
+///
+/// The per-task work is still a `todo!()`; since `Task` has no variants yet,
+/// `tasks` can only be empty.
+#[allow(unreachable_code, unused)]
+pub fn batchmode(tasks: Vec<(PathBuf, Task)>) {
+    let task_count = tasks.len();
+    let results = Arc::new(Mutex::new(vec![]));
+    let printer = Arc::new(StatusReport::new(task_count, task_count));
+    let pool = ThreadPool::new(16);
+
+    for (path, task) in tasks {
+        // every job gets its own handle on the shared result list and reporter
+        let results = Arc::clone(&results);
+        let printer = Arc::clone(&printer);
+        pool.execute(move || {
+            printer.enter_file(&format!("{path:?}"));
+
+            let result: anyhow::Result<(PathBuf, String)> = todo!();
+
+            results.lock().unwrap().push(result);
+        });
+    }
+    pool.join();
+
+    println!("\n\nSummary:");
+    let (c_total, c_errors) = {
+        let finished = results.lock().unwrap();
+        let mut c_errors = 0;
+        for e in finished.iter().filter_map(|r| r.as_ref().err()) {
+            println!(" {}", e);
+            c_errors += 1;
+        }
+        (finished.len(), c_errors)
+    };
+
+    println!("\n  ({c_total} sites total, {c_errors} errors, generated {} edits)", c_total - c_errors);
+
+    // all jobs have finished, so this is the last handle on the result list
+    let edits: Vec<(PathBuf, String)> = Arc::into_inner(results)
+        .unwrap()
+        .into_inner()
+        .unwrap()
+        .into_iter()
+        .flatten()
+        .collect();
+
+    println!("applying changes ...");
+    for (filename, content) in &edits {
+        fs::write(filename, content.as_bytes()).unwrap();
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 5e86c89..a09f4ef 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,356 +1,118 @@
-use std::{path::{Path, PathBuf}, fs, str::FromStr, sync::{Arc, Mutex}};
-use anyhow::{Result, Context, anyhow};
-use itertools::Itertools;
-use rnix::{SyntaxKind, ast::{AttrpathValue, AttrSet, HasEntry, Entry::*}, SyntaxNode};
+use std::{path::PathBuf, fs, process::exit};
 use rowan::{ast::AstNode, TextSize};
-use serde::Deserialize;
-use threadpool::ThreadPool;
+use clap::{arg, command, value_parser};
 
-struct StatusReportData {
-    files: usize,
-    items: usize,
-    total_files: usize,
-    total_items: usize,
-    changed_items: usize,
-    last_file: String,
-    last_item: String,
-}
-
-impl StatusReportData {
-    fn print(&self, clear: bool) {
-        if clear {
-            print!("\x1b[1F\x1b[2K\x1b[1F\x1b[2K");
-        }
-        println!("{}/{} files ({})", self.files, self.total_files, self.last_file);
-        println!("{}/{} ({}) items ({})", self.items, self.total_items,
-                 self.changed_items, self.last_item);
-    }
-}
-
-struct StatusReport(Mutex<StatusReportData>);
-
-impl StatusReport {
-    fn new(total_files: usize, total_items: usize) -> Self {
-        Self(Mutex::new(StatusReportData {
-            files: 0,
-            items: 0,
-            total_files,
-            total_items,
-            changed_items: 0,
-            last_file: "".to_string(),
-            last_item: "".to_string(),
-        }))
-    }
-
-    fn enter_file(&self, f: &str) {
-        let mut m = self.0.lock().unwrap();
-        m.files += 1;
-        m.last_file = f.to_string();
-        m.print(m.files > 1 || m.items >= 1);
-    }
-
-    fn enter_item(&self, i: String) {
-        let mut m = self.0.lock().unwrap();
-        m.items += 1;
-        m.last_item = i;
-        m.print(m.files >= 1 || m.items > 1);
-    }
-
-    fn update_item(&self, i: String) {
-        let mut m = self.0.lock().unwrap();
-        m.last_item = i;
-        m.print(true);
-    }
-
-    fn changed_item(&self) {
-        let mut m = self.0.lock().unwrap();
-        m.changed_items += 1;
-        m.print(true);
-    }
-
-    fn skip_items(&self, i: usize) {
-        let mut m = self.0.lock().unwrap();
-        m.items += i;
-        m.print(m.files >= 1 || m.items >= 1);
-    }
-}
-
-struct StatusPart<'a>(&'a StatusReport, usize);
-
-impl<'a> StatusPart<'a> {
-    fn enter_item(&mut self, i: String) {
-        self.0.enter_item(i);
-        self.1 -= 1;
-    }
-
-    fn update_item(&mut self, i: String) {
-        self.0.update_item(i);
-    }
-
-    fn changed_item(&mut self) {
-        self.0.changed_item();
-    }
-}
-
-impl<'a> Drop for StatusPart<'a> {
-    fn drop(&mut self) {
-        self.0.skip_items(self.1);
-    }
-}
-
-
-
-fn textsize_at_line(s: &str, line: usize) -> TextSize {
-    s
-        .split('\n')
-        .map(|l|
-            TextSize::of(l) + TextSize::new(1)
-        )
-        .take(line-1)
-        .sum()
-}
-
-
-fn dig_to_kind(kind: SyntaxKind, node: &SyntaxNode) -> Option<SyntaxNode> {
-    if node.kind() == kind {
-        return Some(node.clone());
-    }
-
-    node.descendants()
-        .filter(|node| node.kind() == kind)
-        .next()
-}
-
-fn add_to_meta_block(rough_pos: SyntaxNode, content: &str, main_program: &str, file: &Path, line: usize) -> Result<(String, usize)> {
-    let meta_node = dig_to_kind(SyntaxKind::NODE_ATTR_SET, &rough_pos).unwrap();
-    let meta_set = AttrSet::cast(meta_node.clone()).unwrap();
+#[allow(dead_code)]
+mod queries;
+#[allow(dead_code)]
+mod status_reporter;
+mod batchmode;
+#[allow(dead_code)]
+mod util;
 
-    let description_entry = meta_set.entries()
-         .filter(|entry| {
-             match &entry {
-                 Inherit(it) => it.attrs().any(|c| c.to_string() == "description"),
-                 AttrpathValue(it) => it.attrpath().unwrap().to_string() == "description",
-             }
-         })
-         .exactly_one().ok()
-         .with_context(|| format!("meta node has no description attribute in {:?} at {}", file, line))?;
-    let description = description_entry.syntax();
 
-    let pos = description.text_range();
-    let indent = content[..pos.start().into()].chars().rev().position(|c| c == '\n').unwrap();
+fn parse_nexp(path: &PathBuf) -> anyhow::Result<(String, rnix::Root)> {
+    let content = fs::read_to_string(path)?;
 
-    let patch = String::new()
-        + "\n"
-        + &" ".repeat(indent)
-        + "mainProgram = \"" + main_program + "\";";
-
-    Ok((patch, pos.end().into()))
-}
-
-fn edit_one(file: &Path, line: usize, main_program: &str, p: &StatusReport) -> Result<String> {
-    let mut content = fs::read_to_string(file)?;
-    let searchpos = textsize_at_line(&content, line);
-
-    p.update_item(format!("doing {:?}", file));
     let parse = rnix::Root::parse(&content);
     if !parse.errors().is_empty() {
         anyhow::bail!("error: {:?}", parse.errors());
     }
-    let tree = parse.tree();
-
-    let pos_node = tree.syntax().descendants()
-        .filter(|node| {
-            if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
-                let value = AttrpathValue::cast(node.clone()).unwrap();
-                node.text_range().contains(searchpos) && value.attrpath().unwrap().to_string() == "meta"
-            } else { false }
-        })
-        .exactly_one().ok();
-
-    // do we have a meta attrset already?
-    let (patch, insert_offset) = match pos_node {
-        None => {
-            let version_node = tree
-                .syntax()
-                .descendants()
-                .filter(|node| {
-                    if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
-                        let value = AttrpathValue::cast(node.clone()).unwrap();
-                        let name =  value.attrpath().unwrap().to_string();
-                        node.text_range().contains(searchpos + TextSize::new(5))
-                            && (name == "version" || name == "pname" || name == "name")
-                    } else { false }
-                })
-                .exactly_one().ok()
-                .with_context(|| format!("neither meta nor version node found for {:?} at {}", file, line))?;
+    Ok((content, parse.tree()))
+}
 
-            let attrset = version_node.parent().unwrap();
-            if attrset.kind() != SyntaxKind::NODE_ATTR_SET {
-                anyhow::bail!("name not in an attrset in {:?} at {}", file, line)
-            }
+fn main() {
 
-            // does a meta block already exist?
-            let maybe_meta_block = attrset
-                .descendants()
-                .filter(|node| {
-                    if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
-                        let value = AttrpathValue::cast(node.clone()).unwrap();
-                        let name =  value.attrpath().unwrap().to_string();
-                        name == "meta"
-                    } else { false }
-                })
-                .exactly_one();
+    let matches = command!()
+        .arg(arg!(--batchmode "run in batch mode")
+             .required(false)
+        )
+        .arg(arg!([query] "query to run")
+             .required(true))
+        .arg(arg!([file] "file to operate on")
+            .required(true)
+             .value_parser(value_parser!(PathBuf))
+        )
+        .arg(arg!(--edit <operation> "what to do")
+        .required(false))
+        .get_matches();
+
+    let query_string = matches.get_one::<String>("query").unwrap();
+    let files = matches.get_one::<PathBuf>("file").unwrap();
+
+    let parse = queries::parse(query_string);
+    if !parse.errors.is_empty() {
+        eprintln!(
+            "syntax {}: \n  {}",
+            if parse.errors.len() == 1 { "error" } else { "errors" },
+            parse.errors.join("\n  ") // one error per line, each indented like the first
+        );
+        exit(1);
+    }
+
+    let (content, nexp) = match parse_nexp(files) {
+        Err(e) => {
+            eprintln!("could not parse file: {e}");
+            exit(2);
+        },
+        Ok(exp) => exp
+    };
 
-            if let Ok(meta) = maybe_meta_block {
-                add_to_meta_block(meta.clone(), &content, main_program, file, line)?
-            } else {
-                let before_attrset_end = Into::<usize>::into(attrset.text_range().end())
-                    - 1
-                    - content[..attrset.text_range().end().into()]
-                    .chars().rev().position(|c| c == '\n').unwrap();
+    // println!("{nexp:#?}");
 
-                let indent = content[..version_node.text_range().start().into()]
-                    .chars().rev().position(|c| c == '\n').unwrap();
+    let results = parse.apply(&content, nexp.syntax().clone()).unwrap();
 
-                // some language specific build systems don't use meta as its own attrset
-                // there's no good way to recognise these, but this seems to work fine
-                let weird_nonstandard_meta = attrset
-                    .descendants()
-                    .any(|node| {
-                        if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
-                            let value = AttrpathValue::cast(node.clone()).unwrap();
-                            let name =  value.attrpath().unwrap().to_string();
-                            name == "description" || name == "homepage" || name == "license"
-                        } else { false }
-                    });
-                let patch = String::new()
-                    + "\n"
-                    + &" ".repeat(indent)
-                    + if weird_nonstandard_meta { "mainProgram = \"" } else { "meta.mainProgram = \"" }
-                    + main_program + "\";";
 
-                (patch, before_attrset_end)
+    if let Some(op) = matches.get_one::<String>("edit") {
+        match &op[..] {
+            "remove" => {
+                let new = remove_nodes(content, &results);
+                println!("{new}");
             }
-        },
-        Some(pos) => {
-            add_to_meta_block(pos.clone(), &content, main_program, file, line)?
+            _ => ()
         }
-    };
-
-
-    content = String::new()
-        + &content[..insert_offset]
-        + &patch
-        + &content[insert_offset..];
-
-    p.changed_item();
-    Ok(content)
-}
+    } else {
+        for result in &results {
+            println!("{result}");
+        }
+    }
 
 
-#[derive(Deserialize, Clone)]
-struct TrivialProgram {
-    pos: Option<String>,
-    name: String
 }
 
-fn main() {
-
-    let raw_inputs = fs::read_to_string("trivials.json").unwrap();
-    let inputs: Vec<TrivialProgram> = serde_json::de::from_str::<Vec<_>>(&raw_inputs).unwrap();
-        // .into_iter().take(200).collect();
+fn remove_nodes(content: String, results: &Vec<rnix::SyntaxNode>) -> String {
 
+    assert!(results.len() == 1);
 
-    // TODO: group edits in the same file
-    let pool = ThreadPool::new(16);
+    let span = &results[0];
 
-    let mut tasks: Vec<(TrivialProgram, PathBuf, usize)> = inputs.into_iter()
-        .filter_map(|i| {
-            if i.pos.is_none() {
-                println!("no position for name {}", i.name);
-                None
+    let (before, after) = match (span.prev_sibling_or_token(), span.next_sibling_or_token()) {
+        (Some(prev), Some(next))
+            if prev.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE
+              && next.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE
+        => {
+            if prev.to_string().lines().count() < next.to_string().lines().count() {
+                (prev.text_range().len(), TextSize::new(0))
             } else {
-                let pos = i.pos.as_ref().unwrap();
-                let (filename, line) = {
-                    let l = pos.split(':').collect::<Vec<_>>();
-                    assert!(l.len() == 2);
-
-                    (PathBuf::from_str(l[0]).unwrap(), l[1].parse().unwrap())
-                };
-                Some((i, filename, line))
-            }
-        })
-        .collect();
-
-    tasks.sort_by_key(|(_ ,filename, _)| filename.clone());
-
-    let grouped_tasks: Vec<(TrivialProgram, PathBuf, Vec<(usize, String)>)> =
-        tasks.into_iter()
-             .map(|(i, f, l)| (i.clone(), f, vec![(l, i.name)]))
-             .coalesce(|(i1, f1, l1), (i2, f2, l2)| {
-                 if f1 == f2 {
-                     if l1 == l2 && i1.name == i2.name {
-                         Ok((i1, f1, l1))
-                     } else {
-                         Ok((i1, f1, l1.into_iter().chain(l2.into_iter()).collect()))
-                     }
-                 } else {
-                     Err(((i1,f1,l1),(i2,f2,l2)))
-                 }
-             }).collect();
-
-    let results = Arc::new(Mutex::new(vec![]));
-    let printer = Arc::new(StatusReport::new(grouped_tasks.len(), grouped_tasks.len()));
-
-    for (i, filename, sites) in grouped_tasks {
-        pool.execute({
-            let results = Arc::clone(&results);
-            let printer = Arc::clone(&printer);
-
-            move || {
-                let pos = i.pos.unwrap();
-                printer.enter_file(&pos);
-                if sites.len() == 1 {
-                    let result = edit_one(&filename, sites[0].0, &sites[0].1, &printer)
-                        .map(|ok| (filename, ok));
-                    results.lock().unwrap().push(result);
-                } else {
-                    results.lock().unwrap().push(Err(anyhow!("skipped {:?} as it has multiple edits", filename)));
-                }
+                (TextSize::new(0), next.text_range().len())
             }
-        });
-    }
-
-    pool.join();
-
-    println!("\n\nSummary:");
-    let mut c_errors = 0;
-    let mut c_total = 0;
-    for r in results.lock().unwrap().iter() {
-        match r {
-            Err(e) => {
-                println!(" {}", e);
-                c_errors += 1;
-            },
-            _ => ()
         }
-        c_total += 1;
-    }
+        _ => (TextSize::default(),TextSize::default())
+    };
+
+    String::new()
+        + &content[..Into::<usize>::into(span.text_range().start() - before) - previous_indentation(span).unwrap_or(0)]
+        + &content[(span.text_range().end() + after).into()..]
+}
 
-    println!("\n  ({c_total} sites total, {c_errors} errors, generated {} edits)", c_total - c_errors);
 
-    let edits: Vec<_> = Arc::into_inner(results).unwrap().into_inner().unwrap()
-        .into_iter()
-        .filter_map(|r| r.ok())
-        .collect();
 
-    // check we didn't miss any duplicate edits
-    let duplicates = edits.iter().duplicates_by(|(filename, _)| filename).count();
-    println!("{duplicates} edits were not the only one in their file");
+fn previous_indentation(node: &rnix::SyntaxNode) -> Option<usize> {
+    let whitespace_token = node.prev_sibling_or_token()?;
 
-    println!("applying changes ...");
-    for (filename, content) in edits {
-        fs::write(&filename, content.as_bytes()).unwrap();
-        // println!("{}", content);
+    if whitespace_token.kind() == rnix::SyntaxKind::TOKEN_WHITESPACE {
+        Some(whitespace_token.to_string().lines().last().unwrap().len())
+    } else {
+        None
     }
 }
diff --git a/src/queries.rs b/src/queries.rs
new file mode 100644
index 0000000..b07224a
--- /dev/null
+++ b/src/queries.rs
@@ -0,0 +1,411 @@
+// this is mostly based on the s-exp tutorial
+// https://github.com/rust-analyzer/rowan/blob/master/examples/s_expressions.rs
+
+use rnix::{match_ast, ast};
+use rowan::{GreenNode, GreenNodeBuilder, ast::AstNode};
+
+
+/// Tokenizes a query string into `(kind, text)` pairs, whitespace included.
+///
+/// Each token's text is the slice of `text` that directly follows the
+/// previous token's slice, starting at offset 0.
+fn lex(text: &str) -> Vec<(SyntaxKind, String)> {
+    // m_lexer identifies tokens by a bare u16; reuse our own discriminants for it
+    fn to_lexer_kind(kind: SyntaxKind) -> m_lexer::TokenKind {
+        m_lexer::TokenKind(rowan::SyntaxKind::from(kind).0)
+    }
+    // inverse of `to_lexer_kind` for the five token kinds registered below
+    fn from_lexer_kind(raw: m_lexer::TokenKind) -> SyntaxKind {
+        match raw.0 {
+            0 => L_BRACKET,
+            1 => R_BRACKET,
+            2 => WORD,
+            3 => WHITESPACE,
+            4 => ERROR,
+            _ => unreachable!(),
+        }
+    }
+
+    let rules = [
+        (to_lexer_kind(L_BRACKET), r"\["),
+        (to_lexer_kind(R_BRACKET), r"\]"),
+        (to_lexer_kind(WORD), r"[^\s\[\]]+"),
+        (to_lexer_kind(WHITESPACE), r"\s+"),
+    ];
+    let lexer = m_lexer::LexerBuilder::new()
+        .error_token(to_lexer_kind(ERROR))
+        .tokens(&rules)
+        .build();
+
+    // the lexer only reports lengths, so track the running offset ourselves
+    let mut offset = 0usize;
+    let mut tokens = Vec::new();
+    for token in lexer.tokenize(text) {
+        let end = offset + token.len;
+        let piece: String = text[offset..end].into();
+        tokens.push((from_lexer_kind(token.kind), piece));
+        offset = end;
+    }
+    tokens
+}
+
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[allow(non_camel_case_types)]
+#[repr(u16)]
+enum SyntaxKind { // order matters: `lex` maps 0..=4 back to tokens and `kind_from_raw` bounds raw values by ROOT
+    L_BRACKET = 0, // '['
+    R_BRACKET,     // ']'
+    WORD,          // any run without whitespace or brackets: 'Attrset', 'meta', '.', '>', ...
+    WHITESPACE,    // whitespace is kept as explicit tokens
+    ERROR,         // as are errors (also used as a node kind around stray brackets)
+
+    // composite nodes
+    LIST, // `[..]`
+    ATOM, // a WORD, any whitespace after it, and an optional LIST directly following
+    ROOT, // top-level (a complete query); must remain the last variant
+}
+use SyntaxKind::*;
+
+/// Lowers a query `SyntaxKind` to rowan's untyped kind via its `u16` discriminant.
+impl From<SyntaxKind> for rowan::SyntaxKind {
+    fn from(kind: SyntaxKind) -> Self {
+        let raw = kind as u16;
+        rowan::SyntaxKind(raw)
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+enum Lang {} // uninhabited marker type; only used to select `SyntaxKind` for rowan's generic tree types
+impl rowan::Language for Lang {
+    type Kind = SyntaxKind;
+    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
+        assert!(raw.0 <= ROOT as u16); // ROOT is the last variant, so this bounds the discriminant
+        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } // SAFETY: SyntaxKind is repr(u16) with contiguous discriminants 0..=ROOT, checked by the assert above
+    }
+    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
+        kind.into()
+    }
+}
+
+pub struct Parse { // result of `parse`: always carries a tree, even when errors were found
+    pub green_node: GreenNode, // untyped tree whose top node has kind ROOT
+    pub errors: Vec<String>, // messages for every syntax error met while parsing
+}
+
+pub fn parse(text: &str) -> Parse { // lexes `text` and builds a query tree; problems are collected in `Parse::errors`
+    struct Parser {
+        /// input tokens, including whitespace,
+        /// in *reverse* order, so that `pop` yields the next token.
+        tokens: Vec<(SyntaxKind, String)>,
+        /// the in-progress tree.
+        builder: GreenNodeBuilder<'static>,
+        /// the list of syntax errors we've accumulated
+        /// so far.
+        errors: Vec<String>,
+    }
+
+    #[derive(Debug)] // Debug output is used verbatim in the "lone ..." error message
+    enum QexpRes {
+        Ok, // an atom or an error token was consumed
+        Eof, // no tokens left
+        RBracket, // next token is `]`; it has not been consumed
+        LBracket // next token is `[`; it has not been consumed
+    }
+
+    impl Parser {
+        fn parse(mut self) -> Parse {
+            // Make sure that the root node covers all source
+            self.builder.start_node(ROOT.into());
+            // Parse zero or more query expressions
+            loop {
+                match self.word() {
+                    QexpRes::Eof => break,
+                    QexpRes::Ok => (),
+                    unmatched_bracket => {
+                        self.builder.start_node(ERROR.into());
+                        self.errors.push(format!("lone `{:?}`", unmatched_bracket));
+                        self.bump(); // be sure to chug along in case of error
+                        self.builder.finish_node();
+                    }
+                }
+            }
+            // eat remaining whitespace
+            self.skip_ws();
+            self.builder.finish_node();
+
+            Parse { green_node: self.builder.finish(), errors: self.errors }
+        }
+        fn list(&mut self) { // parses `[ ... ]`; must be called with `[` as the current token
+            assert_eq!(self.current(), Some(L_BRACKET));
+            // Start the list node
+            self.builder.start_node(LIST.into());
+            self.bump(); // '['
+            loop {
+                match self.word() {
+                    QexpRes::Eof => {
+                        self.errors.push("expected `]`".to_string());
+                        break;
+                    }
+                    QexpRes::RBracket => {
+                        self.bump();
+                        break;
+                    }
+                    QexpRes::LBracket => {
+                        self.builder.start_node(ERROR.into());
+                        self.errors.push("unexpected list".to_string());
+                        self.bump();
+                        self.builder.finish_node();
+                    }
+                    QexpRes::Ok => (),
+                }
+            }
+            // close the list node
+            self.builder.finish_node();
+        }
+        fn word(&mut self) -> QexpRes {
+            // Eat leading whitespace
+            self.skip_ws();
+            // Either an opening bracket, an atom, a closing bracket,
+            // or an eof. Brackets are reported to the caller unconsumed.
+            let t = match self.current() {
+                None => return QexpRes::Eof,
+                Some(R_BRACKET) => return QexpRes::RBracket,
+                Some(L_BRACKET) => return QexpRes::LBracket,
+                Some(t) => t,
+            };
+            match t {
+                WORD => {
+                    self.builder.start_node(ATOM.into());
+                    self.bump();
+                    self.skip_ws(); // trailing whitespace becomes part of the atom node
+                    if Some(L_BRACKET) == self.current() {
+                        self.list(); // a list is only parsed directly after a word, nested inside its atom
+                    }
+                    self.builder.finish_node();
+                }
+                ERROR => self.bump(),
+                _ => unreachable!(), // whitespace was skipped and brackets returned above
+            }
+            QexpRes::Ok
+        }
+        /// Advance one token, adding it to the current branch of the tree builder.
+        fn bump(&mut self) {
+            let (kind, text) = self.tokens.pop().unwrap();
+            self.builder.token(kind.into(), text.as_str());
+        }
+        /// Peek at the first unprocessed token
+        fn current(&self) -> Option<SyntaxKind> {
+            self.tokens.last().map(|(kind, _)| *kind)
+        }
+        fn skip_ws(&mut self) { // consumes whitespace tokens into the node currently being built
+            while self.current() == Some(WHITESPACE) {
+                self.bump()
+            }
+        }
+    }
+
+    let mut tokens = lex(text);
+    tokens.reverse();
+    Parser { tokens, builder: GreenNodeBuilder::new(), errors: Vec::new() }.parse()
+}
+
+/// To work with the parse results we need a view into the
+/// green tree - the Syntax tree.
+/// It is also immutable, like a GreenNode,
+/// but it contains parent pointers, offsets, and
+/// has identity semantics.
+
+type SyntaxNode = rowan::SyntaxNode<Lang>;
+#[allow(unused)]
+type SyntaxToken = rowan::SyntaxToken<Lang>;
+#[allow(unused)]
+type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
+
+impl Parse {
+    /// Builds a new syntax-tree root over a clone of this parse's green tree.
+    fn syntax(&self) -> SyntaxNode {
+        let green = self.green_node.clone();
+        SyntaxNode::new_root(green)
+    }
+}
+
+/// Checks that a small query parses into the expected tree (node kinds and text ranges).
+#[test]
+fn test_parser() {
+    let text = "Inherit > mdDoc[something]";
+    let node = parse(text).syntax();
+    assert_eq!(
+        format!("{:?}", node),
+        "ROOT@0..26"
+    );
+    assert_eq!(node.children().count(), 3); // three top-level atoms: `Inherit`, `>`, `mdDoc[...]`
+    let children = node
+        .descendants_with_tokens()
+        .map(|child| format!("{:?}@{:?}", child.kind(), child.text_range()))
+        .collect::<Vec<_>>();
+
+    assert_eq!(
+        children,
+        vec![
+            "ROOT@0..26".to_string(),
+             "ATOM@0..8".to_string(),
+              "WORD@0..7".to_string(),
+              "WHITESPACE@7..8".to_string(), // note, explicit whitespace, owned by the preceding atom
+             "ATOM@8..10".to_string(),
+              "WORD@8..9".to_string(),
+              "WHITESPACE@9..10".to_string(),
+             "ATOM@10..26".to_string(),
+              "WORD@10..15".to_string(),
+              "LIST@15..26".to_string(), // the list is nested inside the atom it follows
+               "L_BRACKET@15..16".to_string(),
+                "ATOM@16..25".to_string(),
+                 "WORD@16..25".to_string(),
+               "R_BRACKET@25..26".to_string()
+        ]
+    );
+}
+
+
+
+type NixExprs = Box<dyn Iterator<Item = rnix::SyntaxNode>>;
+
+macro_rules! ast_node { // defines a newtype `$ast` over SyntaxNode with a kind-checked `cast`
+    ($ast:ident, $kind:ident) => {
+        #[derive(PartialEq, Eq, Hash)]
+        #[repr(transparent)]
+        struct $ast(SyntaxNode);
+        impl $ast {
+            #[allow(unused)]
+            fn cast(node: SyntaxNode) -> Option<Self> { // `Some` only when the node's kind is `$kind`
+                if node.kind() == $kind {
+                    Some(Self(node))
+                } else {
+                    None
+                }
+            }
+        }
+    };
+}
+
+ast_node!(Root, ROOT);
+ast_node!(Atom, ATOM);
+ast_node!(List, LIST);
+
+// Qexp wraps either an ATOM or a LIST node, so it is written by hand instead of via `ast_node!`.
+#[derive(PartialEq, Eq, Hash, Debug)]
+#[repr(transparent)]
+struct Qexp(SyntaxNode);
+
+enum QexpKind { // typed view of a `Qexp`, as returned by `Qexp::kind`
+    Atom(Atom), // the wrapped node has kind ATOM
+    List(List), // the wrapped node has kind LIST
+}
+
+impl Qexp {
+    /// Wraps `node` if it is an ATOM or a LIST; any other kind yields `None`.
+    fn cast(node: SyntaxNode) -> Option<Self> {
+        match node.kind() {
+            ATOM | LIST => Some(Qexp(node)),
+            _ => None,
+        }
+    }
+
+    /// Returns the typed wrapper matching the underlying node's kind.
+    fn kind(&self) -> QexpKind {
+        match Atom::cast(self.0.clone()) {
+            Some(atom) => QexpKind::Atom(atom),
+            // `cast` only admits ATOM and LIST, so a non-atom must be a list
+            None => QexpKind::List(List::cast(self.0.clone()).unwrap()),
+        }
+    }
+
+    /// Not implemented yet.
+    fn apply(&self, _acc: NixExprs) -> NixExprs {
+        todo!()
+    }
+}
+
+// Typed accessor on the root wrapper: `qexps` yields the direct ATOM/LIST
+// children in order; children of any other kind (e.g. ERROR) are skipped.
+impl Root {
+    fn qexps(&self) -> impl Iterator<Item = Qexp> + '_ {
+        self.0.children().filter_map(Qexp::cast)
+    }
+}
+
+enum Op { // one step of a query, as decoded by `Atom::as_op`
+    Down, // `>`: replace each node by its children
+    DownRecursive, // `>>`: replace each node by its `descendants()`
+    Up, // `<`: replace each node by its parent (nodes without one are dropped)
+    UpRecursive, // `<<`: replace each node by its `ancestors()`
+    Named(String) // any other word: keep only nodes whose name matches it
+}
+
+impl Atom {
+    /// Interprets the atom's word as an integer, if it parses as one.
+    fn eval(&self) -> Option<i64> {
+        self.text().parse().ok()
+    }
+
+    /// Decodes the atom's word into a query operator.
+    ///
+    /// Currently always returns `Some`: every word that is not one of the
+    /// four navigation operators is treated as a name filter.
+    fn as_op(&self) -> Option<Op> {
+        let op = match self.text().as_str() {
+            ">" => Op::Down,
+            ">>" => Op::DownRecursive,
+            "<" => Op::Up,
+            "<<" => Op::UpRecursive,
+            name => Op::Named(name.to_owned()),
+        };
+        Some(op)
+    }
+
+    /// Returns the text of the atom's first child, which must be a token.
+    fn text(&self) -> String {
+        match self.0.green().children().next() {
+            Some(rowan::NodeOrToken::Token(token)) => token.text().to_string(),
+            // the parser only opens an ATOM right before bumping a WORD token
+            _ => unreachable!(),
+        }
+    }
+
+    /// Applies this atom as one query step to the stream of nix nodes.
+    ///
+    /// Navigation operators replace every node by its children, descendants,
+    /// parent or ancestors. A name keeps only attribute bindings, function
+    /// applications and identifiers whose attrpath, callee or text equals the
+    /// name. Nodes lacking the relevant child simply do not match.
+    fn apply(&self, acc: NixExprs) -> NixExprs {
+        match self.as_op() {
+            Some(Op::Down) => Box::new(acc.flat_map(|s| s.children())),
+            Some(Op::DownRecursive) => Box::new(acc.flat_map(|s| s.descendants())),
+            Some(Op::Up) => Box::new(acc.filter_map(|s| s.parent())),
+            Some(Op::UpRecursive) => Box::new(acc.flat_map(|s| s.ancestors())),
+            Some(Op::Named(name)) =>
+                Box::new(acc
+                .filter(move |node| match_ast! { match node {
+                    ast::AttrpathValue(value) => {
+                        value.attrpath().is_some_and(|path| path.to_string() == name)
+                    },
+                    ast::Apply(value) => {
+                        // TODO: special case lambda = NODE_SELECT here?
+                        value.lambda().is_some_and(|lambda| lambda.to_string() == name)
+                    },
+                    // TODO: this is difficult — I want to use free-form names
+                    // to select things below, too, but that might not always be
+                    // possible
+                    ast::Ident(value) => {
+                        name == value.to_string()
+                    },
+                    _ => false
+                }})),
+            None => unreachable!("as_op maps every word to an operator"),
+        }
+    }
+}
+
+impl List {
+    fn sexps(&self) -> impl Iterator<Item = Qexp> + '_ { // direct ATOM/LIST children of the list, in order
+        self.0.children().filter_map(Qexp::cast)
+    }
+}
+
+
+impl Parse {
+    /// Returns the typed wrapper around the tree's ROOT node.
+    fn root(&self) -> Root {
+        Root::cast(self.syntax()).unwrap()
+    }
+
+    /// Runs the query against `nexp` and collects the nodes it selects.
+    ///
+    /// Starting from `nexp` alone, every top-level atom of the query is
+    /// applied in order to the current set of nodes. `_content` is not used
+    /// at the moment; callers render the returned nodes themselves.
+    ///
+    /// # Errors
+    ///
+    /// Fails if a list appears at the top level of the query, since lists
+    /// are not evaluated yet.
+    pub fn apply(&self, _content: &str, nexp: rnix::SyntaxNode) -> anyhow::Result<Vec<rnix::SyntaxNode>> {
+        let mut acc: NixExprs = Box::new(std::iter::once(nexp));
+
+        for qexp in self.root().qexps() {
+            acc = match qexp.kind() {
+                QexpKind::Atom(filter) => filter.apply(acc),
+                QexpKind::List(_) => anyhow::bail!("lists are not supported at the top level of a query"),
+            };
+        }
+
+        Ok(acc.collect())
+    }
+}
diff --git a/src/status_reporter.rs b/src/status_reporter.rs
new file mode 100644
index 0000000..c8faee4
--- /dev/null
+++ b/src/status_reporter.rs
@@ -0,0 +1,93 @@
+use std::sync::Mutex;
+
+/// Progress counters and labels behind the two-line status display.
+struct StatusReportData {
+    /// Number of files entered so far.
+    files: usize,
+    /// Number of items entered or skipped so far.
+    items: usize,
+    /// Denominator shown next to `files`.
+    total_files: usize,
+    /// Denominator shown next to `items`.
+    total_items: usize,
+    /// Number of items reported as changed.
+    changed_items: usize,
+    /// Label of the most recently entered file.
+    last_file: String,
+    /// Label of the most recently entered or updated item.
+    last_item: String,
+    /// Whether the status lines have been written at least once, i.e.
+    /// whether there is anything on the terminal to overwrite.
+    printed: bool,
+}
+
+impl StatusReportData {
+    /// Formats the two status lines, each terminated by a newline.
+    fn render(&self) -> String {
+        format!(
+            "{}/{} files ({})\n{}/{} ({}) items ({})\n",
+            self.files, self.total_files, self.last_file,
+            self.items, self.total_items, self.changed_items, self.last_item,
+        )
+    }
+
+    /// Writes the status lines to stdout, replacing the previously written
+    /// ones if there are any.
+    fn print(&mut self) {
+        if self.printed {
+            // `ESC[1F` moves the cursor to the start of the previous line and
+            // `ESC[2K` erases it; done twice to remove both status lines.
+            // Only do this once something has actually been printed,
+            // otherwise unrelated terminal output above would be erased.
+            print!("\x1b[1F\x1b[2K\x1b[1F\x1b[2K");
+        }
+        print!("{}", self.render());
+        self.printed = true;
+    }
+}
+
+/// Shared handle to the status display.
+///
+/// Every method locks the inner mutex, updates the counters and redraws the
+/// status lines on stdout. All methods panic if the mutex is poisoned.
+pub struct StatusReport(Mutex<StatusReportData>);
+
+impl StatusReport {
+    /// Creates a report with all counters at zero and empty labels.
+    /// Nothing is printed until the first update.
+    pub fn new(total_files: usize, total_items: usize) -> Self {
+        Self(Mutex::new(StatusReportData {
+            files: 0,
+            items: 0,
+            total_files,
+            total_items,
+            changed_items: 0,
+            last_file: String::new(),
+            last_item: String::new(),
+            printed: false,
+        }))
+    }
+
+    /// Applies `change` to the data under the lock, then redraws.
+    fn update(&self, change: impl FnOnce(&mut StatusReportData)) {
+        let mut data = self.0.lock().unwrap();
+        change(&mut *data);
+        data.print();
+    }
+
+    /// Counts one more file and makes `f` the displayed file label.
+    pub fn enter_file(&self, f: &str) {
+        self.update(|data| {
+            data.files += 1;
+            data.last_file = f.to_string();
+        });
+    }
+
+    /// Counts one more item and makes `i` the displayed item label.
+    fn enter_item(&self, i: String) {
+        self.update(|data| {
+            data.items += 1;
+            data.last_item = i;
+        });
+    }
+
+    /// Replaces the displayed item label without touching any counter.
+    pub fn update_item(&self, i: String) {
+        self.update(|data| data.last_item = i);
+    }
+
+    /// Counts one more changed item.
+    pub fn changed_item(&self) {
+        self.update(|data| data.changed_items += 1);
+    }
+
+    /// Advances the item counter by `i` without changing the item label.
+    fn skip_items(&self, i: usize) {
+        self.update(|data| data.items += i);
+    }
+}
+
+/// A portion of the overall progress: borrows the shared [`StatusReport`]
+/// together with the number of items that have not yet been entered through
+/// this part. Whatever remains when the part is dropped is reported as
+/// skipped.
+struct StatusPart<'a>(&'a StatusReport, usize);
+
+impl<'a> StatusPart<'a> {
+    /// Forwards to [`StatusReport::enter_item`] and uses up one item of this
+    /// part's remaining budget.
+    fn enter_item(&mut self, i: String) {
+        self.0.enter_item(i);
+        // Saturate instead of underflowing when more items are entered than
+        // were budgeted; an underflow would panic in debug builds and make
+        // `drop` report a huge number of skipped items in release builds.
+        self.1 = self.1.saturating_sub(1);
+    }
+
+    /// Forwards to [`StatusReport::update_item`].
+    fn update_item(&mut self, i: String) {
+        self.0.update_item(i);
+    }
+
+    /// Forwards to [`StatusReport::changed_item`].
+    fn changed_item(&mut self) {
+        self.0.changed_item();
+    }
+}
+
+impl<'a> Drop for StatusPart<'a> {
+    /// Reports the items that were never entered as skipped.
+    fn drop(&mut self) {
+        self.0.skip_items(self.1);
+    }
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..d49d043
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,154 @@
+use std::{path::Path, fs};
+use anyhow::{Result, Context};
+use itertools::Itertools;
+use rnix::{SyntaxKind, ast::{AttrpathValue, AttrSet, HasEntry, Entry::*}, SyntaxNode};
+use rowan::{ast::AstNode, TextSize};
+
+use crate::status_reporter::StatusReport;
+
+/// Returns the offset at which the 1-based `line` starts in `s`.
+///
+/// The offset is the summed length of all preceding lines, counting one
+/// extra unit per line for its `'\n'` separator. A `line` of 0 is treated
+/// like line 1 and yields offset 0. If `line` lies past the end of `s`, the
+/// result is the sum over all lines (which exceeds the length of `s` by the
+/// separator counted for the final line).
+fn textsize_at_line(s: &str, line: usize) -> TextSize {
+    s
+        .split('\n')
+        // `saturating_sub` keeps `line == 0` from underflowing.
+        .take(line.saturating_sub(1))
+        .map(|l| TextSize::of(l) + TextSize::new(1))
+        .sum()
+}
+
+
+/// Returns `node` itself if it has the requested `kind`, otherwise the first
+/// node of that kind yielded by `node.descendants()`, or `None` if there is
+/// no such node.
+fn dig_to_kind(kind: SyntaxKind, node: &SyntaxNode) -> Option<SyntaxNode> {
+    if node.kind() == kind {
+        return Some(node.clone());
+    }
+
+    node.descendants().find(|candidate| candidate.kind() == kind)
+}
+
+/// Builds the text needed to add a `mainProgram` entry to an existing meta
+/// attrset.
+///
+/// `rough_pos` is a node at or above the meta attrset; the first attrset
+/// found at or below it is used. The new entry is placed directly after the
+/// attrset's `description` entry and indented like that entry. `content` is
+/// the full text of the file the syntax tree was parsed from; `file` and
+/// `line` are only used in error messages.
+///
+/// Returns the text to insert together with the byte offset in `content` at
+/// which to insert it.
+///
+/// # Errors
+///
+/// Fails if no attrset is found at or below `rough_pos`, or if the attrset
+/// does not contain exactly one `description` entry.
+fn add_to_meta_block(rough_pos: SyntaxNode, content: &str, main_program: &str, file: &Path, line: usize) -> Result<(String, usize)> {
+    let meta_set = dig_to_kind(SyntaxKind::NODE_ATTR_SET, &rough_pos)
+        .and_then(AttrSet::cast)
+        .with_context(|| format!("no attrset found below meta node in {:?} at {}", file, line))?;
+
+    let description_entry = meta_set.entries()
+        .filter(|entry| match entry {
+            Inherit(it) => it.attrs().any(|c| c.to_string() == "description"),
+            AttrpathValue(it) => it
+                .attrpath()
+                .map_or(false, |path| path.to_string() == "description"),
+        })
+        .exactly_one().ok()
+        .with_context(|| format!("meta node has no description attribute in {:?} at {}", file, line))?;
+
+    let pos = description_entry.syntax().text_range();
+    let start: usize = pos.start().into();
+
+    // Number of characters between the start of the line and the entry.
+    // `rsplit` always yields at least one piece, so an entry on the very
+    // first line of the file is handled as well.
+    let indent = content[..start]
+        .rsplit('\n')
+        .next()
+        .map_or(0, |prefix| prefix.chars().count());
+
+    let patch = format!("\n{}mainProgram = \"{}\";", " ".repeat(indent), main_program);
+
+    Ok((patch, pos.end().into()))
+}
+
+/// Returns the attribute path of `node` rendered as text, or `None` when
+/// `node` is not an attrpath-value binding (or has no attrpath).
+fn binding_name(node: &SyntaxNode) -> Option<String> {
+    let binding = AttrpathValue::cast(node.clone())?;
+    binding.attrpath().map(|path| path.to_string())
+}
+
+/// Reads the Nix file `file` and returns its text with a `mainProgram`
+/// entry for `main_program` added near the 1-based `line`.
+///
+/// The file on disk is not modified; the caller receives the new content.
+/// The insertion point is chosen as follows:
+///
+/// 1. If exactly one `meta` binding spans the start of `line`, the entry is
+///    added to that meta block.
+/// 2. Otherwise the single `version`/`pname`/`name` binding spanning a
+///    position shortly after the start of `line` is located, and its parent
+///    attrset is used:
+///    * if exactly one `meta` binding exists below that attrset, the entry
+///      is added to it;
+///    * otherwise a new line is appended at the end of the attrset, written
+///      as plain `mainProgram` when the attrset itself carries typical meta
+///      attributes, and as `meta.mainProgram` otherwise.
+///
+/// Progress is reported through `p`.
+///
+/// # Errors
+///
+/// Fails if the file cannot be read or parsed, if no suitable binding is
+/// found at `line`, if that binding is not directly inside an attrset, or
+/// if the meta block cannot be extended.
+fn edit_one(file: &Path, line: usize, main_program: &str, p: &StatusReport) -> Result<String> {
+    let mut content = fs::read_to_string(file)?;
+    let searchpos = textsize_at_line(&content, line);
+
+    p.update_item(format!("doing {:?}", file));
+    let parse = rnix::Root::parse(&content);
+    if !parse.errors().is_empty() {
+        anyhow::bail!("error: {:?}", parse.errors());
+    }
+    let tree = parse.tree();
+
+    let meta_at_line = tree.syntax().descendants()
+        .filter(|node| {
+            node.text_range().contains(searchpos)
+                && binding_name(node).as_deref() == Some("meta")
+        })
+        .exactly_one().ok();
+
+    // do we have a meta attrset already?
+    let (patch, insert_offset) = match meta_at_line {
+        Some(meta) => add_to_meta_block(meta, &content, main_program, file, line)?,
+        None => {
+            // Probe slightly past the start of the line rather than at its
+            // very first character.
+            let anchor_pos = searchpos + TextSize::new(5);
+            let version_node = tree
+                .syntax()
+                .descendants()
+                .filter(|node| {
+                    node.text_range().contains(anchor_pos)
+                        && matches!(
+                            binding_name(node).as_deref(),
+                            Some("version") | Some("pname") | Some("name")
+                        )
+                })
+                .exactly_one().ok()
+                .with_context(|| format!("neither meta nor version node found for {:?} at {}", file, line))?;
+
+            let attrset = version_node.parent()
+                .filter(|parent| parent.kind() == SyntaxKind::NODE_ATTR_SET)
+                .with_context(|| format!("name not in an attrset in {:?} at {}", file, line))?;
+
+            // does a meta block already exist?
+            let maybe_meta_block = attrset
+                .descendants()
+                .filter(|node| binding_name(node).as_deref() == Some("meta"))
+                .exactly_one();
+
+            match maybe_meta_block {
+                Ok(meta) => add_to_meta_block(meta, &content, main_program, file, line)?,
+                Err(_) => {
+                    // Insert in front of the line break that precedes the
+                    // attrset's last line. `rfind` yields a byte offset, so
+                    // this stays valid even if that line has non-ASCII text.
+                    let attrset_end: usize = attrset.text_range().end().into();
+                    let before_attrset_end = content[..attrset_end]
+                        .rfind('\n')
+                        .with_context(|| format!("no line break before the end of the attrset in {:?} at {}", file, line))?;
+
+                    // Indent the new line like the version/name binding.
+                    let version_start: usize = version_node.text_range().start().into();
+                    let indent = content[..version_start]
+                        .rsplit('\n')
+                        .next()
+                        .map_or(0, |prefix| prefix.chars().count());
+
+                    // some language specific build systems don't use meta as its own attrset
+                    // there's no good way to recognise these, but this seems to work fine
+                    let weird_nonstandard_meta = attrset
+                        .descendants()
+                        .any(|node| {
+                            matches!(
+                                binding_name(&node).as_deref(),
+                                Some("description") | Some("homepage") | Some("license")
+                            )
+                        });
+                    let key = if weird_nonstandard_meta { "mainProgram" } else { "meta.mainProgram" };
+                    let patch = format!("\n{}{} = \"{}\";", " ".repeat(indent), key, main_program);
+
+                    (patch, before_attrset_end)
+                }
+            }
+        }
+    };
+
+    content.insert_str(insert_offset, &patch);
+
+    p.changed_item();
+    Ok(content)
+}
author	stuebinm	2024-04-03 22:22:38 +0200
committer	stuebinm	2024-04-03 23:36:33 +0200
commit	0567f916d4365c8dc0be99d194fe6d157befbc81 (patch)
tree	8e1123ae8112abab0f3726da75bec2c08787ce0e
parent	48534f8c321cb33190a3cc80a9c364ffbf68c878 (diff)