From 48534f8c321cb33190a3cc80a9c364ffbf68c878 Mon Sep 17 00:00:00 2001 From: stuebinm Date: Tue, 19 Mar 2024 02:44:58 +0100 Subject: rust script to automatically add meta.mainProgram to nixpkgs with thanks to pennae, who showed that this approach was possible (and wrote the status-report widget) https://github.com/pennae/nix-doc-munge --- src/main.rs | 356 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 src/main.rs (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..5e86c89 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,356 @@ +use std::{path::{Path, PathBuf}, fs, str::FromStr, sync::{Arc, Mutex}}; +use anyhow::{Result, Context, anyhow}; +use itertools::Itertools; +use rnix::{SyntaxKind, ast::{AttrpathValue, AttrSet, HasEntry, Entry::*}, SyntaxNode}; +use rowan::{ast::AstNode, TextSize}; +use serde::Deserialize; +use threadpool::ThreadPool; + +struct StatusReportData { + files: usize, + items: usize, + total_files: usize, + total_items: usize, + changed_items: usize, + last_file: String, + last_item: String, +} + +impl StatusReportData { + fn print(&self, clear: bool) { + if clear { + print!("\x1b[1F\x1b[2K\x1b[1F\x1b[2K"); + } + println!("{}/{} files ({})", self.files, self.total_files, self.last_file); + println!("{}/{} ({}) items ({})", self.items, self.total_items, + self.changed_items, self.last_item); + } +} + +struct StatusReport(Mutex); + +impl StatusReport { + fn new(total_files: usize, total_items: usize) -> Self { + Self(Mutex::new(StatusReportData { + files: 0, + items: 0, + total_files, + total_items, + changed_items: 0, + last_file: "".to_string(), + last_item: "".to_string(), + })) + } + + fn enter_file(&self, f: &str) { + let mut m = self.0.lock().unwrap(); + m.files += 1; + m.last_file = f.to_string(); + m.print(m.files > 1 || m.items >= 1); + } + + fn enter_item(&self, i: String) { + let mut m = self.0.lock().unwrap(); + m.items += 1; + m.last_item = i; + m.print(m.files >= 1 || m.items > 1); + } + + fn update_item(&self, i: String) { + let mut m = self.0.lock().unwrap(); + m.last_item = i; + m.print(true); + } + + fn changed_item(&self) { + let mut m = self.0.lock().unwrap(); + m.changed_items += 1; + m.print(true); + } + + fn skip_items(&self, i: usize) { + let mut m = self.0.lock().unwrap(); + m.items += i; + m.print(m.files >= 1 || m.items >= 1); + } +} + +struct StatusPart<'a>(&'a StatusReport, usize); + +impl<'a> StatusPart<'a> { + fn enter_item(&mut self, i: String) { + self.0.enter_item(i); + self.1 -= 1; + } + + fn update_item(&mut self, i: String) { + self.0.update_item(i); + } + + fn changed_item(&mut self) { + self.0.changed_item(); + } +} + +impl<'a> Drop for StatusPart<'a> { + fn drop(&mut self) { + self.0.skip_items(self.1); + } +} + + + +fn textsize_at_line(s: &str, line: usize) -> TextSize { + s + .split('\n') + .map(|l| + TextSize::of(l) + TextSize::new(1) + ) + .take(line-1) + .sum() +} + + +fn dig_to_kind(kind: SyntaxKind, node: &SyntaxNode) -> Option { + if node.kind() == kind { + return Some(node.clone()); + } + + node.descendants() + .filter(|node| node.kind() == kind) + .next() +} + +fn add_to_meta_block(rough_pos: SyntaxNode, content: &str, main_program: &str, file: &Path, line: usize) -> Result<(String, usize)> { + let meta_node = dig_to_kind(SyntaxKind::NODE_ATTR_SET, &rough_pos).unwrap(); + let meta_set = AttrSet::cast(meta_node.clone()).unwrap(); + + let description_entry = meta_set.entries() + .filter(|entry| { + match &entry { + Inherit(it) => it.attrs().any(|c| c.to_string() == "description"), + AttrpathValue(it) => it.attrpath().unwrap().to_string() == "description", + } + }) + .exactly_one().ok() + .with_context(|| format!("meta node has no description attribute in {:?} at {}", file, line))?; + let description = description_entry.syntax(); + + let pos = description.text_range(); + let indent = content[..pos.start().into()].chars().rev().position(|c| c == '\n').unwrap(); + + let patch = String::new() + + "\n" + + &" ".repeat(indent) + + "mainProgram = \"" + main_program + "\";"; + + Ok((patch, pos.end().into())) +} + +fn edit_one(file: &Path, line: usize, main_program: &str, p: &StatusReport) -> Result { + let mut content = fs::read_to_string(file)?; + let searchpos = textsize_at_line(&content, line); + + p.update_item(format!("doing {:?}", file)); + let parse = rnix::Root::parse(&content); + if !parse.errors().is_empty() { + anyhow::bail!("error: {:?}", parse.errors()); + } + let tree = parse.tree(); + + let pos_node = tree.syntax().descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + node.text_range().contains(searchpos) && value.attrpath().unwrap().to_string() == "meta" + } else { false } + }) + .exactly_one().ok(); + + // do we have a meta attrset already? + let (patch, insert_offset) = match pos_node { + None => { + let version_node = tree + .syntax() + .descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + node.text_range().contains(searchpos + TextSize::new(5)) + && (name == "version" || name == "pname" || name == "name") + } else { false } + }) + .exactly_one().ok() + .with_context(|| format!("neither meta nor version node found for {:?} at {}", file, line))?; + + let attrset = version_node.parent().unwrap(); + if attrset.kind() != SyntaxKind::NODE_ATTR_SET { + anyhow::bail!("name not in an attrset in {:?} at {}", file, line) + } + + // does a meta block already exist? + let maybe_meta_block = attrset + .descendants() + .filter(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + name == "meta" + } else { false } + }) + .exactly_one(); + + if let Ok(meta) = maybe_meta_block { + add_to_meta_block(meta.clone(), &content, main_program, file, line)? + } else { + let before_attrset_end = Into::::into(attrset.text_range().end()) + - 1 + - content[..attrset.text_range().end().into()] + .chars().rev().position(|c| c == '\n').unwrap(); + + let indent = content[..version_node.text_range().start().into()] + .chars().rev().position(|c| c == '\n').unwrap(); + + // some language specific build systems don't use meta as its own attrset + // there's no good way to recognise these, but this seems to work fine + let weird_nonstandard_meta = attrset + .descendants() + .any(|node| { + if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + let value = AttrpathValue::cast(node.clone()).unwrap(); + let name = value.attrpath().unwrap().to_string(); + name == "description" || name == "homepage" || name == "license" + } else { false } + }); + let patch = String::new() + + "\n" + + &" ".repeat(indent) + + if weird_nonstandard_meta { "mainProgram = \"" } else { "meta.mainProgram = \"" } + + main_program + "\";"; + + (patch, before_attrset_end) + } + }, + Some(pos) => { + add_to_meta_block(pos.clone(), &content, main_program, file, line)? + } + }; + + + content = String::new() + + &content[..insert_offset] + + &patch + + &content[insert_offset..]; + + p.changed_item(); + Ok(content) +} + + +#[derive(Deserialize, Clone)] +struct TrivialProgram { + pos: Option, + name: String +} + +fn main() { + + let raw_inputs = fs::read_to_string("trivials.json").unwrap(); + let inputs: Vec = serde_json::de::from_str::>(&raw_inputs).unwrap(); + // .into_iter().take(200).collect(); + + + // TODO: group edits in the same file + let pool = ThreadPool::new(16); + + let mut tasks: Vec<(TrivialProgram, PathBuf, usize)> = inputs.into_iter() + .filter_map(|i| { + if i.pos.is_none() { + println!("no position for name {}", i.name); + None + } else { + let pos = i.pos.as_ref().unwrap(); + let (filename, line) = { + let l = pos.split(':').collect::>(); + assert!(l.len() == 2); + + (PathBuf::from_str(l[0]).unwrap(), l[1].parse().unwrap()) + }; + Some((i, filename, line)) + } + }) + .collect(); + + tasks.sort_by_key(|(_ ,filename, _)| filename.clone()); + + let grouped_tasks: Vec<(TrivialProgram, PathBuf, Vec<(usize, String)>)> = + tasks.into_iter() + .map(|(i, f, l)| (i.clone(), f, vec![(l, i.name)])) + .coalesce(|(i1, f1, l1), (i2, f2, l2)| { + if f1 == f2 { + if l1 == l2 && i1.name == i2.name { + Ok((i1, f1, l1)) + } else { + Ok((i1, f1, l1.into_iter().chain(l2.into_iter()).collect())) + } + } else { + Err(((i1,f1,l1),(i2,f2,l2))) + } + }).collect(); + + let results = Arc::new(Mutex::new(vec![])); + let printer = Arc::new(StatusReport::new(grouped_tasks.len(), grouped_tasks.len())); + + for (i, filename, sites) in grouped_tasks { + pool.execute({ + let results = Arc::clone(&results); + let printer = Arc::clone(&printer); + + move || { + let pos = i.pos.unwrap(); + printer.enter_file(&pos); + if sites.len() == 1 { + let result = edit_one(&filename, sites[0].0, &sites[0].1, &printer) + .map(|ok| (filename, ok)); + results.lock().unwrap().push(result); + } else { + results.lock().unwrap().push(Err(anyhow!("skipped {:?} as it has multiple edits", filename))); + } + } + }); + } + + pool.join(); + + println!("\n\nSummary:"); + let mut c_errors = 0; + let mut c_total = 0; + for r in results.lock().unwrap().iter() { + match r { + Err(e) => { + println!(" {}", e); + c_errors += 1; + }, + _ => () + } + c_total += 1; + } + + println!("\n ({c_total} sites total, {c_errors} errors, generated {} edits)", c_total - c_errors); + + let edits: Vec<_> = Arc::into_inner(results).unwrap().into_inner().unwrap() + .into_iter() + .filter_map(|r| r.ok()) + .collect(); + + // check we didn't miss any duplicate edits + let duplicates = edits.iter().duplicates_by(|(filename, _)| filename).count(); + println!("{duplicates} edits were not the only one in their file"); + + println!("applying changes ..."); + for (filename, content) in edits { + fs::write(&filename, content.as_bytes()).unwrap(); + // println!("{}", content); + } +} -- cgit v1.2.3