From b6a6abe104d348ba696054c930a3dafe0edf8d00 Mon Sep 17 00:00:00 2001 From: notgne2 Date: Wed, 30 Dec 2020 20:25:59 -0700 Subject: Modify activation logic to keep SSH connection alive for magic-rollback and avoid forking or disconnecting std{out,err} --- Cargo.lock | 15 ++++- Cargo.toml | 1 + src/activate.rs | 182 ++++++++++++++++++++++++++++++++++++++++------------ src/main.rs | 6 +- src/utils/deploy.rs | 167 +++++++++++++++++++++++++++++++++++++---------- src/utils/mod.rs | 54 +++++++++++++--- 6 files changed, 339 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2ec71c..8a96a33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,6 +146,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", + "signal-hook", "smol_str", "thiserror", "tokio", @@ -792,11 +793,21 @@ dependencies = [ "serde", ] +[[package]] +name = "signal-hook" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b3799fa361789a685db59e3986fb5f6f949e478728b9913c6759f7b014d0372" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce32ea0c6c56d5eacaeb814fbed9960547021d3edd010ded1425f180536b20ab" +checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index f276453..b48eea3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ thiserror = "1.0" toml = "0.5" yn = "0.1" rnix = "0.8" +signal-hook = "0.3" # smol_str is required by rnix, but 0.1.17 doesn't build on rustc # 1.45.2 (shipped in nixos-20.09); it requires rustc 1.46.0. See diff --git a/src/activate.rs b/src/activate.rs index 9c23068..49d16af 100644 --- a/src/activate.rs +++ b/src/activate.rs @@ -3,6 +3,8 @@ // // SPDX-License-Identifier: MPL-2.0 +use signal_hook::{consts::signal::SIGHUP, iterator::Signals}; + use clap::Clap; use tokio::fs; @@ -27,24 +29,39 @@ extern crate serde_derive; #[macro_use] mod utils; -/// Activation portion of the simple Rust Nix deploy tool +/// Remote activation utility for deploy-rs #[derive(Clap, Debug)] #[clap(version = "1.0", author = "Serokell ")] struct Opts { - profile_path: String, - closure: String, - /// Print debug logs to output #[clap(short, long)] debug_logs: bool, - /// Directory to print logs to (including the background activation process) + /// Directory to print logs to #[clap(long)] log_dir: Option, - /// Temp path for any temporary files that may be needed during activation + /// Path for any temporary files that may be needed during activation #[clap(long)] temp_path: String, + #[clap(subcommand)] + subcmd: SubCommand, +} + +#[derive(Clap, Debug)] +enum SubCommand { + Activate(ActivateOpts), + Wait(WaitOpts), +} + +/// Activate a profile +#[derive(Clap, Debug)] +struct ActivateOpts { + /// The closure to activate + closure: String, + /// The profile path to install into + profile_path: String, + /// Maximum time to wait for confirmation after activation #[clap(long)] confirm_timeout: u16, @@ -58,6 +75,13 @@ struct Opts { auto_rollback: bool, } +/// Activate a profile +#[derive(Clap, Debug)] +struct WaitOpts { + /// The closure to wait for + closure: String, +} + #[derive(Error, Debug)] pub enum DeactivateError { #[error("Failed to execute the rollback command: {0}")] @@ -201,8 +225,9 @@ pub async fn activation_confirmation( confirm_timeout: u16, closure: String, ) -> Result<(), ActivationConfirmationError> { - let lock_hash = &closure["/nix/store/".len()..]; - let lock_path = format!("{}/deploy-rs-canary-{}", temp_path, lock_hash); + let lock_path = utils::make_lock_path(&temp_path, &closure); + + debug!("Ensuring parent directory exists for canary file"); if let Some(parent) = Path::new(&lock_path).parent() { fs::create_dir_all(parent) @@ -210,49 +235,98 @@ pub async fn activation_confirmation( .map_err(ActivationConfirmationError::CreateConfirmDirError)?; } + debug!("Creating canary file"); + fs::File::create(&lock_path) .await - .map_err(ActivationConfirmationError::CreateConfirmDirError)?; + .map_err(ActivationConfirmationError::CreateConfirmFileError)?; + + debug!("Creating notify watcher"); let (deleted, done) = mpsc::channel(1); + let mut watcher: RecommendedWatcher = Watcher::new_immediate(move |res: Result| { let send_result = match res { Ok(e) if e.kind == notify::EventKind::Remove(notify::event::RemoveKind::File) => { - deleted.blocking_send(Ok(())) + debug!("Got worthy removal event, sending on channel"); + deleted.try_send(Ok(())) + } + Err(e) => { + debug!("Got error waiting for removal event, sending on channel"); + deleted.try_send(Err(e)) } Ok(_) => Ok(()), // ignore non-removal events - Err(e) => deleted.blocking_send(Err(e)), }; + if let Err(e) = send_result { error!("Could not send file system event to watcher: {}", e); } })?; - watcher.watch(lock_path, RecursiveMode::Recursive)?; - if let fork::Fork::Child = - fork::daemon(false, false).map_err(ActivationConfirmationError::ForkError)? - { - std::thread::spawn(move || { - let rt = tokio::runtime::Runtime::new().unwrap(); + watcher.watch(&lock_path, RecursiveMode::NonRecursive)?; + + if let Err(err) = danger_zone(done, confirm_timeout).await { + error!("Error waiting for confirmation event: {}", err); - rt.block_on(async move { - if let Err(err) = danger_zone(done, confirm_timeout).await { - if let Err(err) = deactivate(&profile_path).await { - error!("Error de-activating due to another error in confirmation thread, oh no...: {}", err); - } + if let Err(err) = deactivate(&profile_path).await { + error!( + "Error de-activating due to another error waiting for confirmation, oh no...: {}", + err + ); + } + } + + Ok(()) +} - error!("Error in confirmation thread: {}", err); +#[derive(Error, Debug)] +pub enum WaitError { + #[error("Error creating watcher for activation: {0}")] + Watcher(#[from] notify::Error), + #[error("Error waiting for activation: {0}")] + Waiting(#[from] DangerZoneError), +} +pub async fn wait(temp_path: String, closure: String) -> Result<(), WaitError> { + let lock_path = utils::make_lock_path(&temp_path, &closure); + + let (created, done) = mpsc::channel(1); + + let mut watcher: RecommendedWatcher = { + // TODO: fix wasteful clone + let lock_path = lock_path.clone(); + + Watcher::new_immediate(move |res: Result| { + let send_result = match res { + Ok(e) if e.kind == notify::EventKind::Create(notify::event::CreateKind::File) => { + match &e.paths[..] { + [x] if x == Path::new(&lock_path) => created.try_send(Ok(())), + _ => Ok(()), } - }); - }) - .join() - .unwrap(); + } + Err(e) => created.try_send(Err(e)), + Ok(_) => Ok(()), // ignore non-removal events + }; + + if let Err(e) = send_result { + error!("Could not send file system event to watcher: {}", e); + } + })? + }; - info!("Confirmation successful!"); + watcher.watch(&temp_path, RecursiveMode::NonRecursive)?; + + // Avoid a potential race condition by checking for existence after watcher creation + if fs::metadata(&lock_path).await.is_ok() { + watcher.unwatch(&temp_path)?; + return Ok(()); } - std::process::exit(0); + danger_zone(done, 60).await?; + + info!("Found canary file, done waiting!"); + + Ok(()) } #[derive(Error, Debug)] @@ -303,6 +377,8 @@ pub async fn activate( } }; + debug!("Running activation script"); + let activate_status = match Command::new(format!("{}/deploy-rs-activate", profile_path)) .env("PROFILE", &profile_path) .current_dir(&profile_path) @@ -333,6 +409,7 @@ pub async fn activate( if magic_rollback { info!("Magic rollback is enabled, setting up confirmation hook..."); + match activation_confirmation(profile_path.clone(), temp_path, confirm_timeout, closure) .await { @@ -349,20 +426,43 @@ pub async fn activate( #[tokio::main] async fn main() -> Result<(), Box> { + // Ensure that this process stays alive after the SSH connection dies + let mut signals = Signals::new(&[SIGHUP])?; + std::thread::spawn(move || { + for sig in signals.forever() { + println!("Received NOHUP - ignoring..."); + } + }); + let opts: Opts = Opts::parse(); - utils::init_logger(opts.debug_logs, opts.log_dir.as_deref(), true)?; - - match activate( - opts.profile_path, - opts.closure, - opts.auto_rollback, - opts.temp_path, - opts.confirm_timeout, - opts.magic_rollback, - ) - .await - { + utils::init_logger( + opts.debug_logs, + opts.log_dir.as_deref(), + match opts.subcmd { + SubCommand::Activate(_) => utils::LoggerType::Activate, + SubCommand::Wait(_) => utils::LoggerType::Wait, + }, + )?; + + let r = match opts.subcmd { + SubCommand::Activate(activate_opts) => activate( + activate_opts.profile_path, + activate_opts.closure, + activate_opts.auto_rollback, + opts.temp_path, + activate_opts.confirm_timeout, + activate_opts.magic_rollback, + ) + .await + .map_err(|x| Box::new(x) as Box), + + SubCommand::Wait(wait_opts) => wait(opts.temp_path, wait_opts.closure) + .await + .map_err(|x| Box::new(x) as Box), + }; + + match r { Ok(()) => (), Err(err) => good_panic!("{}", err), } diff --git a/src/main.rs b/src/main.rs index 6783c40..1544fed 100644 --- a/src/main.rs +++ b/src/main.rs @@ -498,7 +498,11 @@ enum RunError { async fn run() -> Result<(), RunError> { let opts: Opts = Opts::parse(); - utils::init_logger(opts.debug_logs, opts.log_dir.as_deref(), false)?; + utils::init_logger( + opts.debug_logs, + opts.log_dir.as_deref(), + utils::LoggerType::Deploy, + )?; let deploy_flake = utils::parse_flake(opts.flake.as_str())?; diff --git a/src/utils/deploy.rs b/src/utils/deploy.rs index 93cebee..3371160 100644 --- a/src/utils/deploy.rs +++ b/src/utils/deploy.rs @@ -19,9 +19,24 @@ fn build_activate_command( debug_logs: bool, log_dir: Option<&str>, ) -> String { - let mut self_activate_command = format!( - "{}/activate-rs '{}' '{}' --temp-path {} --confirm-timeout {}", - closure, profile_path, closure, temp_path, confirm_timeout + let mut self_activate_command = format!("{}/activate-rs", closure); + + if debug_logs { + self_activate_command = format!("{} --debug-logs", self_activate_command); + } + + if let Some(log_dir) = log_dir { + self_activate_command = format!("{} --log-dir {}", self_activate_command, log_dir); + } + + self_activate_command = format!( + "{} --temp-path '{}' activate '{}' '{}'", + self_activate_command, temp_path, closure, profile_path + ); + + self_activate_command = format!( + "{} --confirm-timeout {}", + self_activate_command, confirm_timeout ); if magic_rollback { @@ -32,14 +47,6 @@ fn build_activate_command( self_activate_command = format!("{} --auto-rollback", self_activate_command); } - if debug_logs { - self_activate_command = format!("{} --debug-logs", self_activate_command); - } - - if let Some(log_dir) = log_dir { - self_activate_command = format!("{} --log-dir {}", self_activate_command, log_dir); - } - if let Some(sudo_cmd) = &sudo { self_activate_command = format!("{} {}", sudo_cmd, self_activate_command); } @@ -49,7 +56,6 @@ fn build_activate_command( #[test] fn test_activation_command_builder() { - let activate_path_str = "/blah/bin/activate".to_string(); let sudo = Some("sudo -u test".to_string()); let profile_path = "/blah/profiles/test"; let closure = "/nix/store/blah/etc"; @@ -72,7 +78,57 @@ fn test_activation_command_builder() { debug_logs, log_dir ), - "sudo -u test /nix/store/blah/etc/activate-rs '/blah/profiles/test' '/nix/store/blah/etc' --temp-path /tmp --confirm-timeout 30 --magic-rollback --auto-rollback --debug-logs --log-dir /tmp/something.txt" + "sudo -u test /nix/store/blah/etc/activate-rs --debug-logs --log-dir /tmp/something.txt --temp-path '/tmp' activate '/nix/store/blah/etc' '/blah/profiles/test' --confirm-timeout 30 --magic-rollback --auto-rollback" + .to_string(), + ); +} + +fn build_wait_command( + sudo: &Option, + closure: &str, + temp_path: &Cow, + debug_logs: bool, + log_dir: Option<&str>, +) -> String { + let mut self_activate_command = format!("{}/activate-rs", closure); + + if debug_logs { + self_activate_command = format!("{} --debug-logs", self_activate_command); + } + + if let Some(log_dir) = log_dir { + self_activate_command = format!("{} --log-dir {}", self_activate_command, log_dir); + } + + self_activate_command = format!( + "{} --temp-path '{}' wait '{}'", + self_activate_command, temp_path, closure + ); + + if let Some(sudo_cmd) = &sudo { + self_activate_command = format!("{} {}", sudo_cmd, self_activate_command); + } + + self_activate_command +} + +#[test] +fn test_wait_command_builder() { + let sudo = Some("sudo -u test".to_string()); + let closure = "/nix/store/blah/etc"; + let temp_path = &"/tmp".into(); + let debug_logs = true; + let log_dir = Some("/tmp/something.txt"); + + assert_eq!( + build_wait_command( + &sudo, + closure, + temp_path, + debug_logs, + log_dir + ), + "sudo -u test /nix/store/blah/etc/activate-rs --debug-logs --log-dir /tmp/something.txt --temp-path '/tmp' wait '/nix/store/blah/etc'" .to_string(), ); } @@ -81,10 +137,20 @@ fn test_activation_command_builder() { pub enum DeployProfileError { #[error("Failed to calculate activate bin path from deploy bin path: {0}")] DeployPathToActivatePathError(#[from] super::DeployPathToActivatePathError), + + #[error("Failed to spawn activation command over SSH: {0}")] + SSHSpawnActivateError(std::io::Error), + #[error("Failed to run activation command over SSH: {0}")] SSHActivateError(std::io::Error), - #[error("Activation over SSH resulted in a bad exit code: {0:?}")] + #[error("Activating over SSH resulted in a bad exit code: {0:?}")] SSHActivateExitError(Option), + + #[error("Failed to run wait command over SSH: {0}")] + SSHWaitError(std::io::Error), + #[error("Waiting over SSH resulted in a bad exit code: {0:?}")] + SSHWaitExitError(Option), + #[error("Failed to run confirmation command over SSH (the server should roll back): {0}")] SSHConfirmError(std::io::Error), #[error( @@ -127,35 +193,70 @@ pub async fn deploy_profile( debug!("Constructed activation command: {}", self_activate_command); + let self_wait_command = build_wait_command( + &deploy_defs.sudo, + &deploy_data.profile.profile_settings.path, + &temp_path, + deploy_data.debug_logs, + deploy_data.log_dir, + ); + + debug!("Constructed wait command: {}", self_wait_command); + let hostname = match deploy_data.cmd_overrides.hostname { Some(ref x) => x, None => &deploy_data.node.node_settings.hostname, }; - let mut c = Command::new("ssh"); - let mut ssh_command = c - .arg("-t") - .arg(format!("ssh://{}@{}", deploy_defs.ssh_user, hostname)); + let ssh_addr = format!("ssh://{}@{}", deploy_defs.ssh_user, hostname); + + let mut ssh_activate_command_ = Command::new("ssh"); + let ssh_activate_command = ssh_activate_command_.arg(&ssh_addr); for ssh_opt in &deploy_data.merged_settings.ssh_opts { - ssh_command = ssh_command.arg(ssh_opt); + ssh_activate_command.arg(&ssh_opt); } - let ssh_exit_status = ssh_command - .arg(self_activate_command) - .status() - .await - .map_err(DeployProfileError::SSHActivateError)?; + if !magic_rollback { + let ssh_activate_exit_status = ssh_activate_command + .arg(self_activate_command) + .status() + .await + .map_err(DeployProfileError::SSHActivateError)?; - match ssh_exit_status.code() { - Some(0) => (), - a => return Err(DeployProfileError::SSHActivateExitError(a)), - }; + match ssh_activate_exit_status.code() { + Some(0) => (), + a => return Err(DeployProfileError::SSHActivateExitError(a)), + }; - info!("Success activating!"); + info!("Success activating, done!"); + } else { + let ssh_activate = ssh_activate_command + .arg(self_activate_command) + .spawn() + .map_err(DeployProfileError::SSHSpawnActivateError)?; - if magic_rollback { - info!("Attempting to confirm activation"); + info!("Creating activation waiter"); + + let mut ssh_wait_command_ = Command::new("ssh"); + let ssh_wait_command = ssh_wait_command_.arg(&ssh_addr); + + for ssh_opt in &deploy_data.merged_settings.ssh_opts { + ssh_wait_command.arg(ssh_opt); + } + + let ssh_wait_exit_status = ssh_wait_command + .arg(self_wait_command) + .status() + .await + .map_err(DeployProfileError::SSHWaitError)?; + + match ssh_wait_exit_status.code() { + Some(0) => (), + a => return Err(DeployProfileError::SSHWaitExitError(a)), + }; + + info!("Success activating, attempting to confirm activation"); let mut c = Command::new("ssh"); let mut ssh_confirm_command = c.arg(format!("ssh://{}@{}", deploy_defs.ssh_user, hostname)); @@ -164,8 +265,8 @@ pub async fn deploy_profile( ssh_confirm_command = ssh_confirm_command.arg(ssh_opt); } - let lock_hash = &deploy_data.profile.profile_settings.path["/nix/store/".len()..]; - let lock_path = format!("{}/deploy-rs-canary-{}", temp_path, lock_hash); + let lock_path = + super::make_lock_path(&temp_path, &deploy_data.profile.profile_settings.path); let mut confirm_command = format!("rm {}", lock_path); if let Some(sudo_cmd) = &deploy_defs.sudo { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 9557aab..b1aa3e6 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -19,6 +19,12 @@ macro_rules! good_panic { }} } +pub fn make_lock_path(temp_path: &str, closure: &str) -> String { + let lock_hash = + &closure["/nix/store/".len()..closure.find("-").unwrap_or_else(|| closure.len())]; + format!("{}/deploy-rs-canary-{}", temp_path, lock_hash) +} + fn make_emoji(level: log::Level) -> &'static str { match level { log::Level::Error => "❌", @@ -38,7 +44,23 @@ pub fn logger_formatter_activate( write!( w, - "⭐ REMOTE ⭐ {0} {1} {0} {2}", + "⭐ {0} {1} {0} {2}", + make_emoji(level), + style(level, level.to_string()), + record.args() + ) +} + +pub fn logger_formatter_wait( + w: &mut dyn std::io::Write, + _now: &mut DeferredNow, + record: &Record, +) -> Result<(), std::io::Error> { + let level = record.level(); + + write!( + w, + "👀 {0} {1} {0} {2}", make_emoji(level), style(level, level.to_string()), record.args() @@ -61,18 +83,25 @@ pub fn logger_formatter_deploy( ) } +pub enum LoggerType { + Deploy, + Activate, + Wait, +} + pub fn init_logger( debug_logs: bool, log_dir: Option<&str>, - activate: bool, + logger_type: LoggerType, ) -> Result<(), FlexiLoggerError> { - let logger_formatter = match activate { - true => logger_formatter_activate, - false => logger_formatter_deploy, + let logger_formatter = match logger_type { + LoggerType::Deploy => logger_formatter_deploy, + LoggerType::Activate => logger_formatter_activate, + LoggerType::Wait => logger_formatter_wait, }; if let Some(log_dir) = log_dir { - Logger::with_env_or_str("debug") + let mut logger = Logger::with_env_or_str("debug") .log_to_file() .format_for_stderr(logger_formatter) .set_palette("196;208;51;7;8".to_string()) @@ -81,15 +110,22 @@ pub fn init_logger( true => Duplicate::Debug, false => Duplicate::Info, }) - .print_message() - .start()?; + .print_message(); + + match logger_type { + LoggerType::Activate => logger = logger.discriminant("activate"), + LoggerType::Wait => logger = logger.discriminant("wait"), + LoggerType::Deploy => (), + } + + logger.start()?; } else { Logger::with_env_or_str(match debug_logs { true => "debug", false => "info", }) .log_target(LogTarget::StdErr) - .format(logger_formatter_deploy) + .format(logger_formatter) .set_palette("196;208;51;7;8".to_string()) .start()?; } -- cgit v1.2.3