-
-
Notifications
You must be signed in to change notification settings - Fork 63
feat(sync):daemon support for advanced sync #528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,10 +2,10 @@ | |
// - [x] Setup local sync bucket | ||
// - [x] Import local buckets and sync events from aw-server (either through API or through creating a read-only Datastore) | ||
// - [x] Import buckets and sync events from remotes | ||
// - [ ] Add CLI arguments | ||
// - [x] Add CLI arguments | ||
// - [x] For which local server to use | ||
// - [x] For which sync dir to use | ||
// - [ ] Date to start syncing from | ||
// - [x] Date to start syncing from | ||
|
||
#[macro_use] | ||
extern crate log; | ||
|
@@ -60,35 +60,45 @@ struct Opts { | |
enum Commands { | ||
/// Daemon subcommand | ||
/// Starts aw-sync as a daemon, which will sync every 5 minutes. | ||
Daemon {}, | ||
Daemon { | ||
/// Date to start syncing from. | ||
/// If not specified, start from beginning. | ||
/// Format: YYYY-MM-DD | ||
#[clap(long, value_parser=parse_start_date)] | ||
start_date: Option<DateTime<Utc>>, | ||
|
||
/// Specify buckets to sync using a comma-separated list. | ||
/// By default, all buckets are synced. | ||
#[clap(long)] | ||
buckets: Option<String>, | ||
|
||
/// Full path to sync db file | ||
/// Useful for syncing buckets from a specific db file in the sync directory. | ||
/// Must be a valid absolute path to a file in the sync directory. | ||
#[clap(long)] | ||
sync_db: Option<PathBuf>, | ||
}, | ||
|
||
/// Sync subcommand (basic) | ||
/// Sync subcommand | ||
/// | ||
/// Pulls remote buckets then pushes local buckets. | ||
/// Syncs data between local aw-server and sync directory. | ||
/// First pulls remote buckets from the sync directory to the local aw-server. | ||
/// Then pushes local buckets from the aw-server to the local sync directory. | ||
Sync { | ||
/// Host(s) to pull from, comma separated. Will pull from all hosts if not specified. | ||
#[clap(long, value_parser=parse_list)] | ||
host: Option<Vec<String>>, | ||
}, | ||
|
||
/// Sync subcommand (advanced) | ||
/// | ||
/// Pulls remote buckets then pushes local buckets. | ||
/// First pulls remote buckets in the sync directory to the local aw-server. | ||
/// Then pushes local buckets from the aw-server to the local sync directory. | ||
#[clap(arg_required_else_help = true)] | ||
SyncAdvanced { | ||
/// Date to start syncing from. | ||
/// If not specified, start from beginning. | ||
/// NOTE: might be unstable, as count cannot be used to verify integrity of sync. | ||
/// Format: YYYY-MM-DD | ||
#[clap(long, value_parser=parse_start_date)] | ||
start_date: Option<DateTime<Utc>>, | ||
|
||
/// Specify buckets to sync using a comma-separated list. | ||
/// If not specified, all buckets will be synced. | ||
#[clap(long, value_parser=parse_list)] | ||
buckets: Option<Vec<String>>, | ||
/// By default, all buckets are synced. | ||
#[clap(long)] | ||
buckets: Option<String>, | ||
|
||
/// Mode to sync in. Can be "push", "pull", or "both". | ||
/// Defaults to "both". | ||
|
@@ -111,6 +121,13 @@ fn parse_start_date(arg: &str) -> Result<DateTime<Utc>, chrono::ParseError> { | |
} | ||
|
||
fn parse_list(arg: &str) -> Result<Vec<String>, clap::Error> { | ||
// If the argument is empty or just whitespace, return an empty Vec | ||
// This handles the case when --buckets is used without a value | ||
if arg.trim().is_empty() { | ||
return Ok(vec![]); | ||
} | ||
|
||
// Otherwise, split by comma as usual | ||
Ok(arg.split(',').map(|s| s.to_string()).collect()) | ||
} | ||
|
||
|
@@ -139,60 +156,94 @@ fn main() -> Result<(), Box<dyn Error>> { | |
|
||
let client = AwClient::new(&opts.host, port, "aw-sync")?; | ||
|
||
// if opts.command is None, then we're using the default subcommand (Sync) | ||
match opts.command.unwrap_or(Commands::Daemon {}) { | ||
// if opts.command is None, then we're using the default subcommand (Daemon) | ||
match opts.command.unwrap_or(Commands::Daemon { | ||
start_date: None, | ||
buckets: None, | ||
sync_db: None, | ||
}) { | ||
// Start daemon | ||
Commands::Daemon {} => { | ||
Commands::Daemon { | ||
start_date, | ||
buckets, | ||
sync_db, | ||
} => { | ||
info!("Starting daemon..."); | ||
daemon(&client)?; | ||
} | ||
// Perform basic sync | ||
Commands::Sync { host } => { | ||
// Pull | ||
match host { | ||
Some(hosts) => { | ||
for host in hosts.iter() { | ||
info!("Pulling from host: {}", host); | ||
sync_wrapper::pull(host, &client)?; | ||
} | ||
} | ||
None => { | ||
info!("Pulling from all hosts"); | ||
sync_wrapper::pull_all(&client)?; | ||
} | ||
} | ||
|
||
// Push | ||
info!("Pushing local data"); | ||
sync_wrapper::push(&client)? | ||
// Use an empty vector to sync all buckets for these cases: | ||
// 1. When --buckets '*' is supplied | ||
// 2. When no bucket argument is provided (default) | ||
let effective_buckets = if buckets.as_deref() == Some("*") || buckets.is_none() { | ||
Some(vec![]) | ||
} else if let Some(buckets_str) = buckets { | ||
Some(buckets_str.split(',').map(|s| s.to_string()).collect()) | ||
} else { | ||
None | ||
}; | ||
|
||
daemon(&client, start_date, effective_buckets, sync_db)?; | ||
} | ||
// Perform two-way sync | ||
Commands::SyncAdvanced { | ||
// Perform sync | ||
Commands::Sync { | ||
host, | ||
start_date, | ||
buckets, | ||
mode, | ||
sync_db, | ||
} => { | ||
let sync_dir = dirs::get_sync_dir()?; | ||
if let Some(db_path) = &sync_db { | ||
info!("Using sync db: {}", &db_path.display()); | ||
// Use an empty vector to sync all buckets for these cases: | ||
// 1. When --buckets '*' is supplied | ||
// 2. When no bucket argument is provided (default) | ||
let effective_buckets = if buckets.as_deref() == Some("*") || buckets.is_none() { | ||
Some(vec![]) | ||
} else if let Some(buckets_str) = buckets { | ||
Some(buckets_str.split(',').map(|s| s.to_string()).collect()) | ||
} else { | ||
None | ||
}; | ||
|
||
if !db_path.is_absolute() { | ||
Err("Sync db path must be absolute")? | ||
} | ||
if !db_path.starts_with(&sync_dir) { | ||
Err("Sync db path must be in sync directory")? | ||
// If advanced options are provided, use advanced sync mode | ||
if start_date.is_some() || effective_buckets.is_some() || sync_db.is_some() { | ||
let sync_dir = dirs::get_sync_dir()?; | ||
if let Some(db_path) = &sync_db { | ||
info!("Using sync db: {}", &db_path.display()); | ||
|
||
if !db_path.is_absolute() { | ||
Err("Sync db path must be absolute")? | ||
} | ||
if !db_path.starts_with(&sync_dir) { | ||
Err("Sync db path must be in sync directory")? | ||
} | ||
} | ||
} | ||
|
||
let sync_spec = sync::SyncSpec { | ||
path: sync_dir, | ||
path_db: sync_db, | ||
buckets, | ||
start: start_date, | ||
}; | ||
let sync_spec = sync::SyncSpec { | ||
path: sync_dir, | ||
path_db: sync_db, | ||
buckets: effective_buckets, | ||
start: start_date, | ||
}; | ||
|
||
sync::sync_run(&client, &sync_spec, mode)? | ||
} else { | ||
// Simple host-based sync mode (backwards compatibility) | ||
// Pull | ||
match host { | ||
Some(hosts) => { | ||
for host in hosts.iter() { | ||
info!("Pulling from host: {}", host); | ||
sync_wrapper::pull(host, &client)?; | ||
} | ||
} | ||
None => { | ||
info!("Pulling from all hosts"); | ||
sync_wrapper::pull_all(&client)?; | ||
} | ||
} | ||
|
||
sync::sync_run(&client, &sync_spec, mode)? | ||
// Push | ||
info!("Pushing local data"); | ||
sync_wrapper::push(&client)? | ||
} | ||
} | ||
|
||
// List all buckets | ||
|
@@ -207,23 +258,45 @@ fn main() -> Result<(), Box<dyn Error>> { | |
Ok(()) | ||
} | ||
|
||
fn daemon(client: &AwClient) -> Result<(), Box<dyn Error>> { | ||
fn daemon( | ||
client: &AwClient, | ||
start_date: Option<DateTime<Utc>>, | ||
buckets: Option<Vec<String>>, | ||
sync_db: Option<PathBuf>, | ||
) -> Result<(), Box<dyn Error>> { | ||
let (tx, rx) = channel(); | ||
|
||
ctrlc::set_handler(move || { | ||
let _ = tx.send(()); | ||
})?; | ||
|
||
let sync_dir = dirs::get_sync_dir()?; | ||
if let Some(db_path) = &sync_db { | ||
info!("Using sync db: {}", &db_path.display()); | ||
|
||
if !db_path.is_absolute() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it's worth atp, never going to reuse/rewrite any other time. |
||
Err("Sync db path must be absolute")? | ||
} | ||
if !db_path.starts_with(&sync_dir) { | ||
Err("Sync db path must be in sync directory")? | ||
} | ||
} | ||
|
||
let sync_spec = sync::SyncSpec { | ||
path: sync_dir, | ||
buckets, | ||
path_db: sync_db, | ||
start: start_date, | ||
}; | ||
|
||
loop { | ||
if let Err(e) = daemon_sync_cycle(client) { | ||
if let Err(e) = sync::sync_run(client, &sync_spec, sync::SyncMode::Both) { | ||
error!("Error during sync cycle: {}", e); | ||
// Re-throw the error | ||
return Err(e); | ||
} | ||
|
||
info!("Sync pass done, sleeping for 5 minutes"); | ||
|
||
// Wait for either the sleep duration or a termination signal | ||
match rx.recv_timeout(Duration::from_secs(300)) { | ||
Ok(_) | Err(RecvTimeoutError::Disconnected) => { | ||
info!("Termination signal received, shutting down."); | ||
|
@@ -237,13 +310,3 @@ fn daemon(client: &AwClient) -> Result<(), Box<dyn Error>> { | |
|
||
Ok(()) | ||
} | ||
|
||
fn daemon_sync_cycle(client: &AwClient) -> Result<(), Box<dyn Error>> { | ||
info!("Pulling from all hosts"); | ||
sync_wrapper::pull_all(client)?; | ||
|
||
info!("Pushing local data"); | ||
sync_wrapper::push(client)?; | ||
|
||
Ok(()) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
ctrlc
handler setup code is duplicated in bothdaemon()
anddaemon_advanced()
. Consider refactoring this common logic into a helper function to reduce duplication.