From 9b882417fd75173b159ff74f25a83f73be7bee90 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 03:04:17 +0000
Subject: [PATCH 1/9] refactor system and dist tests to use isolated
 sccache clients so the tests can run in parallel

---
 tests/dist.rs           |  256 ++++----
 tests/harness/client.rs |  177 ++++++
 tests/harness/dist.rs   |  557 +++++++++++++++++
 tests/harness/mod.rs    |  669 +--------------------
 tests/system.rs         | 1248 +++++++++++++++++++--------------------
 5 files changed, 1467 insertions(+), 1440 deletions(-)
 create mode 100644 tests/harness/client.rs
 create mode 100644 tests/harness/dist.rs

diff --git a/tests/dist.rs b/tests/dist.rs
index cc63f0f18..aa2a0a68e 100644
--- a/tests/dist.rs
+++ b/tests/dist.rs
@@ -7,8 +7,9 @@ extern crate sccache;
 extern crate serde_json;
 
 use crate::harness::{
-    cargo_command, get_stats, init_cargo, sccache_command, start_local_daemon, stop_local_daemon,
-    write_json_cfg, write_source,
+    client::{sccache_client_cfg, SccacheClient},
+    dist::{cargo_command, sccache_dist_path, DistSystem},
+    init_cargo, write_source,
 };
 use assert_cmd::prelude::*;
 use sccache::config::HTTPUrl;
@@ -16,7 +17,6 @@ use sccache::dist::{
     AssignJobResult, CompileCommand, InputsReader, JobId, JobState, RunJobResult, ServerIncoming,
     ServerOutgoing, SubmitToolchainResult, Toolchain, ToolchainReader,
 };
-use std::ffi::OsStr;
 use std::path::Path;
 use std::process::Output;
 
@@ -24,17 +24,12 @@ use sccache::errors::*;
 
 mod harness;
 
-fn basic_compile(tmpdir: &Path, sccache_cfg_path: &Path, sccache_cached_cfg_path: &Path) {
-    let envs: Vec<(_, &OsStr)> = vec![
-        ("RUST_BACKTRACE", "1".as_ref()),
-        ("SCCACHE_LOG", "debug".as_ref()),
-        ("SCCACHE_CONF", sccache_cfg_path.as_ref()),
-        ("SCCACHE_CACHED_CONF", sccache_cached_cfg_path.as_ref()),
-    ];
+fn basic_compile(client: &SccacheClient, tmpdir: &Path) {
     let source_file = "x.c";
     let obj_file = "x.o";
     write_source(tmpdir, source_file, "#if !defined(SCCACHE_TEST_DEFINE)\n#error SCCACHE_TEST_DEFINE is not defined\n#endif\nint x() { return 5; }");
-    sccache_command()
+    client
+        .cmd()
         .args([
             std::env::var("CC")
                 .unwrap_or_else(|_| "gcc".to_string())
@@ -45,21 +40,13 @@ fn basic_compile(tmpdir: &Path, sccache_cfg_path: &Path, sccache_cached_cfg_path
         .arg(tmpdir.join(source_file))
         .arg("-o")
         .arg(tmpdir.join(obj_file))
-        .envs(envs)
+        .env("RUST_BACKTRACE", "1")
+        .env("SCCACHE_RECACHE", "1")
         .assert()
         .success();
 }
 
-fn rust_compile(tmpdir: &Path, sccache_cfg_path: &Path, sccache_cached_cfg_path: &Path) -> Output {
-    let sccache_path = assert_cmd::cargo::cargo_bin("sccache").into_os_string();
-    let envs: Vec<(_, &OsStr)> = vec![
-        ("RUSTC_WRAPPER", sccache_path.as_ref()),
-        ("CARGO_TARGET_DIR", "target".as_ref()),
-        ("RUST_BACKTRACE", "1".as_ref()),
-        ("SCCACHE_LOG", "debug".as_ref()),
-        ("SCCACHE_CONF", sccache_cfg_path.as_ref()),
-        ("SCCACHE_CACHED_CONF", sccache_cached_cfg_path.as_ref()),
-    ];
+fn rust_compile(client: &SccacheClient, tmpdir: &Path) -> Output {
     let cargo_name = "sccache-dist-test";
     let cargo_path = init_cargo(tmpdir, cargo_name);
 
@@ -87,7 +74,16 @@ fn rust_compile(tmpdir: &Path, sccache_cfg_path: &Path, sccache_cached_cfg_path:
     cargo_command()
         .current_dir(cargo_path)
         .args(["build", "--release"])
-        .envs(envs)
+        .envs(
+            client
+                .cmd()
+                .get_envs()
+                .filter_map(|(k, v)| v.map(|v| (k, v))),
+        )
+        .env("RUSTC_WRAPPER", &client.path)
+        .env("CARGO_TARGET_DIR", "target")
+        .env("RUST_BACKTRACE", "1")
+        .env("SCCACHE_RECACHE", "1")
         .output()
         .unwrap()
 }
@@ -96,7 +92,7 @@ pub fn dist_test_sccache_client_cfg(
     tmpdir: &Path,
     scheduler_url: HTTPUrl,
 ) -> sccache::config::FileConfig {
-    let mut sccache_cfg = harness::sccache_client_cfg(tmpdir, false);
+    let mut sccache_cfg = sccache_client_cfg(tmpdir, false);
     sccache_cfg.cache.disk.as_mut().unwrap().size = 0;
     sccache_cfg.dist.scheduler_url = Some(scheduler_url);
     sccache_cfg
@@ -110,29 +106,27 @@ fn test_dist_basic() {
         .tempdir()
         .unwrap();
     let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let sccache_dist = sccache_dist_path();
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
+    let mut system = DistSystem::new(&sccache_dist, tmpdir);
     system.add_scheduler();
     system.add_server();
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
-
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    basic_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
-
-    get_stats(|info| {
-        assert_eq!(1, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(0, info.stats.dist_errors);
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
+    let client = system.new_client(&dist_test_sccache_client_cfg(
+        tmpdir,
+        system.scheduler_url(),
+    ));
+
+    basic_compile(&client, tmpdir);
+
+    let stats = client.stats().unwrap();
+
+    assert_eq!(1, stats.dist_compiles.values().sum::<usize>());
+    assert_eq!(0, stats.dist_errors);
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
 }
 
 #[test]
@@ -143,32 +137,31 @@ fn test_dist_restartedserver() {
         .tempdir()
         .unwrap();
     let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let sccache_dist = sccache_dist_path();
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
+    let mut system = DistSystem::new(&sccache_dist, tmpdir);
     system.add_scheduler();
     let server_handle = system.add_server();
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
+    let client = system.new_client(&dist_test_sccache_client_cfg(
+        tmpdir,
+        system.scheduler_url(),
+    ));
 
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    basic_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
+    basic_compile(&client, tmpdir);
 
     system.restart_server(&server_handle);
-    basic_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
-
-    get_stats(|info| {
-        assert_eq!(2, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(0, info.stats.dist_errors);
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(2, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-    });
+
+    basic_compile(&client, tmpdir);
+
+    let stats = client.stats().unwrap();
+
+    assert_eq!(2, stats.dist_compiles.values().sum::<usize>());
+    assert_eq!(0, stats.dist_errors);
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(2, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
 }
 
 #[test]
@@ -179,28 +172,26 @@ fn test_dist_nobuilder() {
         .tempdir()
         .unwrap();
     let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let sccache_dist = sccache_dist_path();
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
+    let mut system = DistSystem::new(&sccache_dist, tmpdir);
     system.add_scheduler();
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
-
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    basic_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
-
-    get_stats(|info| {
-        assert_eq!(0, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(1, info.stats.dist_errors);
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
+    let client = system.new_client(&dist_test_sccache_client_cfg(
+        tmpdir,
+        system.scheduler_url(),
+    ));
+
+    basic_compile(&client, tmpdir);
+
+    let stats = client.stats().unwrap();
+
+    assert_eq!(0, stats.dist_compiles.values().sum::<usize>());
+    assert_eq!(1, stats.dist_errors);
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
 }
 
 struct FailingServer;
@@ -244,97 +235,64 @@ fn test_dist_failingserver() {
         .tempdir()
         .unwrap();
     let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let sccache_dist = sccache_dist_path();
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
+    let mut system = DistSystem::new(&sccache_dist, tmpdir);
     system.add_scheduler();
     system.add_custom_server(FailingServer);
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
-
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    basic_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
-
-    get_stats(|info| {
-        assert_eq!(0, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(1, info.stats.dist_errors);
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
-}
-
-#[test]
-#[cfg_attr(not(feature = "dist-tests"), ignore)]
-fn test_dist_cargo_build() {
-    let tmpdir = tempfile::Builder::new()
-        .prefix("sccache_dist_test")
-        .tempdir()
-        .unwrap();
-    let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let client = system.new_client(&dist_test_sccache_client_cfg(
+        tmpdir,
+        system.scheduler_url(),
+    ));
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
-    system.add_scheduler();
-    let _server_handle = system.add_server();
+    basic_compile(&client, tmpdir);
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
+    let stats = client.stats().unwrap();
 
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    rust_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path)
-        .assert()
-        .success();
-    get_stats(|info| {
-        assert_eq!(1, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(0, info.stats.dist_errors);
-        assert_eq!(5, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
+    assert_eq!(0, stats.dist_compiles.values().sum::<usize>());
+    assert_eq!(1, stats.dist_errors);
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
 }
 
 #[test]
 #[cfg_attr(not(feature = "dist-tests"), ignore)]
-fn test_dist_cargo_makeflags() {
+fn test_dist_cargo_build() {
     let tmpdir = tempfile::Builder::new()
         .prefix("sccache_dist_test")
         .tempdir()
         .unwrap();
     let tmpdir = tmpdir.path();
-    let sccache_dist = harness::sccache_dist_path();
+    let sccache_dist = sccache_dist_path();
 
-    let mut system = harness::DistSystem::new(&sccache_dist, tmpdir);
+    let mut system = DistSystem::new(&sccache_dist, tmpdir);
     system.add_scheduler();
     let _server_handle = system.add_server();
 
-    let sccache_cfg = dist_test_sccache_client_cfg(tmpdir, system.scheduler_url());
-    let sccache_cfg_path = tmpdir.join("sccache-cfg.json");
-    write_json_cfg(tmpdir, "sccache-cfg.json", &sccache_cfg);
-    let sccache_cached_cfg_path = tmpdir.join("sccache-cached-cfg");
+    let client = system.new_client(&dist_test_sccache_client_cfg(
+        tmpdir,
+        system.scheduler_url(),
+    ));
 
-    stop_local_daemon();
-    start_local_daemon(&sccache_cfg_path, &sccache_cached_cfg_path);
-    let compile_output = rust_compile(tmpdir, &sccache_cfg_path, &sccache_cached_cfg_path);
+    let compile_output = rust_compile(&client, tmpdir);
 
+    // Ensure sccache ignores inherited jobservers in CARGO_MAKEFLAGS
     assert!(!String::from_utf8_lossy(&compile_output.stderr)
         .contains("warning: failed to connect to jobserver from environment variable"));
 
-    get_stats(|info| {
-        assert_eq!(1, info.stats.dist_compiles.values().sum::<usize>());
-        assert_eq!(0, info.stats.dist_errors);
-        assert_eq!(5, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
+    // Assert compilation succeeded
+    compile_output.assert().success();
+
+    let stats = client.stats().unwrap();
+
+    assert_eq!(1, stats.dist_compiles.values().sum::<usize>());
+    assert_eq!(0, stats.dist_errors);
+    // check >= 5 because cargo >=1.82 does additional requests with -vV
+    assert!(stats.compile_requests >= 5);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
 }
diff --git a/tests/harness/client.rs b/tests/harness/client.rs
new file mode 100644
index 000000000..c73f81571
--- /dev/null
+++ b/tests/harness/client.rs
@@ -0,0 +1,199 @@
+use fs_err as fs;
+use std::{
+    env,
+    ffi::OsString,
+    path::{Path, PathBuf},
+    process::{Command, Stdio},
+    str,
+    sync::atomic::{AtomicU16, Ordering},
+};
+
+use sccache::{
+    config::{CacheConfigs, DiskCacheConfig, DistConfig, FileConfig, PreprocessorCacheModeConfig},
+    server::{ServerInfo, ServerStats},
+};
+
+use super::{prune_command, TC_CACHE_SIZE};
+
+/// Builds a client `FileConfig` whose only cache backend is a disk cache under `tmpdir`.
+///
+/// Creates `tmpdir/client-cache` and `tmpdir/client-dist-cache` (panicking on failure) and
+/// uses them as the disk cache directory and the dist `cache_dir` respectively.
+/// `preprocessor_cache_mode` sets `use_preprocessor_cache_mode`; no scheduler URL is set.
+pub fn sccache_client_cfg(tmpdir: &Path, preprocessor_cache_mode: bool) -> FileConfig {
+    let cache_relpath = "client-cache";
+    let dist_cache_relpath = "client-dist-cache";
+    fs::create_dir_all(tmpdir.join(cache_relpath)).unwrap();
+    fs::create_dir_all(tmpdir.join(dist_cache_relpath)).unwrap();
+
+    let disk_cache = DiskCacheConfig {
+        dir: tmpdir.join(cache_relpath),
+        preprocessor_cache_mode: PreprocessorCacheModeConfig {
+            use_preprocessor_cache_mode: preprocessor_cache_mode,
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    FileConfig {
+        cache: CacheConfigs {
+            azure: None,
+            disk: Some(disk_cache),
+            gcs: None,
+            gha: None,
+            memcached: None,
+            redis: None,
+            s3: None,
+            webdav: None,
+            oss: None,
+        },
+        dist: DistConfig {
+            auth: Default::default(), // dangerously_insecure
+            scheduler_url: None,
+            cache_dir: tmpdir.join(dist_cache_relpath),
+            toolchains: vec![],
+            toolchain_cache_size: TC_CACHE_SIZE,
+            rewrite_includes_only: false, // TODO
+        },
+        server_startup_timeout_ms: None,
+    }
+}
+
+/// Next value for `SCCACHE_SERVER_PORT`; each new client takes one and increments it.
+static CLIENT_PORT: AtomicU16 = AtomicU16::new(4227);
+
+/// An sccache binary path plus the environment applied to every command built by `cmd()`.
+///
+/// Dropping the client runs `sccache --stop-server` in that environment.
+pub struct SccacheClient {
+    envvars: Vec<(OsString, OsString)>,
+    pub path: PathBuf,
+}
+
+// NOTE(review): presumably not every test binary uses every method — confirm.
+#[allow(unused)]
+impl SccacheClient {
+    /// Creates a client with a fresh server port and no config-file variables.
+    ///
+    /// When `SCCACHE_DEBUG` is set, also sets `SCCACHE_SERVER_LOG` and `SCCACHE_ERROR_LOG`.
+    pub fn new_no_cfg() -> Self {
+        let path = assert_cmd::cargo::cargo_bin("sccache");
+        let port = CLIENT_PORT.fetch_add(1, Ordering::SeqCst);
+
+        let mut envvars = vec![
+            ("SCCACHE_SERVER_PORT".into(), port.to_string().into()),
+            ("TOKIO_WORKER_THREADS".into(), "2".into()),
+        ];
+
+        // Send daemon logs to a file if SCCACHE_DEBUG is defined
+        if env::var("SCCACHE_DEBUG").is_ok() {
+            envvars.extend_from_slice(&[
+                // Allow overriding log level
+                (
+                    "SCCACHE_SERVER_LOG".into(),
+                    env::var_os("SCCACHE_SERVER_LOG")
+                        .or_else(|| env::var_os("SCCACHE_LOG"))
+                        .unwrap_or_else(|| "sccache=trace".into()),
+                ),
+                // Allow overriding log output path
+                (
+                    "SCCACHE_ERROR_LOG".into(),
+                    env::var_os("SCCACHE_ERROR_LOG").unwrap_or_else(|| {
+                        env::temp_dir()
+                            .join(format!("sccache_local_daemon.{port}.txt"))
+                            .into_os_string()
+                    }),
+                ),
+            ]);
+        }
+
+        Self { envvars, path }
+    }
+
+    /// Like `new_no_cfg`, plus `SCCACHE_CONF` and `SCCACHE_CACHED_CONF` set to the given paths.
+    pub fn new(cfg_path: &Path, cached_cfg_path: &Path) -> Self {
+        let mut this = Self::new_no_cfg();
+        this.envvars.push(("SCCACHE_CONF".into(), cfg_path.into()));
+        this.envvars
+            .push(("SCCACHE_CACHED_CONF".into(), cached_cfg_path.into()));
+        this
+    }
+
+    /// Runs `sccache --start-server` and returns the client; panics if the command fails.
+    pub fn start(self) -> Self {
+        trace!("sccache --start-server");
+        // Don't run this with run() because on Windows `wait_with_output`
+        // will hang because the internal server process is not detached.
+        if !self
+            .cmd()
+            .arg("--start-server")
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status()
+            .unwrap()
+            .success()
+        {
+            panic!("Failed to start local daemon");
+        }
+        self
+    }
+
+    /// Runs `sccache --stop-server`; returns whether it ran and exited successfully.
+    pub fn stop(&self) -> bool {
+        trace!("sccache --stop-server");
+        self.cmd()
+            .arg("--stop-server")
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status()
+            .is_ok_and(|status| status.success())
+    }
+
+    /// Builds an sccache `Command` for `self.path` with this client's environment applied.
+    pub fn cmd(&self) -> Command {
+        let mut cmd = prune_command(Command::new(&self.path));
+        cmd.envs(
+            self.envvars
+                .iter()
+                .map(|(k, v)| (k.as_os_str(), v.as_os_str())),
+        );
+        cmd
+    }
+
+    /// Runs `sccache --show-stats --stats-format=json` and parses stdout as `ServerInfo`.
+    ///
+    /// Returns `Err` if the command cannot be run; panics on non-UTF-8 or unparsable output.
+    pub fn info(&self) -> sccache::errors::Result<ServerInfo> {
+        self.cmd()
+            .args(["--show-stats", "--stats-format=json"])
+            .output()
+            .map_err(anyhow::Error::new)
+            .map_err(|e| e.context("`sccache --show-stats --stats-format=json` failed"))
+            .map(|output| {
+                let s = str::from_utf8(&output.stdout).expect("Output not UTF-8");
+                serde_json::from_str(s).expect("Failed to parse JSON stats")
+            })
+    }
+
+    /// Returns the `stats` field of `info()`.
+    pub fn stats(&self) -> sccache::errors::Result<ServerStats> {
+        self.info().map(|info| info.stats)
+    }
+
+    /// Runs `sccache --zero-stats`, ignoring whether it succeeds.
+    pub fn zero_stats(&self) {
+        trace!("sccache --zero-stats");
+        drop(
+            self.cmd()
+                .arg("--zero-stats")
+                .stdout(Stdio::null())
+                .stderr(Stdio::null())
+                .status(),
+        );
+    }
+}
+
+impl Drop for SccacheClient {
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
diff --git a/tests/harness/dist.rs b/tests/harness/dist.rs
new file mode 100644
index 000000000..eb95c1a8d
--- /dev/null
+++ b/tests/harness/dist.rs
@@ -0,0 +1,557 @@
+use fs_err as fs;
+use sccache::config::FileConfig;
+#[cfg(any(feature = "dist-client", feature = "dist-server"))]
+use sccache::config::HTTPUrl;
+use sccache::dist::{self, SchedulerStatusResult, ServerId};
+use std::env;
+use std::io::Write;
+use std::net::{self, IpAddr, SocketAddr};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Output, Stdio};
+use std::str::{self, FromStr};
+use std::thread;
+use std::time::{Duration, Instant};
+
+use nix::{
+    sys::{
+        signal::Signal,
+        wait::{WaitPidFlag, WaitStatus},
+    },
+    unistd::{ForkResult, Pid},
+};
+use uuid::Uuid;
+
+use super::client::SccacheClient;
+use super::{prune_command, write_json_cfg, TC_CACHE_SIZE};
+
+const CONTAINER_NAME_PREFIX: &str = "sccache_dist_test";
+const DIST_IMAGE: &str = "sccache_dist_test_image";
+const DIST_DOCKERFILE: &str = include_str!("Dockerfile.sccache-dist");
+const DIST_IMAGE_BWRAP_PATH: &str = "/usr/bin/bwrap";
+const MAX_STARTUP_WAIT: Duration = Duration::from_secs(5);
+
+const DIST_SERVER_TOKEN: &str = "THIS IS THE TEST TOKEN";
+
+const CONFIGS_CONTAINER_PATH: &str = "/sccache-bits";
+const BUILD_DIR_CONTAINER_PATH: &str = "/sccache-bits/build-dir";
+const SCHEDULER_PORT: u16 = 10500;
+const SERVER_PORT: u16 = 12345; // arbitrary
+
+pub fn cargo_command() -> Command {
+    prune_command(Command::new("cargo"))
+}
+
+pub fn sccache_dist_path() -> PathBuf {
+    assert_cmd::cargo::cargo_bin("sccache-dist")
+}
+
+fn sccache_scheduler_cfg() -> sccache::config::scheduler::Config {
+    sccache::config::scheduler::Config {
+        public_addr: SocketAddr::from(([0, 0, 0, 0], SCHEDULER_PORT)),
+        client_auth: sccache::config::scheduler::ClientAuth::Insecure,
+        server_auth: sccache::config::scheduler::ServerAuth::Token {
+            token: DIST_SERVER_TOKEN.to_owned(),
+        },
+    }
+}
+
+fn sccache_server_cfg(
+    tmpdir: &Path,
+    scheduler_url: HTTPUrl,
+    server_ip: IpAddr,
+) -> sccache::config::server::Config {
+    let relpath = "server-cache";
+    // `create_dir_all` (as in `sccache_client_cfg`) tolerates an already-existing directory.
+    fs::create_dir_all(tmpdir.join(relpath)).unwrap();
+    sccache::config::server::Config {
+        builder: sccache::config::server::BuilderType::Overlay {
+            build_dir: BUILD_DIR_CONTAINER_PATH.into(),
+            bwrap_path: DIST_IMAGE_BWRAP_PATH.into(),
+        },
+        cache_dir: Path::new(CONFIGS_CONTAINER_PATH).join(relpath),
+        public_addr: SocketAddr::new(server_ip, SERVER_PORT),
+        bind_address: Some(SocketAddr::from(([0, 0, 0, 0], SERVER_PORT))),
+        scheduler_url,
+        scheduler_auth: sccache::config::server::SchedulerAuth::Token {
+            token: DIST_SERVER_TOKEN.to_owned(),
+        },
+        toolchain_cache_size: TC_CACHE_SIZE,
+    }
+}
+
+// TODO: copied from the sccache-dist binary - find a shared home for it so it can be reused here.
+// Formats the server's address and `auth_token` as a single string, separated by one space.
+fn create_server_token(server_id: ServerId, auth_token: &str) -> String {
+    format!("{} {}", server_id.addr(), auth_token)
+}
+
+pub enum ServerHandle {
+    Container {
+        cid: String,
+        url: HTTPUrl,
+    },
+    #[allow(dead_code)]
+    Process {
+        pid: Pid,
+        url: HTTPUrl,
+    },
+}
+
+pub struct DistSystem {
+    sccache_dist: PathBuf,
+    tmpdir: PathBuf,
+
+    scheduler_name: Option<String>,
+    server_names: Vec<String>,
+    server_pids: Vec<Pid>,
+}
+
+impl DistSystem {
+    pub fn new(sccache_dist: &Path, tmpdir: &Path) -> Self {
+        // Make sure the docker image is available, building it if necessary
+        let mut child = Command::new("docker")
+            .args(["build", "-q", "-t", DIST_IMAGE, "-"])
+            .stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn()
+            .unwrap();
+        child
+            .stdin
+            .as_mut()
+            .unwrap()
+            .write_all(DIST_DOCKERFILE.as_bytes())
+            .unwrap();
+        let output = child.wait_with_output().unwrap();
+        check_output(&output);
+
+        let tmpdir = tmpdir.join("distsystem");
+        fs::create_dir(&tmpdir).unwrap();
+
+        Self {
+            sccache_dist: sccache_dist.to_owned(),
+            tmpdir,
+
+            scheduler_name: None,
+            server_names: vec![],
+            server_pids: vec![],
+        }
+    }
+
+    pub fn add_scheduler(&mut self) {
+        let scheduler_cfg_relpath = "scheduler-cfg.json";
+        let scheduler_cfg_path = self.tmpdir.join(scheduler_cfg_relpath);
+        let scheduler_cfg_container_path =
+            Path::new(CONFIGS_CONTAINER_PATH).join(scheduler_cfg_relpath);
+        let scheduler_cfg = sccache_scheduler_cfg();
+        fs::File::create(scheduler_cfg_path)
+            .unwrap()
+            .write_all(&serde_json::to_vec(&scheduler_cfg).unwrap())
+            .unwrap();
+
+        // Create the scheduler
+        let scheduler_name = make_container_name("scheduler");
+        let output = Command::new("docker")
+            .args([
+                "run",
+                "--name",
+                &scheduler_name,
+                "-e",
+                "SCCACHE_NO_DAEMON=1",
+                "-e",
+                "SCCACHE_LOG=debug",
+                "-e",
+                "RUST_BACKTRACE=1",
+                "--network",
+                "host",
+                "-v",
+                &format!("{}:/sccache-dist:z", self.sccache_dist.to_str().unwrap()),
+                "-v",
+                &format!(
+                    "{}:{}:z",
+                    self.tmpdir.to_str().unwrap(),
+                    CONFIGS_CONTAINER_PATH
+                ),
+                "-d",
+                DIST_IMAGE,
+                "bash",
+                "-c",
+                &format!(
+                    r#"
+                    set -o errexit &&
+                    exec /sccache-dist scheduler --config {cfg}
+                "#,
+                    cfg = scheduler_cfg_container_path.to_str().unwrap()
+                ),
+            ])
+            .output()
+            .unwrap();
+        self.scheduler_name = Some(scheduler_name);
+
+        check_output(&output);
+
+        let scheduler_url = self.scheduler_url();
+        wait_for_http(scheduler_url, Duration::from_millis(100), MAX_STARTUP_WAIT);
+        wait_for(
+            || {
+                let status = self.scheduler_status();
+                if matches!(
+                    status,
+                    SchedulerStatusResult {
+                        num_servers: 0,
+                        num_cpus: _,
+                        in_progress: 0
+                    }
+                ) {
+                    Ok(())
+                } else {
+                    Err(format!("{:?}", status))
+                }
+            },
+            Duration::from_millis(100),
+            MAX_STARTUP_WAIT,
+        );
+    }
+
+    pub fn add_server(&mut self) -> ServerHandle {
+        let server_cfg_relpath = format!("server-cfg-{}.json", self.server_names.len());
+        let server_cfg_path = self.tmpdir.join(&server_cfg_relpath);
+        let server_cfg_container_path = Path::new(CONFIGS_CONTAINER_PATH).join(server_cfg_relpath);
+
+        let server_name = make_container_name("server");
+        let output = Command::new("docker")
+            .args([
+                "run",
+                // Important for the bubblewrap builder
+                "--privileged",
+                "--name",
+                &server_name,
+                "-e",
+                "SCCACHE_LOG=debug",
+                "-e",
+                "RUST_BACKTRACE=1",
+                "--network",
+                "host",
+                "-v",
+                &format!("{}:/sccache-dist:z", self.sccache_dist.to_str().unwrap()),
+                "-v",
+                &format!(
+                    "{}:{}:z",
+                    self.tmpdir.to_str().unwrap(),
+                    CONFIGS_CONTAINER_PATH
+                ),
+                "-d",
+                DIST_IMAGE,
+                "bash",
+                "-c",
+                &format!(
+                    r#"
+                    set -o errexit &&
+                    while [ ! -f {cfg}.ready ]; do sleep 0.1; done &&
+                    exec /sccache-dist server --config {cfg}
+                "#,
+                    cfg = server_cfg_container_path.to_str().unwrap()
+                ),
+            ])
+            .output()
+            .unwrap();
+        self.server_names.push(server_name.clone());
+
+        check_output(&output);
+
+        let server_ip = IpAddr::from_str("127.0.0.1").unwrap();
+        let server_cfg = sccache_server_cfg(&self.tmpdir, self.scheduler_url(), server_ip);
+        fs::File::create(&server_cfg_path)
+            .unwrap()
+            .write_all(&serde_json::to_vec(&server_cfg).unwrap())
+            .unwrap();
+        fs::File::create(format!("{}.ready", server_cfg_path.to_str().unwrap())).unwrap();
+
+        let url = HTTPUrl::from_url(
+            reqwest::Url::parse(&format!("https://{}:{}", server_ip, SERVER_PORT)).unwrap(),
+        );
+        let handle = ServerHandle::Container {
+            cid: server_name,
+            url,
+        };
+        self.wait_server_ready(&handle);
+        handle
+    }
+
+    pub fn add_custom_server<S: dist::ServerIncoming + 'static>(
+        &mut self,
+        handler: S,
+    ) -> ServerHandle {
+        let server_addr = {
+            let ip = IpAddr::from_str("127.0.0.1").unwrap();
+            let listener = net::TcpListener::bind(SocketAddr::from((ip, 0))).unwrap();
+            listener.local_addr().unwrap()
+        };
+        let token = create_server_token(ServerId::new(server_addr), DIST_SERVER_TOKEN);
+        let server = dist::http::Server::new(
+            server_addr,
+            Some(SocketAddr::from(([0, 0, 0, 0], server_addr.port()))),
+            self.scheduler_url().to_url(),
+            token,
+            handler,
+        )
+        .unwrap();
+        let pid = match unsafe { nix::unistd::fork() }.unwrap() {
+            ForkResult::Parent { child } => {
+                self.server_pids.push(child);
+                child
+            }
+            ForkResult::Child => {
+                env::set_var("SCCACHE_LOG", "sccache=trace");
+                env_logger::try_init().unwrap();
+                server.start().unwrap();
+                unreachable!();
+            }
+        };
+
+        let url =
+            HTTPUrl::from_url(reqwest::Url::parse(&format!("https://{}", server_addr)).unwrap());
+        let handle = ServerHandle::Process { pid, url };
+        self.wait_server_ready(&handle);
+        handle
+    }
+
+    pub fn restart_server(&mut self, handle: &ServerHandle) {
+        // Only container-backed servers can be restarted; forked ones panic below.
+        let container_id = match handle {
+            ServerHandle::Container { cid, .. } => cid,
+            ServerHandle::Process { .. } => {
+                // TODO: pretty easy, just no need yet
+                panic!("restart not yet implemented for pids")
+            }
+        };
+        let restart = Command::new("docker")
+            .args(["restart", container_id])
+            .output()
+            .unwrap();
+        check_output(&restart);
+        self.wait_server_ready(handle)
+    }
+
+    pub fn wait_server_ready(&mut self, handle: &ServerHandle) {
+        // First wait until the server accepts TCP connections at the URL in its handle.
+        let server_url = match handle {
+            ServerHandle::Container { url, .. } => url.clone(),
+            ServerHandle::Process { url, .. } => url.clone(),
+        };
+        let poll_interval = Duration::from_millis(100);
+        wait_for_http(server_url, poll_interval, MAX_STARTUP_WAIT);
+        // Then poll the scheduler until it reports exactly one server and nothing in progress.
+        let scheduler_sees_server = || {
+            let status = self.scheduler_status();
+            let ready = matches!(
+                status,
+                SchedulerStatusResult {
+                    num_servers: 1,
+                    in_progress: 0,
+                    ..
+                }
+            );
+            if ready {
+                Ok(())
+            } else {
+                Err(format!("{:?}", status))
+            }
+        };
+        wait_for(scheduler_sees_server, poll_interval, MAX_STARTUP_WAIT);
+    }
+
+    pub fn scheduler_url(&self) -> HTTPUrl {
+        let parsed = reqwest::Url::parse(&format!("http://127.0.0.1:{}", SCHEDULER_PORT));
+        HTTPUrl::from_url(parsed.unwrap()) // the literal above is expected to always parse
+    }
+
+    fn scheduler_status(&self) -> SchedulerStatusResult {
+        // GET the scheduler's status URL and bincode-decode the response body.
+        let scheduler = self.scheduler_url().to_url();
+        let status_url = dist::http::urls::scheduler_status(&scheduler);
+        let response = reqwest::blocking::get(status_url).unwrap();
+        assert!(response.status().is_success());
+        bincode::deserialize_from(response).unwrap()
+    }
+
+    pub fn new_client(&self, client_config: &FileConfig) -> SccacheClient {
+        // Write the config into the dist tmpdir, then build a client on it and call `start()`.
+        let cfg_name = "sccache-client.json";
+        write_json_cfg(&self.tmpdir, cfg_name, client_config);
+        let cfg_path = self.tmpdir.join(cfg_name);
+        let cached_cfg_path = self.tmpdir.join("sccache-cached-cfg");
+        SccacheClient::new(&cfg_path, &cached_cfg_path).start()
+    }
+}
+
+// If you want containers to hang around (e.g. for debugging), comment out the "rm -f" lines
+impl Drop for DistSystem {
+    fn drop(&mut self) {
+        let mut did_err = false;
+
+        // Panicking halfway through drop would either abort (if it's a double panic) or leave us with
+        // resources that aren't yet cleaned up. Instead, do as much as possible then decide what to do
+        // at the end - panic (if not already doing so) or let the panic continue
+        macro_rules! droperr {
+            ($e:expr) => {
+                match $e {
+                    Ok(()) => (),
+                    Err(e) => {
+                        did_err = true;
+                        eprintln!("Error with {}: {}", stringify!($e), e)
+                    }
+                }
+            };
+        }
+
+        let mut logs = vec![];
+        let mut outputs = vec![];
+        let mut exits = vec![];
+        // Containers: capture logs, then kill and remove. Forked servers: SIGINT, then SIGKILL.
+        if let Some(scheduler_name) = self.scheduler_name.as_ref() {
+            droperr!(Command::new("docker")
+                .args(["logs", scheduler_name])
+                .output()
+                .map(|o| logs.push((scheduler_name, o))));
+            droperr!(Command::new("docker")
+                .args(["kill", scheduler_name])
+                .output()
+                .map(|o| outputs.push((scheduler_name, o))));
+            droperr!(Command::new("docker")
+                .args(["rm", "-f", scheduler_name])
+                .output()
+                .map(|o| outputs.push((scheduler_name, o))));
+        }
+        for server_name in self.server_names.iter() {
+            droperr!(Command::new("docker")
+                .args(["logs", server_name])
+                .output()
+                .map(|o| logs.push((server_name, o))));
+            droperr!(Command::new("docker")
+                .args(["kill", server_name])
+                .output()
+                .map(|o| outputs.push((server_name, o))));
+            droperr!(Command::new("docker")
+                .args(["rm", "-f", server_name])
+                .output()
+                .map(|o| outputs.push((server_name, o))));
+        }
+        for &pid in self.server_pids.iter() {
+            droperr!(nix::sys::signal::kill(pid, Signal::SIGINT));
+            thread::sleep(Duration::from_millis(100));
+            let mut killagain = true; // Default to trying to kill again, e.g. if there was an error waiting on the pid
+            droperr!(
+                nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)).map(|ws| {
+                    if ws != WaitStatus::StillAlive {
+                        killagain = false;
+                        exits.push(ws)
+                    }
+                })
+            );
+            if killagain {
+                eprintln!("SIGINT didn't kill process, trying SIGKILL");
+                droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL));
+                // SIGKILL cannot be caught or ignored, but it is delivered asynchronously: a
+                // WNOHANG poll issued right after sending it can still find the child alive,
+                // which reported a spurious teardown failure and left the child unreaped.
+                // Waiting without WNOHANG blocks until the killed child has been collected; if
+                // the pid is not waitable (e.g. already reaped) waitpid returns an error instead.
+                droperr!(nix::sys::wait::waitpid(pid, None)
+                    .map_err(|e| e.to_string())
+                    .map(|ws| exits.push(ws)));
+            }
+        }
+        // Print everything gathered above only once all cleanup has been attempted.
+        for (
+            container,
+            Output {
+                status,
+                stdout,
+                stderr,
+            },
+        ) in logs
+        {
+            println!(
+                "LOGS == ({}) ==\n> {} <:\n## STDOUT\n{}\n\n## STDERR\n{}\n====",
+                status,
+                container,
+                String::from_utf8_lossy(&stdout),
+                String::from_utf8_lossy(&stderr)
+            );
+        }
+        for (
+            container,
+            Output {
+                status,
+                stdout,
+                stderr,
+            },
+        ) in outputs
+        {
+            println!(
+                "OUTPUTS == ({}) ==\n> {} <:\n## STDOUT\n{}\n\n## STDERR\n{}\n====",
+                status,
+                container,
+                String::from_utf8_lossy(&stdout),
+                String::from_utf8_lossy(&stderr)
+            );
+        }
+        for exit in exits {
+            println!("EXIT: {:?}", exit)
+        }
+
+        if did_err && !thread::panicking() {
+            panic!("Encountered failures during dist system teardown")
+        }
+    }
+}
+
+/// Builds a docker container name of the form `<prefix>_<tag>_<uuid>`.
+///
+/// The hyphenated v4 UUID is freshly generated on every call.
+fn make_container_name(tag: &str) -> String {
+    let unique_suffix = Uuid::new_v4().hyphenated();
+    let prefix = CONTAINER_NAME_PREFIX;
+    format!("{}_{}_{}", prefix, tag, unique_suffix)
+}
+
+fn check_output(output: &Output) {
+    if !output.status.success() {
+        println!("{}\n\n[BEGIN STDOUT]\n===========\n{}\n===========\n[FIN STDOUT]\n\n[BEGIN STDERR]\n===========\n{}\n===========\n[FIN STDERR]\n\n",
+            output.status, String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr));
+        panic!("command exited unsuccessfully: {}", output.status) // streams were dumped above
+    }
+}
+
+/// Polls `url` until a plain TCP connection to it succeeds, panicking via `wait_for` on timeout.
+///
+/// Only TCP reachability is probed; no HTTP request is made and no TLS handshake is attempted.
+// TODO: after upgrading to reqwest >= 0.9, use 'danger_accept_invalid_certs' and stick with that rather than tcp
+// NOTE(review): `reqwest::blocking` is already used elsewhere in this file, so that upgrade may be done.
+fn wait_for_http(url: HTTPUrl, interval: Duration, max_wait: Duration) {
+    let try_connect = || {
+        // Resolve afresh on every attempt; a resolution failure panics rather than retrying.
+        let addrs = url.to_url().socket_addrs(|| None).unwrap();
+        net::TcpStream::connect(addrs.as_slice())
+            .map(drop)
+            .map_err(|e| e.to_string())
+    };
+    wait_for(try_connect, interval, max_wait)
+}
+
+/// Calls `f` every `interval` until it returns `Ok(())`; `f` always runs at least once.
+/// Panics with the latest error once more than `max_wait` has elapsed since the start.
+fn wait_for<F: Fn() -> Result<(), String>>(f: F, interval: Duration, max_wait: Duration) {
+    let began = Instant::now();
+    loop {
+        let last_err = match f() {
+            Ok(()) => return,
+            Err(e) => e,
+        };
+        if began.elapsed() > max_wait {
+            panic!("wait timed out, last error result: {}", last_err)
+        }
+        thread::sleep(interval)
+    }
+}
diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs
index d148793f2..28452b18c 100644
--- a/tests/harness/mod.rs
+++ b/tests/harness/mod.rs
@@ -1,114 +1,18 @@
 use fs_err as fs;
-#[cfg(any(feature = "dist-client", feature = "dist-server"))]
-use sccache::config::HTTPUrl;
-use sccache::dist::{self, SchedulerStatusResult, ServerId};
-use sccache::server::ServerInfo;
 use std::env;
 use std::io::Write;
-use std::net::{self, IpAddr, SocketAddr};
 use std::path::{Path, PathBuf};
-use std::process::{Command, Output, Stdio};
-use std::str::{self, FromStr};
-use std::thread;
-use std::time::{Duration, Instant};
+use std::process::Command;
+use std::str;
 
-use assert_cmd::prelude::*;
-#[cfg(feature = "dist-server")]
-use nix::{
-    sys::{
-        signal::Signal,
-        wait::{WaitPidFlag, WaitStatus},
-    },
-    unistd::{ForkResult, Pid},
-};
-use predicates::prelude::*;
 use serde::Serialize;
-use uuid::Uuid;
-
-const CONTAINER_NAME_PREFIX: &str = "sccache_dist_test";
-const DIST_IMAGE: &str = "sccache_dist_test_image";
-const DIST_DOCKERFILE: &str = include_str!("Dockerfile.sccache-dist");
-const DIST_IMAGE_BWRAP_PATH: &str = "/usr/bin/bwrap";
-const MAX_STARTUP_WAIT: Duration = Duration::from_secs(5);
-
-const DIST_SERVER_TOKEN: &str = "THIS IS THE TEST TOKEN";
-
-const CONFIGS_CONTAINER_PATH: &str = "/sccache-bits";
-const BUILD_DIR_CONTAINER_PATH: &str = "/sccache-bits/build-dir";
-const SCHEDULER_PORT: u16 = 10500;
-const SERVER_PORT: u16 = 12345; // arbitrary
 
-const TC_CACHE_SIZE: u64 = 1024 * 1024 * 1024; // 1 gig
+pub mod client;
 
-pub fn start_local_daemon(cfg_path: &Path, cached_cfg_path: &Path) {
-    // Don't run this with run() because on Windows `wait_with_output`
-    // will hang because the internal server process is not detached.
-    if !sccache_command()
-        .arg("--start-server")
-        // Uncomment following lines to debug locally.
-        // .env("SCCACHE_LOG", "sccache=trace")
-        // .env("RUST_LOG_STYLE", "never")
-        // .env(
-        //     "SCCACHE_ERROR_LOG",
-        //     env::temp_dir().join("sccache_local_daemon.txt"),
-        // )
-        .env("SCCACHE_CONF", cfg_path)
-        .env("SCCACHE_CACHED_CONF", cached_cfg_path)
-        .status()
-        .unwrap()
-        .success()
-    {
-        panic!("Failed to start local daemon");
-    }
-}
-
-pub fn stop_local_daemon() -> bool {
-    trace!("sccache --stop-server");
-    sccache_command()
-        .arg("--stop-server")
-        .stdout(Stdio::null())
-        .stderr(Stdio::null())
-        .status()
-        .is_ok_and(|status| status.success())
-}
-
-pub fn get_stats<F: 'static + Fn(ServerInfo)>(f: F) {
-    sccache_command()
-        .args(["--show-stats", "--stats-format=json"])
-        .assert()
-        .success()
-        .stdout(predicate::function(move |output: &[u8]| {
-            let s = str::from_utf8(output).expect("Output not UTF-8");
-            let stats = serde_json::from_str(s).expect("Failed to parse JSON stats");
-            eprintln!("get server stats: {stats:?}");
-            f(stats);
-            true
-        }));
-}
-
-#[allow(unused)]
-pub fn zero_stats() {
-    trace!("sccache --zero-stats");
-    drop(
-        sccache_command()
-            .arg("--zero-stats")
-            .stdout(Stdio::null())
-            .stderr(Stdio::null())
-            .status(),
-    );
-}
-
-pub fn write_json_cfg<T: Serialize>(path: &Path, filename: &str, contents: &T) {
-    let p = path.join(filename);
-    let mut f = fs::File::create(p).unwrap();
-    f.write_all(&serde_json::to_vec(contents).unwrap()).unwrap();
-}
+#[cfg(feature = "dist-server")]
+pub mod dist;
 
-pub fn write_source(path: &Path, filename: &str, contents: &str) {
-    let p = path.join(filename);
-    let mut f = fs::File::create(p).unwrap();
-    f.write_all(contents.as_bytes()).unwrap();
-}
+pub const TC_CACHE_SIZE: u64 = 1024 * 1024 * 1024; // 1 gig
 
 pub fn init_cargo(path: &Path, cargo_name: &str) -> PathBuf {
     let cargo_path = path.join(cargo_name);
@@ -129,559 +33,14 @@ pub fn prune_command(mut cmd: Command) -> Command {
     cmd
 }
 
-pub fn sccache_command() -> Command {
-    prune_command(Command::new(assert_cmd::cargo::cargo_bin("sccache")))
-}
-
-pub fn cargo_command() -> Command {
-    prune_command(Command::new("cargo"))
-}
-
-#[cfg(feature = "dist-server")]
-pub fn sccache_dist_path() -> PathBuf {
-    assert_cmd::cargo::cargo_bin("sccache-dist")
-}
-
-pub fn sccache_client_cfg(
-    tmpdir: &Path,
-    preprocessor_cache_mode: bool,
-) -> sccache::config::FileConfig {
-    let cache_relpath = "client-cache";
-    let dist_cache_relpath = "client-dist-cache";
-    fs::create_dir(tmpdir.join(cache_relpath)).unwrap();
-    fs::create_dir(tmpdir.join(dist_cache_relpath)).unwrap();
-
-    let disk_cache = sccache::config::DiskCacheConfig {
-        dir: tmpdir.join(cache_relpath),
-        preprocessor_cache_mode: sccache::config::PreprocessorCacheModeConfig {
-            use_preprocessor_cache_mode: preprocessor_cache_mode,
-            ..Default::default()
-        },
-        ..Default::default()
-    };
-    sccache::config::FileConfig {
-        cache: sccache::config::CacheConfigs {
-            azure: None,
-            disk: Some(disk_cache),
-            gcs: None,
-            gha: None,
-            memcached: None,
-            redis: None,
-            s3: None,
-            webdav: None,
-            oss: None,
-        },
-        dist: sccache::config::DistConfig {
-            auth: Default::default(), // dangerously_insecure
-            scheduler_url: None,
-            cache_dir: tmpdir.join(dist_cache_relpath),
-            toolchains: vec![],
-            toolchain_cache_size: TC_CACHE_SIZE,
-            rewrite_includes_only: false, // TODO
-        },
-        server_startup_timeout_ms: None,
-    }
-}
-
-#[cfg(feature = "dist-server")]
-fn sccache_scheduler_cfg() -> sccache::config::scheduler::Config {
-    sccache::config::scheduler::Config {
-        public_addr: SocketAddr::from(([0, 0, 0, 0], SCHEDULER_PORT)),
-        client_auth: sccache::config::scheduler::ClientAuth::Insecure,
-        server_auth: sccache::config::scheduler::ServerAuth::Token {
-            token: DIST_SERVER_TOKEN.to_owned(),
-        },
-    }
-}
-
-#[cfg(feature = "dist-server")]
-fn sccache_server_cfg(
-    tmpdir: &Path,
-    scheduler_url: HTTPUrl,
-    server_ip: IpAddr,
-) -> sccache::config::server::Config {
-    let relpath = "server-cache";
-    fs::create_dir(tmpdir.join(relpath)).unwrap();
-
-    sccache::config::server::Config {
-        builder: sccache::config::server::BuilderType::Overlay {
-            build_dir: BUILD_DIR_CONTAINER_PATH.into(),
-            bwrap_path: DIST_IMAGE_BWRAP_PATH.into(),
-        },
-        cache_dir: Path::new(CONFIGS_CONTAINER_PATH).join(relpath),
-        public_addr: SocketAddr::new(server_ip, SERVER_PORT),
-        bind_address: Some(SocketAddr::from(([0, 0, 0, 0], SERVER_PORT))),
-        scheduler_url,
-        scheduler_auth: sccache::config::server::SchedulerAuth::Token {
-            token: DIST_SERVER_TOKEN.to_owned(),
-        },
-        toolchain_cache_size: TC_CACHE_SIZE,
-    }
-}
-
-// TODO: this is copied from the sccache-dist binary - it's not clear where would be a better place to put the
-// code so that it can be included here
-#[cfg(feature = "dist-server")]
-fn create_server_token(server_id: ServerId, auth_token: &str) -> String {
-    format!("{} {}", server_id.addr(), auth_token)
-}
-
-#[cfg(feature = "dist-server")]
-pub enum ServerHandle {
-    Container { cid: String, url: HTTPUrl },
-    Process { pid: Pid, url: HTTPUrl },
-}
-
-#[cfg(feature = "dist-server")]
-pub struct DistSystem {
-    sccache_dist: PathBuf,
-    tmpdir: PathBuf,
-
-    scheduler_name: Option<String>,
-    server_names: Vec<String>,
-    server_pids: Vec<Pid>,
-}
-
-#[cfg(feature = "dist-server")]
-impl DistSystem {
-    pub fn new(sccache_dist: &Path, tmpdir: &Path) -> Self {
-        // Make sure the docker image is available, building it if necessary
-        let mut child = Command::new("docker")
-            .args(["build", "-q", "-t", DIST_IMAGE, "-"])
-            .stdin(Stdio::piped())
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()
-            .unwrap();
-        child
-            .stdin
-            .as_mut()
-            .unwrap()
-            .write_all(DIST_DOCKERFILE.as_bytes())
-            .unwrap();
-        let output = child.wait_with_output().unwrap();
-        check_output(&output);
-
-        let tmpdir = tmpdir.join("distsystem");
-        fs::create_dir(&tmpdir).unwrap();
-
-        Self {
-            sccache_dist: sccache_dist.to_owned(),
-            tmpdir,
-
-            scheduler_name: None,
-            server_names: vec![],
-            server_pids: vec![],
-        }
-    }
-
-    pub fn add_scheduler(&mut self) {
-        let scheduler_cfg_relpath = "scheduler-cfg.json";
-        let scheduler_cfg_path = self.tmpdir.join(scheduler_cfg_relpath);
-        let scheduler_cfg_container_path =
-            Path::new(CONFIGS_CONTAINER_PATH).join(scheduler_cfg_relpath);
-        let scheduler_cfg = sccache_scheduler_cfg();
-        fs::File::create(scheduler_cfg_path)
-            .unwrap()
-            .write_all(&serde_json::to_vec(&scheduler_cfg).unwrap())
-            .unwrap();
-
-        // Create the scheduler
-        let scheduler_name = make_container_name("scheduler");
-        let output = Command::new("docker")
-            .args([
-                "run",
-                "--name",
-                &scheduler_name,
-                "-e",
-                "SCCACHE_NO_DAEMON=1",
-                "-e",
-                "SCCACHE_LOG=debug",
-                "-e",
-                "RUST_BACKTRACE=1",
-                "--network",
-                "host",
-                "-v",
-                &format!("{}:/sccache-dist:z", self.sccache_dist.to_str().unwrap()),
-                "-v",
-                &format!(
-                    "{}:{}:z",
-                    self.tmpdir.to_str().unwrap(),
-                    CONFIGS_CONTAINER_PATH
-                ),
-                "-d",
-                DIST_IMAGE,
-                "bash",
-                "-c",
-                &format!(
-                    r#"
-                    set -o errexit &&
-                    exec /sccache-dist scheduler --config {cfg}
-                "#,
-                    cfg = scheduler_cfg_container_path.to_str().unwrap()
-                ),
-            ])
-            .output()
-            .unwrap();
-        self.scheduler_name = Some(scheduler_name);
-
-        check_output(&output);
-
-        let scheduler_url = self.scheduler_url();
-        wait_for_http(scheduler_url, Duration::from_millis(100), MAX_STARTUP_WAIT);
-        wait_for(
-            || {
-                let status = self.scheduler_status();
-                if matches!(
-                    status,
-                    SchedulerStatusResult {
-                        num_servers: 0,
-                        num_cpus: _,
-                        in_progress: 0
-                    }
-                ) {
-                    Ok(())
-                } else {
-                    Err(format!("{:?}", status))
-                }
-            },
-            Duration::from_millis(100),
-            MAX_STARTUP_WAIT,
-        );
-    }
-
-    pub fn add_server(&mut self) -> ServerHandle {
-        let server_cfg_relpath = format!("server-cfg-{}.json", self.server_names.len());
-        let server_cfg_path = self.tmpdir.join(&server_cfg_relpath);
-        let server_cfg_container_path = Path::new(CONFIGS_CONTAINER_PATH).join(server_cfg_relpath);
-
-        let server_name = make_container_name("server");
-        let output = Command::new("docker")
-            .args([
-                "run",
-                // Important for the bubblewrap builder
-                "--privileged",
-                "--name",
-                &server_name,
-                "-e",
-                "SCCACHE_LOG=debug",
-                "-e",
-                "RUST_BACKTRACE=1",
-                "--network",
-                "host",
-                "-v",
-                &format!("{}:/sccache-dist:z", self.sccache_dist.to_str().unwrap()),
-                "-v",
-                &format!(
-                    "{}:{}:z",
-                    self.tmpdir.to_str().unwrap(),
-                    CONFIGS_CONTAINER_PATH
-                ),
-                "-d",
-                DIST_IMAGE,
-                "bash",
-                "-c",
-                &format!(
-                    r#"
-                    set -o errexit &&
-                    while [ ! -f {cfg}.ready ]; do sleep 0.1; done &&
-                    exec /sccache-dist server --config {cfg}
-                "#,
-                    cfg = server_cfg_container_path.to_str().unwrap()
-                ),
-            ])
-            .output()
-            .unwrap();
-        self.server_names.push(server_name.clone());
-
-        check_output(&output);
-
-        let server_ip = IpAddr::from_str("127.0.0.1").unwrap();
-        let server_cfg = sccache_server_cfg(&self.tmpdir, self.scheduler_url(), server_ip);
-        fs::File::create(&server_cfg_path)
-            .unwrap()
-            .write_all(&serde_json::to_vec(&server_cfg).unwrap())
-            .unwrap();
-        fs::File::create(format!("{}.ready", server_cfg_path.to_str().unwrap())).unwrap();
-
-        let url = HTTPUrl::from_url(
-            reqwest::Url::parse(&format!("https://{}:{}", server_ip, SERVER_PORT)).unwrap(),
-        );
-        let handle = ServerHandle::Container {
-            cid: server_name,
-            url,
-        };
-        self.wait_server_ready(&handle);
-        handle
-    }
-
-    pub fn add_custom_server<S: dist::ServerIncoming + 'static>(
-        &mut self,
-        handler: S,
-    ) -> ServerHandle {
-        let server_addr = {
-            let ip = IpAddr::from_str("127.0.0.1").unwrap();
-            let listener = net::TcpListener::bind(SocketAddr::from((ip, 0))).unwrap();
-            listener.local_addr().unwrap()
-        };
-        let token = create_server_token(ServerId::new(server_addr), DIST_SERVER_TOKEN);
-        let server = dist::http::Server::new(
-            server_addr,
-            Some(SocketAddr::from(([0, 0, 0, 0], server_addr.port()))),
-            self.scheduler_url().to_url(),
-            token,
-            handler,
-        )
-        .unwrap();
-        let pid = match unsafe { nix::unistd::fork() }.unwrap() {
-            ForkResult::Parent { child } => {
-                self.server_pids.push(child);
-                child
-            }
-            ForkResult::Child => {
-                env::set_var("SCCACHE_LOG", "sccache=trace");
-                env_logger::try_init().unwrap();
-                server.start().unwrap();
-                unreachable!();
-            }
-        };
-
-        let url =
-            HTTPUrl::from_url(reqwest::Url::parse(&format!("https://{}", server_addr)).unwrap());
-        let handle = ServerHandle::Process { pid, url };
-        self.wait_server_ready(&handle);
-        handle
-    }
-
-    pub fn restart_server(&mut self, handle: &ServerHandle) {
-        match handle {
-            ServerHandle::Container { cid, url: _ } => {
-                let output = Command::new("docker")
-                    .args(["restart", cid])
-                    .output()
-                    .unwrap();
-                check_output(&output);
-            }
-            ServerHandle::Process { pid: _, url: _ } => {
-                // TODO: pretty easy, just no need yet
-                panic!("restart not yet implemented for pids")
-            }
-        }
-        self.wait_server_ready(handle)
-    }
-
-    pub fn wait_server_ready(&mut self, handle: &ServerHandle) {
-        let url = match handle {
-            ServerHandle::Container { cid: _, url } | ServerHandle::Process { pid: _, url } => {
-                url.clone()
-            }
-        };
-        wait_for_http(url, Duration::from_millis(100), MAX_STARTUP_WAIT);
-        wait_for(
-            || {
-                let status = self.scheduler_status();
-                if matches!(
-                    status,
-                    SchedulerStatusResult {
-                        num_servers: 1,
-                        num_cpus: _,
-                        in_progress: 0
-                    }
-                ) {
-                    Ok(())
-                } else {
-                    Err(format!("{:?}", status))
-                }
-            },
-            Duration::from_millis(100),
-            MAX_STARTUP_WAIT,
-        );
-    }
-
-    pub fn scheduler_url(&self) -> HTTPUrl {
-        let url = format!("http://127.0.0.1:{}", SCHEDULER_PORT);
-        HTTPUrl::from_url(reqwest::Url::parse(&url).unwrap())
-    }
-
-    fn scheduler_status(&self) -> SchedulerStatusResult {
-        let res = reqwest::blocking::get(dist::http::urls::scheduler_status(
-            &self.scheduler_url().to_url(),
-        ))
-        .unwrap();
-        assert!(res.status().is_success());
-        bincode::deserialize_from(res).unwrap()
-    }
-}
-
-// If you want containers to hang around (e.g. for debugging), comment out the "rm -f" lines
-#[cfg(feature = "dist-server")]
-impl Drop for DistSystem {
-    fn drop(&mut self) {
-        let mut did_err = false;
-
-        // Panicking halfway through drop would either abort (if it's a double panic) or leave us with
-        // resources that aren't yet cleaned up. Instead, do as much as possible then decide what to do
-        // at the end - panic (if not already doing so) or let the panic continue
-        macro_rules! droperr {
-            ($e:expr) => {
-                match $e {
-                    Ok(()) => (),
-                    Err(e) => {
-                        did_err = true;
-                        eprintln!("Error with {}: {}", stringify!($e), e)
-                    }
-                }
-            };
-        }
-
-        let mut logs = vec![];
-        let mut outputs = vec![];
-        let mut exits = vec![];
-
-        if let Some(scheduler_name) = self.scheduler_name.as_ref() {
-            droperr!(Command::new("docker")
-                .args(["logs", scheduler_name])
-                .output()
-                .map(|o| logs.push((scheduler_name, o))));
-            droperr!(Command::new("docker")
-                .args(["kill", scheduler_name])
-                .output()
-                .map(|o| outputs.push((scheduler_name, o))));
-            droperr!(Command::new("docker")
-                .args(["rm", "-f", scheduler_name])
-                .output()
-                .map(|o| outputs.push((scheduler_name, o))));
-        }
-        for server_name in self.server_names.iter() {
-            droperr!(Command::new("docker")
-                .args(["logs", server_name])
-                .output()
-                .map(|o| logs.push((server_name, o))));
-            droperr!(Command::new("docker")
-                .args(["kill", server_name])
-                .output()
-                .map(|o| outputs.push((server_name, o))));
-            droperr!(Command::new("docker")
-                .args(["rm", "-f", server_name])
-                .output()
-                .map(|o| outputs.push((server_name, o))));
-        }
-        for &pid in self.server_pids.iter() {
-            droperr!(nix::sys::signal::kill(pid, Signal::SIGINT));
-            thread::sleep(Duration::from_millis(100));
-            let mut killagain = true; // Default to trying to kill again, e.g. if there was an error waiting on the pid
-            droperr!(
-                nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)).map(|ws| {
-                    if ws != WaitStatus::StillAlive {
-                        killagain = false;
-                        exits.push(ws)
-                    }
-                })
-            );
-            if killagain {
-                eprintln!("SIGINT didn't kill process, trying SIGKILL");
-                droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL));
-                droperr!(nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG))
-                    .map_err(|e| e.to_string())
-                    .and_then(|ws| if ws == WaitStatus::StillAlive {
-                        Err("process alive after sigkill".to_owned())
-                    } else {
-                        exits.push(ws);
-                        Ok(())
-                    }));
-            }
-        }
-
-        for (
-            container,
-            Output {
-                status,
-                stdout,
-                stderr,
-            },
-        ) in logs
-        {
-            println!(
-                "LOGS == ({}) ==\n> {} <:\n## STDOUT\n{}\n\n## STDERR\n{}\n====",
-                status,
-                container,
-                String::from_utf8_lossy(&stdout),
-                String::from_utf8_lossy(&stderr)
-            );
-        }
-        for (
-            container,
-            Output {
-                status,
-                stdout,
-                stderr,
-            },
-        ) in outputs
-        {
-            println!(
-                "OUTPUTS == ({}) ==\n> {} <:\n## STDOUT\n{}\n\n## STDERR\n{}\n====",
-                status,
-                container,
-                String::from_utf8_lossy(&stdout),
-                String::from_utf8_lossy(&stderr)
-            );
-        }
-        for exit in exits {
-            println!("EXIT: {:?}", exit)
-        }
-
-        if did_err && !thread::panicking() {
-            panic!("Encountered failures during dist system teardown")
-        }
-    }
-}
-
-fn make_container_name(tag: &str) -> String {
-    format!(
-        "{}_{}_{}",
-        CONTAINER_NAME_PREFIX,
-        tag,
-        Uuid::new_v4().hyphenated()
-    )
-}
-
-fn check_output(output: &Output) {
-    if !output.status.success() {
-        println!("{}\n\n[BEGIN STDOUT]\n===========\n{}\n===========\n[FIN STDOUT]\n\n[BEGIN STDERR]\n===========\n{}\n===========\n[FIN STDERR]\n\n",
-            output.status, String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr));
-        panic!()
-    }
-}
-
-#[cfg(feature = "dist-server")]
-fn wait_for_http(url: HTTPUrl, interval: Duration, max_wait: Duration) {
-    // TODO: after upgrading to reqwest >= 0.9, use 'danger_accept_invalid_certs' and stick with that rather than tcp
-    wait_for(
-        || {
-            let url = url.to_url();
-            let url = url.socket_addrs(|| None).unwrap();
-            match net::TcpStream::connect(url.as_slice()) {
-                Ok(_) => Ok(()),
-                Err(e) => Err(e.to_string()),
-            }
-        },
-        interval,
-        max_wait,
-    )
+pub fn write_json_cfg<T: Serialize>(path: &Path, filename: &str, contents: &T) {
+    // Creates `path/filename`, then writes `contents` into it serialized as JSON.
+    let mut cfg_file = fs::File::create(path.join(filename)).unwrap();
+    cfg_file.write_all(&serde_json::to_vec(contents).unwrap()).unwrap();
 }
 
-fn wait_for<F: Fn() -> Result<(), String>>(f: F, interval: Duration, max_wait: Duration) {
-    let start = Instant::now();
-    let mut lasterr;
-    loop {
-        match f() {
-            Ok(()) => return,
-            Err(e) => lasterr = e,
-        }
-        if start.elapsed() > max_wait {
-            break;
-        }
-        thread::sleep(interval)
-    }
-    panic!("wait timed out, last error result: {}", lasterr)
+pub fn write_source(path: &Path, filename: &str, contents: &str) {
+    // Creates `path/filename` and writes the bytes of `contents` into it unchanged.
+    let mut src_file = fs::File::create(path.join(filename)).unwrap();
+    src_file.write_all(contents.as_bytes()).unwrap();
 }
diff --git a/tests/system.rs b/tests/system.rs
index d4ef4fd63..9af481351 100644
--- a/tests/system.rs
+++ b/tests/system.rs
@@ -20,8 +20,8 @@
 #[macro_use]
 extern crate log;
 use crate::harness::{
-    get_stats, sccache_client_cfg, sccache_command, start_local_daemon, stop_local_daemon,
-    write_json_cfg, write_source, zero_stats,
+    client::{sccache_client_cfg, SccacheClient},
+    write_json_cfg, write_source,
 };
 use assert_cmd::prelude::*;
 use fs::File;
@@ -211,7 +211,7 @@ fn copy_to_tempdir(inputs: &[&str], tempdir: &Path) {
     }
 }
 
-fn test_basic_compile(compiler: Compiler, tempdir: &Path) {
+fn test_basic_compile(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -223,7 +223,8 @@ fn test_basic_compile(compiler: Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
         .envs(env_vars.clone())
@@ -231,18 +232,18 @@ fn test_basic_compile(compiler: Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-        let adv_key = adv_key_kind("c", compiler.name);
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
+    let adv_key = adv_key_kind("c", compiler.name);
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_key).unwrap());
     trace!("compile");
     fs::remove_file(&out_file).unwrap();
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
         .envs(env_vars)
@@ -250,20 +251,19 @@ fn test_basic_compile(compiler: Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(2, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-        let adv_key = adv_key_kind("c", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(2, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
+    let adv_key = adv_key_kind("c", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_key).unwrap());
 }
 
-fn test_noncacheable_stats(compiler: Compiler, tempdir: &Path) {
+fn test_noncacheable_stats(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -273,7 +273,8 @@ fn test_noncacheable_stats(compiler: Compiler, tempdir: &Path) {
     copy_to_tempdir(&[INPUT], tempdir);
 
     trace!("compile");
-    sccache_command()
+    client
+        .cmd()
         .arg(&exe)
         .arg("-E")
         .arg(INPUT)
@@ -282,15 +283,14 @@ fn test_noncacheable_stats(compiler: Compiler, tempdir: &Path) {
         .assert()
         .success();
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(0, info.stats.requests_executed);
-        assert_eq!(1, info.stats.not_cached.len());
-        assert_eq!(Some(&1), info.stats.not_cached.get("-E"));
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(0, stats.requests_executed);
+    assert_eq!(1, stats.not_cached.len());
+    assert_eq!(Some(&1), stats.not_cached.get("-E"));
 }
 
-fn test_msvc_deps(compiler: Compiler, tempdir: &Path) {
+fn test_msvc_deps(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -300,7 +300,8 @@ fn test_msvc_deps(compiler: Compiler, tempdir: &Path) {
     trace!("compile with /sourceDependencies");
     let mut args = compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new());
     args.push("/sourceDependenciestest.o.json".into());
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars)
@@ -321,7 +322,7 @@ fn test_msvc_deps(compiler: Compiler, tempdir: &Path) {
     assert_ne!(includes.len(), 0);
 }
 
-fn test_msvc_responsefile(compiler: Compiler, tempdir: &Path) {
+fn test_msvc_responsefile(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name: _,
         exe,
@@ -337,7 +338,8 @@ fn test_msvc_responsefile(compiler: Compiler, tempdir: &Path) {
     }
 
     let args = vec_from!(OsString, exe, &format!("@{cmd_file_name}"));
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars)
@@ -348,7 +350,7 @@ fn test_msvc_responsefile(compiler: Compiler, tempdir: &Path) {
     fs::remove_file(&out_file).unwrap();
 }
 
-fn test_gcc_mp_werror(compiler: Compiler, tempdir: &Path) {
+fn test_gcc_mp_werror(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -360,7 +362,8 @@ fn test_gcc_mp_werror(compiler: Compiler, tempdir: &Path) {
         OsString, "-MD", "-MP", "-MF", "foo.pp", "-Werror"
     ));
     // This should fail, but the error should be from the #error!
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars)
@@ -373,14 +376,18 @@ fn test_gcc_mp_werror(compiler: Compiler, tempdir: &Path) {
         );
 }
 
-fn test_gcc_fprofile_generate_source_changes(compiler: Compiler, tempdir: &Path) {
+fn test_gcc_fprofile_generate_source_changes(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+) {
     let Compiler {
         name,
         exe,
         env_vars,
     } = compiler;
     trace!("test -fprofile-generate with different source inputs");
-    zero_stats();
+    client.zero_stats();
     const SRC: &str = "source.c";
     write_source(
         tempdir,
@@ -398,31 +405,31 @@ int main(int argc, char** argv) {
     let mut args = compile_cmdline(name, exe, SRC, OUTPUT, Vec::new());
     args.extend(vec_from!(OsString, "-fprofile-generate"));
     trace!("compile source.c (1)");
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
     // Compile the same source again to ensure we can get a cache hit.
     trace!("compile source.c (2)");
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
     // Now write out a slightly different source file that will preprocess to the same thing,
     // modulo line numbers. This should not be a cache hit because line numbers are important
     // with -fprofile-generate.
@@ -441,18 +448,18 @@ int main(int argc, char** argv) {
 ",
     );
     trace!("compile source.c (3)");
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars)
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("C/C++").unwrap());
 }
 
 /* test case like this:
@@ -463,66 +470,74 @@ int main(int argc, char** argv) {
     sccache g++ -c -g -gsplit-dwarf test.cc -o test2.o   --- > cache miss
     strings test2.o |grep test2.dwo
 */
-fn test_split_dwarf_object_generate_output_dir_changes(compiler: Compiler, tempdir: &Path) {
+fn test_split_dwarf_object_generate_output_dir_changes(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+) {
     let Compiler {
         name,
         exe,
         env_vars,
     } = compiler;
     trace!("test -g -gsplit-dwarf with different output");
-    zero_stats();
+    client.zero_stats();
     const SRC: &str = "source.c";
     write_source(tempdir, SRC, "int test(){}");
     let mut args = compile_cmdline(name, exe.clone(), SRC, "test1.o", Vec::new());
     args.extend(vec_from!(OsString, "-g"));
     args.extend(vec_from!(OsString, "-gsplit-dwarf"));
     trace!("compile source.c (1)");
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all()); // first build of test1.o is expected to miss
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
     // Compile the same source again to ensure we can get a cache hit.
     trace!("compile source.c (2)");
-    sccache_command()
+    client
+        .cmd()
         .args(&args)
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.cache_hits.all()); // identical command line is expected to hit
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
     // Compile the same source again with different output
     // to ensure we can force generate new object file.
     let mut args2 = compile_cmdline(name, exe, SRC, "test2.o", Vec::new());
     args2.extend(vec_from!(OsString, "-g"));
     args2.extend(vec_from!(OsString, "-gsplit-dwarf"));
-    trace!("compile source.c (2)");
-    sccache_command()
+    trace!("compile source.c (3)");
+    client
+        .cmd()
         .args(&args2)
         .current_dir(tempdir)
-        .envs(env_vars.clone())
+        .envs(env_vars)
         .assert()
         .success();
-    get_stats(|info| {
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all()); // changed output name (test2.o) must miss
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("C/C++").unwrap());
 }
 
-fn test_gcc_clang_no_warnings_from_macro_expansion(compiler: Compiler, tempdir: &Path) {
+fn test_gcc_clang_no_warnings_from_macro_expansion(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+) {
     let Compiler {
         name,
         exe,
@@ -533,7 +548,8 @@ fn test_gcc_clang_no_warnings_from_macro_expansion(compiler: Compiler, tempdir:
     copy_to_tempdir(&[INPUT_MACRO_EXPANSION], tempdir);
 
     trace!("compile");
-    sccache_command()
+    client
+        .cmd()
         .args(
             [
                 &compile_cmdline(name, exe, INPUT_MACRO_EXPANSION, OUTPUT, Vec::new())[..],
@@ -548,7 +564,7 @@ fn test_gcc_clang_no_warnings_from_macro_expansion(compiler: Compiler, tempdir:
         .stderr(predicates::str::contains("warning:").from_utf8().not());
 }
 
-fn test_compile_with_define(compiler: Compiler, tempdir: &Path) {
+fn test_compile_with_define(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -559,7 +575,8 @@ fn test_compile_with_define(compiler: Compiler, tempdir: &Path) {
     copy_to_tempdir(&[INPUT_WITH_DEFINE], tempdir);
 
     trace!("compile");
-    sccache_command()
+    client
+        .cmd()
         .args(
             [
                 &compile_cmdline(name, exe, INPUT_WITH_DEFINE, OUTPUT, Vec::new())[..],
@@ -574,7 +591,7 @@ fn test_compile_with_define(compiler: Compiler, tempdir: &Path) {
         .stderr(predicates::str::contains("warning:").from_utf8().not());
 }
 
-fn test_gcc_clang_depfile(compiler: Compiler, tempdir: &Path) {
+fn test_gcc_clang_depfile(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -585,7 +602,8 @@ fn test_gcc_clang_depfile(compiler: Compiler, tempdir: &Path) {
     fs::copy(tempdir.join(INPUT), tempdir.join("same-content.c")).unwrap();
 
     trace!("compile");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
             exe.clone(),
@@ -598,7 +616,8 @@ fn test_gcc_clang_depfile(compiler: Compiler, tempdir: &Path) {
         .envs(env_vars.clone())
         .assert()
         .success();
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
             exe,
@@ -624,26 +643,31 @@ fn test_gcc_clang_depfile(compiler: Compiler, tempdir: &Path) {
     assert_ne!(first, second);
 }
 
-fn run_sccache_command_tests(compiler: Compiler, tempdir: &Path, preprocessor_cache_mode: bool) {
+fn run_sccache_command_tests(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+    preprocessor_cache_mode: bool,
+) {
     if compiler.name != "clang++" {
-        test_basic_compile(compiler.clone(), tempdir);
+        test_basic_compile(client, compiler.clone(), tempdir);
     }
-    test_compile_with_define(compiler.clone(), tempdir);
+    test_compile_with_define(client, compiler.clone(), tempdir);
     if compiler.name == "cl.exe" {
-        test_msvc_deps(compiler.clone(), tempdir);
-        test_msvc_responsefile(compiler.clone(), tempdir);
+        test_msvc_deps(client, compiler.clone(), tempdir);
+        test_msvc_responsefile(client, compiler.clone(), tempdir);
     }
     if compiler.name == "gcc" {
-        test_gcc_mp_werror(compiler.clone(), tempdir);
-        test_gcc_fprofile_generate_source_changes(compiler.clone(), tempdir);
+        test_gcc_mp_werror(client, compiler.clone(), tempdir);
+        test_gcc_fprofile_generate_source_changes(client, compiler.clone(), tempdir);
     }
     if compiler.name == "clang" || compiler.name == "gcc" {
-        test_gcc_clang_no_warnings_from_macro_expansion(compiler.clone(), tempdir);
-        test_split_dwarf_object_generate_output_dir_changes(compiler.clone(), tempdir);
-        test_gcc_clang_depfile(compiler.clone(), tempdir);
+        test_gcc_clang_no_warnings_from_macro_expansion(client, compiler.clone(), tempdir);
+        test_split_dwarf_object_generate_output_dir_changes(client, compiler.clone(), tempdir);
+        test_gcc_clang_depfile(client, compiler.clone(), tempdir);
     }
     if compiler.name == "clang++" {
-        test_clang_multicall(compiler.clone(), tempdir);
+        test_clang_multicall(client, compiler.clone(), tempdir);
     }
 
     // If we are testing with clang-14 or later, we expect the -fminimize-whitespace flag to be used.
@@ -672,6 +696,7 @@ fn run_sccache_command_tests(compiler: Compiler, tempdir: &Path, preprocessor_ca
             ),
         };
         test_clang_cache_whitespace_normalization(
+            client,
             compiler,
             tempdir,
             !is_appleclang && major >= 14,
@@ -679,6 +704,7 @@ fn run_sccache_command_tests(compiler: Compiler, tempdir: &Path, preprocessor_ca
         );
     } else {
         test_clang_cache_whitespace_normalization(
+            client,
             compiler,
             tempdir,
             false,
@@ -687,7 +713,7 @@ fn run_sccache_command_tests(compiler: Compiler, tempdir: &Path, preprocessor_ca
     }
 }
 
-fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
+fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -699,7 +725,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -717,34 +744,28 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     fs::remove_file(&out_file).unwrap();
     trace!("compile A request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(5, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(4, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &1,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &1,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(5, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(4, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     trace!("compile A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -762,44 +783,38 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     fs::remove_file(&out_file).unwrap();
     trace!("compile A request stats");
-    get_stats(|info| {
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(10, info.stats.requests_executed);
-        assert_eq!(4, info.stats.cache_hits.all());
-        assert_eq!(4, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &1,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &1,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(10, stats.requests_executed);
+    assert_eq!(4, stats.cache_hits.all());
+    assert_eq!(4, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -817,43 +832,37 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     fs::remove_file(&out_file).unwrap();
     trace!("compile B request stats");
-    get_stats(|info| {
-        assert_eq!(3, info.stats.compile_requests);
-        assert_eq!(15, info.stats.requests_executed);
-        assert_eq!(5, info.stats.cache_hits.all());
-        assert_eq!(7, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(3, stats.compile_requests);
+    assert_eq!(15, stats.requests_executed);
+    assert_eq!(5, stats.cache_hits.all());
+    assert_eq!(7, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     trace!("compile ptx");
     let out_file = tempdir.join("test.ptx");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -870,43 +879,37 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     fs::remove_file(&out_file).unwrap();
     trace!("compile ptx request stats");
-    get_stats(|info| {
-        assert_eq!(4, info.stats.compile_requests);
-        assert_eq!(17, info.stats.requests_executed);
-        assert_eq!(5, info.stats.cache_hits.all());
-        assert_eq!(8, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&3, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(4, stats.compile_requests);
+    assert_eq!(17, stats.requests_executed);
+    assert_eq!(5, stats.cache_hits.all());
+    assert_eq!(8, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&3, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&3, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     trace!("compile cubin");
     let out_file = tempdir.join("test.cubin");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -923,39 +926,32 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     fs::remove_file(&out_file).unwrap();
     trace!("compile cubin request stats");
-    get_stats(|info| {
-        assert_eq!(5, info.stats.compile_requests);
-        assert_eq!(20, info.stats.requests_executed);
-        assert_eq!(6, info.stats.cache_hits.all());
-        assert_eq!(9, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&4, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &2,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(5, stats.compile_requests);
+    assert_eq!(20, stats.requests_executed);
+    assert_eq!(6, stats.cache_hits.all());
+    assert_eq!(9, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&3, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&4, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&3, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&4, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     // Test to ensure #2299 doesn't regress (https://github.com/mozilla/sccache/issues/2299)
     let test_2299_src_name = "test_2299.cu";
@@ -979,7 +975,8 @@ int main(int argc, char** argv) {
 ";
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_1);
     trace!("compile test_2299.cu (1)");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1003,43 +1000,37 @@ int main(int argc, char** argv) {
         .unwrap());
     fs::remove_file(&test_2299_out_file).unwrap();
     trace!("compile test_2299.cu request stats (1)");
-    get_stats(|info| {
-        assert_eq!(6, info.stats.compile_requests);
-        assert_eq!(25, info.stats.requests_executed);
-        assert_eq!(6, info.stats.cache_hits.all());
-        assert_eq!(13, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &3,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &3,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(6, stats.compile_requests);
+    assert_eq!(25, stats.requests_executed);
+    assert_eq!(6, stats.cache_hits.all());
+    assert_eq!(13, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&3, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&3, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&3, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&3, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&3, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&3, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_2);
     trace!("compile test_2299.cu (2)");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1063,44 +1054,38 @@ int main(int argc, char** argv) {
         .unwrap());
     fs::remove_file(&test_2299_out_file).unwrap();
     trace!("compile test_2299.cu request stats (2)");
-    get_stats(|info| {
-        assert_eq!(7, info.stats.compile_requests);
-        assert_eq!(30, info.stats.requests_executed);
-        assert_eq!(8, info.stats.cache_hits.all());
-        assert_eq!(15, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&4, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &4,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&4, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &4,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(7, stats.compile_requests);
+    assert_eq!(30, stats.requests_executed);
+    assert_eq!(8, stats.cache_hits.all());
+    assert_eq!(15, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&2, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&4, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&4, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&4, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&2, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&4, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 
     // Recompile the original version again to ensure only cache hits
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_1);
     trace!("compile test_2299.cu (3)");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1124,48 +1109,44 @@ int main(int argc, char** argv) {
         .unwrap());
     fs::remove_file(&test_2299_out_file).unwrap();
     trace!("compile test_2299.cu request stats (3)");
-    get_stats(|info| {
-        assert_eq!(8, info.stats.compile_requests);
-        assert_eq!(35, info.stats.requests_executed);
-        assert_eq!(12, info.stats.cache_hits.all());
-        assert_eq!(15, info.stats.cache_misses.all());
-        assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get("PTX").unwrap());
-        assert_eq!(&5, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &4,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get("PTX").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-        let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-        let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-        assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-        assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&5, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-        assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(
-            &4,
-            info.stats.cache_misses.get_adv(&adv_cudafe_key).unwrap()
-        );
-        assert_eq!(&5, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(8, stats.compile_requests);
+    assert_eq!(35, stats.requests_executed);
+    assert_eq!(12, stats.cache_hits.all());
+    assert_eq!(15, stats.cache_misses.all());
+    assert_eq!(&2, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&2, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&3, stats.cache_hits.get("PTX").unwrap());
+    assert_eq!(&5, stats.cache_hits.get("CUBIN").unwrap());
+    assert_eq!(&4, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&4, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
+    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
+    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
+    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&3, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&5, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
+    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
+    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
 }
 
-fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
+fn test_nvcc_proper_lang_stat_tracking(
+    client: &SccacheClient,
+    compiler: &Compiler,
+    tempdir: &Path,
+) {
     let Compiler {
         name,
         exe,
         env_vars,
     } = compiler;
-    zero_stats();
 
     println!("test_nvcc_proper_lang_stat_tracking: {}", name);
     // Compile multiple source files.
@@ -1173,10 +1154,11 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile CUDA A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
-            &exe,
+            exe,
             INPUT_FOR_CUDA_C,
             OUTPUT,
             Vec::new(),
@@ -1187,10 +1169,11 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile CUDA A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
-            &exe,
+            exe,
             INPUT_FOR_CUDA_C,
             OUTPUT,
             Vec::new(),
@@ -1201,48 +1184,48 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile C++ A");
-    sccache_command()
-        .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
+    client
+        .cmd()
+        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile C++ A");
-    sccache_command()
-        .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
+    client
+        .cmd()
+        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
-        .envs(env_vars)
+        .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
 
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(4, info.stats.compile_requests);
-        assert_eq!(14, info.stats.requests_executed);
-        assert_eq!(6, info.stats.cache_hits.all());
-        assert_eq!(4, info.stats.cache_misses.all());
-        assert!(info.stats.cache_hits.get("C/C++").is_none());
-        assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA (Device code)").unwrap());
-        assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert!(info.stats.cache_misses.get("C/C++").is_none());
-        assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
-        assert_eq!(
-            &1,
-            info.stats.cache_misses.get("CUDA (Device code)").unwrap()
-        );
-        assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(4, stats.compile_requests);
+    assert_eq!(14, stats.requests_executed);
+    assert_eq!(6, stats.cache_hits.all());
+    assert_eq!(4, stats.cache_misses.all());
+    assert!(stats.cache_hits.get("C/C++").is_none());
+    assert_eq!(&2, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
+    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
+    assert!(stats.cache_misses.get("C/C++").is_none());
+    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
 }
 
-fn run_sccache_nvcc_cuda_command_tests(compiler: Compiler, tempdir: &Path) {
-    test_nvcc_cuda_compiles(&compiler, tempdir);
-    test_nvcc_proper_lang_stat_tracking(compiler, tempdir);
+fn run_sccache_nvcc_cuda_command_tests(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
+    client.zero_stats();
+    test_nvcc_cuda_compiles(client, &compiler, tempdir);
+    client.zero_stats();
+    test_nvcc_proper_lang_stat_tracking(client, &compiler, tempdir);
 }
 
-fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
+fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -1254,7 +1237,8 @@ fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1269,18 +1253,18 @@ fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
     trace!("compile A");
     fs::remove_file(&out_file).unwrap();
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1295,21 +1279,21 @@ fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(2, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(2, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
             exe,
@@ -1324,26 +1308,28 @@ fn test_clang_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(3, info.stats.compile_requests);
-        assert_eq!(3, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
-        let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(3, stats.compile_requests);
+    assert_eq!(3, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
+    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
 }
 
-fn test_clang_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
+fn test_clang_proper_lang_stat_tracking(
+    client: &SccacheClient,
+    compiler: &Compiler,
+    tempdir: &Path,
+) {
     let Compiler {
         name,
         exe,
         env_vars,
     } = compiler;
-    zero_stats();
 
     println!("test_clang_proper_lang_stat_tracking: {}", name);
     // Compile multiple source files.
@@ -1351,10 +1337,11 @@ fn test_clang_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile CUDA A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
-            &exe,
+            exe,
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
@@ -1366,10 +1353,11 @@ fn test_clang_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile CUDA A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cuda_cmdline(
             name,
-            &exe,
+            exe,
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
@@ -1381,41 +1369,48 @@ fn test_clang_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile C++ A");
-    sccache_command()
-        .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
+    client
+        .cmd()
+        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
     trace!("compile C++ A");
-    sccache_command()
-        .args(compile_cmdline(name, &exe, INPUT, OUTPUT, Vec::new()))
+    client
+        .cmd()
+        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
         .current_dir(tempdir)
-        .envs(env_vars)
+        .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
 
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(4, info.stats.compile_requests);
-        assert_eq!(4, info.stats.requests_executed);
-        assert_eq!(2, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
-        assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(4, stats.compile_requests);
+    assert_eq!(4, stats.requests_executed);
+    assert_eq!(2, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
+    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
 }
 
-fn run_sccache_clang_cuda_command_tests(compiler: Compiler, tempdir: &Path) {
-    test_clang_cuda_compiles(&compiler, tempdir);
-    test_clang_proper_lang_stat_tracking(compiler, tempdir);
+fn run_sccache_clang_cuda_command_tests(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+) {
+    client.zero_stats();
+    test_clang_cuda_compiles(client, &compiler, tempdir);
+    client.zero_stats();
+    test_clang_proper_lang_stat_tracking(client, &compiler, tempdir);
 }
 
-fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) {
+fn test_hip_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -1429,7 +1424,8 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile A");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1444,18 +1440,18 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
     trace!("compile A");
     fs::remove_file(&out_file).unwrap();
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1470,21 +1466,21 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(2, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(2, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("HIP").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_hip_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1499,20 +1495,19 @@ fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(3, info.stats.compile_requests);
-        assert_eq!(3, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(3, stats.compile_requests);
+    assert_eq!(3, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("HIP").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_hip_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
 }
 
-fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) {
+fn test_hip_compiles_multi_targets(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -1526,7 +1521,8 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) {
 
     let out_file = tempdir.join(OUTPUT);
     trace!("compile A with gfx900 and gfx1030");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1541,19 +1537,19 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
 
     trace!("compile A with with gfx900 and gfx1030 again");
     fs::remove_file(&out_file).unwrap();
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1568,22 +1564,22 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(2, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(2, stats.compile_requests);
+    assert_eq!(2, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("HIP").unwrap());
+    assert_eq!(&1, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_hip_key).unwrap());
+    assert_eq!(&1, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
 
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B with gfx900 and gfx1030");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_hip_cmdline(
             name,
             exe,
@@ -1598,28 +1594,27 @@ fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) {
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
     trace!("request stats");
-    get_stats(|info| {
-        assert_eq!(3, info.stats.compile_requests);
-        assert_eq!(3, info.stats.requests_executed);
-        assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(2, info.stats.cache_misses.all());
-        assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get("HIP").unwrap());
-        let adv_hip_key = adv_key_kind("hip", compiler.name);
-        assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap());
-        assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(3, stats.compile_requests);
+    assert_eq!(3, stats.requests_executed);
+    assert_eq!(1, stats.cache_hits.all());
+    assert_eq!(2, stats.cache_misses.all());
+    assert_eq!(&1, stats.cache_hits.get("HIP").unwrap());
+    assert_eq!(&2, stats.cache_misses.get("HIP").unwrap());
+    let adv_hip_key = adv_key_kind("hip", compiler.name);
+    assert_eq!(&1, stats.cache_hits.get_adv(&adv_hip_key).unwrap());
+    assert_eq!(&2, stats.cache_misses.get_adv(&adv_hip_key).unwrap());
 }
 
-fn run_sccache_hip_command_tests(compiler: Compiler, tempdir: &Path) {
-    zero_stats();
-    test_hip_compiles(&compiler, tempdir);
-    zero_stats();
-    test_hip_compiles_multi_targets(&compiler, tempdir);
+fn run_sccache_hip_command_tests(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
+    client.zero_stats();
+    test_hip_compiles(client, &compiler, tempdir);
+    client.zero_stats();
+    test_hip_compiles_multi_targets(client, &compiler, tempdir);
     // test_proper_lang_stat_tracking(compiler, tempdir);
 }
 
-fn test_clang_multicall(compiler: Compiler, tempdir: &Path) {
+fn test_clang_multicall(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
     let Compiler {
         name,
         exe,
@@ -1630,7 +1625,8 @@ fn test_clang_multicall(compiler: Compiler, tempdir: &Path) {
     copy_to_tempdir(&[INPUT_CLANG_MULTICALL], tempdir);
 
     println!("compile clang_multicall");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
             exe,
@@ -1645,6 +1641,7 @@ fn test_clang_multicall(compiler: Compiler, tempdir: &Path) {
 }
 
 fn test_clang_cache_whitespace_normalization(
+    client: &SccacheClient,
     compiler: Compiler,
     tempdir: &Path,
     hit: bool,
@@ -1659,10 +1656,11 @@ fn test_clang_cache_whitespace_normalization(
     debug!("expecting hit: {}", hit);
     // Compile a source file.
     copy_to_tempdir(&[INPUT_WITH_WHITESPACE, INPUT_WITH_WHITESPACE_ALT], tempdir);
-    zero_stats();
+    client.zero_stats();
 
     debug!("compile whitespace");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
             &exe,
@@ -1675,15 +1673,15 @@ fn test_clang_cache_whitespace_normalization(
         .assert()
         .success();
     debug!("request stats");
-    get_stats(|info| {
-        assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(1, info.stats.requests_executed);
-        assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(1, info.stats.cache_misses.all());
-    });
+    let stats = client.stats().unwrap();
+    assert_eq!(1, stats.compile_requests);
+    assert_eq!(1, stats.requests_executed);
+    assert_eq!(0, stats.cache_hits.all());
+    assert_eq!(1, stats.cache_misses.all());
 
     debug!("compile whitespace_alt");
-    sccache_command()
+    client
+        .cmd()
         .args(compile_cmdline(
             name,
             &exe,
@@ -1697,26 +1695,24 @@ fn test_clang_cache_whitespace_normalization(
         .success();
     debug!("request stats (expecting cache hit)");
     if hit {
-        get_stats(move |info| {
-            assert_eq!(2, info.stats.compile_requests);
-            assert_eq!(2, info.stats.requests_executed);
-            if preprocessor_cache_mode {
-                // Preprocessor cache mode hashes the input file, so whitespace
-                // normalization does not work.
-                assert_eq!(0, info.stats.cache_hits.all());
-                assert_eq!(2, info.stats.cache_misses.all());
-            } else {
-                assert_eq!(1, info.stats.cache_hits.all());
-                assert_eq!(1, info.stats.cache_misses.all());
-            }
-        });
+        let stats = client.stats().unwrap();
+        assert_eq!(2, stats.compile_requests);
+        assert_eq!(2, stats.requests_executed);
+        if preprocessor_cache_mode {
+            // Preprocessor cache mode hashes the input file, so whitespace
+            // normalization does not work.
+            assert_eq!(0, stats.cache_hits.all());
+            assert_eq!(2, stats.cache_misses.all());
+        } else {
+            assert_eq!(1, stats.cache_hits.all());
+            assert_eq!(1, stats.cache_misses.all());
+        }
     } else {
-        get_stats(|info| {
-            assert_eq!(2, info.stats.compile_requests);
-            assert_eq!(2, info.stats.requests_executed);
-            assert_eq!(0, info.stats.cache_hits.all());
-            assert_eq!(2, info.stats.cache_misses.all());
-        });
+        let stats = client.stats().unwrap();
+        assert_eq!(2, stats.compile_requests);
+        assert_eq!(2, stats.requests_executed);
+        assert_eq!(0, stats.cache_hits.all());
+        assert_eq!(2, stats.cache_misses.all());
     }
 }
 
@@ -1814,66 +1810,73 @@ fn find_hip_compiler() -> Option<Compiler> {
     None
 }
 
+fn make_sccache_client(
+    preprocessor_cache_mode: bool,
+) -> (Option<tempfile::TempDir>, PathBuf, SccacheClient) {
+    let tempdir = tempfile::Builder::new()
+        .prefix("sccache_system_test")
+        .tempdir()
+        .unwrap();
+
+    // Persist the tempdir if SCCACHE_DEBUG is defined
+    let (tempdir_path, maybe_tempdir) = if env::var("SCCACHE_DEBUG").is_ok() {
+        (tempdir.into_path(), None)
+    } else {
+        (tempdir.path().to_path_buf(), Some(tempdir))
+    };
+
+    // Create the configurations
+    let sccache_cfg = sccache_client_cfg(&tempdir_path, preprocessor_cache_mode);
+    write_json_cfg(&tempdir_path, "sccache-cfg.json", &sccache_cfg);
+    let sccache_cached_cfg_path = tempdir_path.join("sccache-cached-cfg");
+    // Start the server daemon on a unique port
+    let client = SccacheClient::new(
+        &tempdir_path.join("sccache-cfg.json"),
+        &sccache_cached_cfg_path,
+    )
+    .start();
+
+    (maybe_tempdir, tempdir_path, client)
+}
+
 // TODO: This runs multiple test cases, for multiple compilers. It should be
 // split up to run them individually. In the current form, it is hard to see
 // which sub test cases are executed, and if one fails, the remaining tests
 // are not run.
 #[test_case(true ; "with preprocessor cache")]
 #[test_case(false ; "without preprocessor cache")]
-#[serial]
 #[cfg(any(unix, target_env = "msvc"))]
 fn test_sccache_command(preprocessor_cache_mode: bool) {
     let _ = env_logger::try_init();
-    let tempdir = tempfile::Builder::new()
-        .prefix("sccache_system_test")
-        .tempdir()
-        .unwrap();
     let compilers = find_compilers();
     if compilers.is_empty() {
-        warn!("No compilers found, skipping test");
-    } else {
-        // Ensure there's no existing sccache server running.
-        stop_local_daemon();
-        // Create the configurations
-        let sccache_cfg = sccache_client_cfg(tempdir.path(), preprocessor_cache_mode);
-        write_json_cfg(tempdir.path(), "sccache-cfg.json", &sccache_cfg);
-        let sccache_cached_cfg_path = tempdir.path().join("sccache-cached-cfg");
-        // Start a server.
-        trace!("start server");
-        start_local_daemon(
-            &tempdir.path().join("sccache-cfg.json"),
-            &sccache_cached_cfg_path,
-        );
-        for compiler in compilers {
-            run_sccache_command_tests(compiler, tempdir.path(), preprocessor_cache_mode);
-            zero_stats();
-        }
-        stop_local_daemon();
+        return warn!("No compilers found, skipping test");
+    }
+
+    // Create and start the sccache client
+    let (_tempdir, tempdir_path, client) = make_sccache_client(preprocessor_cache_mode);
+
+    for compiler in compilers {
+        run_sccache_command_tests(&client, compiler, &tempdir_path, preprocessor_cache_mode);
+        client.zero_stats();
     }
 }
 
 #[test]
-#[serial]
 fn test_stats_no_server() {
-    // Ensure there's no existing sccache server running.
-    stop_local_daemon();
-    get_stats(|_| {});
+    let client = SccacheClient::new_no_cfg();
+    let _ = client.stats();
     assert!(
-        !stop_local_daemon(),
+        !client.stop(),
         "Server shouldn't be running after --show-stats"
     );
 }
 
 #[test_case(true ; "with preprocessor cache")]
 #[test_case(false ; "without preprocessor cache")]
-#[serial]
 #[cfg(any(unix, target_env = "msvc"))]
 fn test_cuda_sccache_command(preprocessor_cache_mode: bool) {
     let _ = env_logger::try_init();
-    let tempdir = tempfile::Builder::new()
-        .prefix("sccache_system_test")
-        .tempdir()
-        .unwrap();
     let compilers = find_cuda_compilers();
     println!(
         "CUDA compilers: {:?}",
@@ -1883,57 +1886,30 @@ fn test_cuda_sccache_command(preprocessor_cache_mode: bool) {
             .collect::<Vec<_>>()
     );
     if compilers.is_empty() {
-        warn!("No compilers found, skipping test");
-    } else {
-        // Ensure there's no existing sccache server running.
-        stop_local_daemon();
-        // Create the configurations
-        let sccache_cfg = sccache_client_cfg(tempdir.path(), preprocessor_cache_mode);
-        write_json_cfg(tempdir.path(), "sccache-cfg.json", &sccache_cfg);
-        let sccache_cached_cfg_path = tempdir.path().join("sccache-cached-cfg");
-        // Start a server.
-        trace!("start server");
-        start_local_daemon(
-            &tempdir.path().join("sccache-cfg.json"),
-            &sccache_cached_cfg_path,
-        );
-        for compiler in compilers {
-            match compiler.name {
-                "nvcc" => run_sccache_nvcc_cuda_command_tests(compiler, tempdir.path()),
-                "clang++" => run_sccache_clang_cuda_command_tests(compiler, tempdir.path()),
-                _ => {}
-            }
-            zero_stats();
+        return warn!("No compilers found, skipping test");
+    }
+
+    // Create and start the sccache client
+    let (_tempdir, tempdir_path, client) = make_sccache_client(preprocessor_cache_mode);
+
+    for compiler in compilers {
+        match compiler.name {
+            "nvcc" => run_sccache_nvcc_cuda_command_tests(&client, compiler, &tempdir_path),
+            "clang++" => run_sccache_clang_cuda_command_tests(&client, compiler, &tempdir_path),
+            _ => {}
         }
-        stop_local_daemon();
     }
 }
 
 #[test_case(true ; "with preprocessor cache")]
 #[test_case(false ; "without preprocessor cache")]
-#[serial]
 #[cfg(any(unix, target_env = "msvc"))]
 fn test_hip_sccache_command(preprocessor_cache_mode: bool) {
     let _ = env_logger::try_init();
-    let tempdir = tempfile::Builder::new()
-        .prefix("sccache_system_test")
-        .tempdir()
-        .unwrap();
 
     if let Some(compiler) = find_hip_compiler() {
-        stop_local_daemon();
-        // Create the configurations
-        let sccache_cfg = sccache_client_cfg(tempdir.path(), preprocessor_cache_mode);
-        write_json_cfg(tempdir.path(), "sccache-cfg.json", &sccache_cfg);
-        let sccache_cached_cfg_path = tempdir.path().join("sccache-cached-cfg");
-        // Start a server.
-        trace!("start server");
-        start_local_daemon(
-            &tempdir.path().join("sccache-cfg.json"),
-            &sccache_cached_cfg_path,
-        );
-        run_sccache_hip_command_tests(compiler, tempdir.path());
-        zero_stats();
-        stop_local_daemon();
+        // Create and start the sccache client
+        let (_tempdir, tempdir_path, client) = make_sccache_client(preprocessor_cache_mode);
+        run_sccache_hip_command_tests(&client, compiler, &tempdir_path);
     }
 }

From 5423197c64683573626359305a20059415da21ad Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 03:55:31 +0000
Subject: [PATCH 2/9] refactor CUDA tests to be more readable

---
 src/lib.rs      |   2 +-
 src/server.rs   |   6 +-
 tests/system.rs | 928 +++++++++++++++++++++++++-----------------------
 3 files changed, 483 insertions(+), 453 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 6d022e30a..f82f72933 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -37,7 +37,7 @@ mod cache;
 mod client;
 mod cmdline;
 mod commands;
-mod compiler;
+pub mod compiler;
 pub mod config;
 pub mod dist;
 mod jobserver;
diff --git a/src/server.rs b/src/server.rs
index 76a63ed27..67a897227 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -1540,14 +1540,14 @@ where
     }
 }
 
-#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
 pub struct PerLanguageCount {
     counts: HashMap<String, u64>,
     adv_counts: HashMap<String, u64>,
 }
 
 impl PerLanguageCount {
-    fn increment(&mut self, kind: &CompilerKind, lang: &Language) {
+    pub fn increment(&mut self, kind: &CompilerKind, lang: &Language) {
         let lang_comp_key = kind.lang_comp_kind(lang);
         let adv_count = self.adv_counts.entry(lang_comp_key).or_insert(0);
         *adv_count += 1;
@@ -1575,7 +1575,7 @@ impl PerLanguageCount {
 }
 
 /// Statistics about the server.
-#[derive(Serialize, Deserialize, Clone, Debug)]
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
 pub struct ServerStats {
     /// The count of client compile requests.
     pub compile_requests: u64,
diff --git a/tests/system.rs b/tests/system.rs
index 9af481351..2a9795e95 100644
--- a/tests/system.rs
+++ b/tests/system.rs
@@ -29,6 +29,10 @@ use fs_err as fs;
 use log::Level::Trace;
 use predicates::prelude::*;
 use regex::Regex;
+use sccache::{
+    compiler::{CCompilerKind, CompilerKind, Language},
+    server::ServerStats,
+};
 use serial_test::serial;
 use std::collections::HashMap;
 use std::env;
@@ -119,9 +123,9 @@ fn compile_cuda_cmdline<T: AsRef<OsStr>>(
     compile_flag: &str,
     input: &str,
     output: &str,
-    mut extra_args: Vec<OsString>,
+    extra_args: &[OsString],
 ) -> Vec<OsString> {
-    let mut arg = match compiler {
+    let mut args = match compiler {
         "nvcc" => vec_from!(OsString, exe.as_ref(), compile_flag, input, "-o", output),
         "clang++" => {
             vec_from!(
@@ -147,9 +151,12 @@ fn compile_cuda_cmdline<T: AsRef<OsStr>>(
         _ => panic!("Unsupported compiler: {}", compiler),
     };
     if !extra_args.is_empty() {
-        arg.append(&mut extra_args)
+        args.append(&mut extra_args.to_vec())
     }
-    arg
+    args.iter()
+        .filter(|x| !x.is_empty())
+        .cloned()
+        .collect::<Vec<_>>()
 }
 
 // TODO: This will fail if gcc/clang is actually a ccache wrapper, as it is the
@@ -713,7 +720,22 @@ fn run_sccache_command_tests(
     }
 }
 
+#[derive(Clone, Debug, Default)]
+struct AdditionalStats {
+    cache_writes: Option<u64>,
+    compilations: Option<u64>,
+    compile_requests: Option<u64>,
+    requests_executed: Option<u64>,
+    requests_not_compile: Option<u64>,
+    cache_hits: Option<Vec<(CCompilerKind, Language, u64)>>,
+    cache_misses: Option<Vec<(CCompilerKind, Language, u64)>>,
+}
+
 fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
+    let mut stats = client.stats().unwrap();
+
+    let extra_args = vec![];
+
     let Compiler {
         name,
         exe,
@@ -723,239 +745,176 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
     // Compile multiple source files.
     copy_to_tempdir(&[INPUT_FOR_CUDA_A, INPUT_FOR_CUDA_B], tempdir);
 
-    let out_file = tempdir.join(OUTPUT);
+    let build_dir = PathBuf::from("build");
+    fs::create_dir_all(tempdir.join(&build_dir)).unwrap();
+
+    let mut run_cuda_test = |compile_flag: &str,
+                             input: &Path,
+                             output: &Path,
+                             extra_args: &[OsString],
+                             additional_stats: AdditionalStats| {
+        client
+            .cmd()
+            .args(compile_cuda_cmdline(
+                name,
+                exe,
+                compile_flag,
+                input.to_string_lossy().as_ref(),
+                output.to_string_lossy().as_ref(),
+                extra_args,
+            ))
+            .current_dir(tempdir)
+            .envs(env_vars.clone())
+            .assert()
+            .success();
+
+        assert!(fs::metadata(tempdir.join(output))
+            .map(|m| m.len() > 0)
+            .unwrap());
+
+        fs::remove_file(tempdir.join(output)).unwrap();
+
+        stats.cache_writes += additional_stats.cache_writes.unwrap_or(0);
+        stats.compilations += additional_stats.compilations.unwrap_or(0);
+        stats.compile_requests += additional_stats.compile_requests.unwrap_or(0);
+        stats.requests_executed += additional_stats.requests_executed.unwrap_or(0);
+        stats.requests_not_compile += additional_stats.requests_not_compile.unwrap_or(0);
+        stats.non_cacheable_compilations += 1;
+
+        for (kind, lang, count) in additional_stats.cache_hits.unwrap_or_default() {
+            let kind = CompilerKind::C(kind);
+            for _ in 0..count {
+                stats.cache_hits.increment(&kind, &lang);
+            }
+        }
+
+        for (kind, lang, count) in additional_stats.cache_misses.unwrap_or_default() {
+            let kind = CompilerKind::C(kind);
+            for _ in 0..count {
+                stats.cache_misses.increment(&kind, &lang);
+            }
+        }
+
+        assert_eq!(
+            stats,
+            ServerStats {
+                // TODO: Fix this in the next PR
+                cache_errors: stats.cache_errors.clone(),
+                cache_write_duration: stats.cache_write_duration,
+                cache_read_hit_duration: stats.cache_read_hit_duration,
+                compiler_write_duration: stats.compiler_write_duration,
+                ..client.stats().unwrap()
+            }
+        );
+    };
+
     trace!("compile A");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // relative path for input
-            INPUT_FOR_CUDA_A,
-            // relative path for output
-            out_file.file_name().unwrap().to_string_lossy().as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    fs::remove_file(&out_file).unwrap();
-    trace!("compile A request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(1, stats.compile_requests);
-    assert_eq!(5, stats.requests_executed);
-    assert_eq!(0, stats.cache_hits.all());
-    assert_eq!(4, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(4),
+            compilations: Some(5),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_misses: Some(vec![
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 
     trace!("compile A");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // relative path for input
-            INPUT_FOR_CUDA_A,
-            // absolute path for output
-            out_file.to_string_lossy().as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    fs::remove_file(&out_file).unwrap();
-    trace!("compile A request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(2, stats.compile_requests);
-    assert_eq!(10, stats.requests_executed);
-    assert_eq!(4, stats.cache_hits.all());
-    assert_eq!(4, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A),            // relative path for input
+        &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(0),
+            compilations: Some(1),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // absolute path for input
-            &tempdir.join(INPUT_FOR_CUDA_B).to_string_lossy(),
-            // absolute path for output
-            out_file.to_string_lossy().as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    fs::remove_file(&out_file).unwrap();
-    trace!("compile B request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(3, stats.compile_requests);
-    assert_eq!(15, stats.requests_executed);
-    assert_eq!(5, stats.cache_hits.all());
-    assert_eq!(7, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        &tempdir.join(INPUT_FOR_CUDA_B), // absolute path for input
+        &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(3),
+            compilations: Some(4),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![(CCompilerKind::Ptxas, Language::Cubin, 1)]),
+            cache_misses: Some(vec![
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 
     trace!("compile ptx");
-    let out_file = tempdir.join("test.ptx");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-ptx",
-            INPUT_FOR_CUDA_A,
-            // relative path for output
-            out_file.file_name().unwrap().to_string_lossy().as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    fs::remove_file(&out_file).unwrap();
-    trace!("compile ptx request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(4, stats.compile_requests);
-    assert_eq!(17, stats.requests_executed);
-    assert_eq!(5, stats.cache_hits.all());
-    assert_eq!(8, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&3, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&3, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-ptx",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join("test.ptx"), // relative path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(1),
+            compilations: Some(2),
+            compile_requests: Some(1),
+            requests_executed: Some(2),
+            cache_misses: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
+            ..Default::default()
+        },
+    );
 
     trace!("compile cubin");
-    let out_file = tempdir.join("test.cubin");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-cubin",
-            INPUT_FOR_CUDA_A,
-            // absolute path for output
-            out_file.to_string_lossy().as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    fs::remove_file(&out_file).unwrap();
-    trace!("compile cubin request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(5, stats.compile_requests);
-    assert_eq!(20, stats.requests_executed);
-    assert_eq!(6, stats.cache_hits.all());
-    assert_eq!(9, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&3, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&4, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&3, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&4, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-cubin",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &tempdir.join(&build_dir).join("test.cubin"), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(1),
+            compilations: Some(2),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_hits: Some(vec![
+                // TODO: Fix this in the next PR
+                // (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            // TODO: Should not be a cache miss.
+            //       Fix this in the next PR
+            cache_misses: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
+            ..Default::default()
+        },
+    );
 
     // Test to ensure #2299 doesn't regress (https://github.com/mozilla/sccache/issues/2299)
     let test_2299_src_name = "test_2299.cu";
-    let test_2299_out_file = tempdir.join("test_2299.cu.o");
+    let test_2299_out_name = "test_2299.cu.o";
     // Two versions of the source with different contents inside the #ifndef __CUDA_ARCH__
     let test_2299_cu_src_1 = "
 #ifndef __CUDA_ARCH__
@@ -975,166 +934,71 @@ int main(int argc, char** argv) {
 ";
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_1);
     trace!("compile test_2299.cu (1)");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // relative path for input
-            test_2299_src_name,
-            // relative path for output
-            test_2299_out_file
-                .file_name()
-                .unwrap()
-                .to_string_lossy()
-                .as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&test_2299_out_file)
-        .map(|m| m.len() > 0)
-        .unwrap());
-    fs::remove_file(&test_2299_out_file).unwrap();
-    trace!("compile test_2299.cu request stats (1)");
-    let stats = client.stats().unwrap();
-    assert_eq!(6, stats.compile_requests);
-    assert_eq!(25, stats.requests_executed);
-    assert_eq!(6, stats.cache_hits.all());
-    assert_eq!(13, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&3, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&3, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&3, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&3, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&3, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&3, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        Path::new(test_2299_src_name),       // relative path for input
+        &build_dir.join(test_2299_out_name), // relative path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(4),
+            compilations: Some(5),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_2);
     trace!("compile test_2299.cu (2)");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // relative path for input
-            test_2299_src_name,
-            // relative path for output
-            test_2299_out_file
-                .file_name()
-                .unwrap()
-                .to_string_lossy()
-                .as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&test_2299_out_file)
-        .map(|m| m.len() > 0)
-        .unwrap());
-    fs::remove_file(&test_2299_out_file).unwrap();
-    trace!("compile test_2299.cu request stats (2)");
-    let stats = client.stats().unwrap();
-    assert_eq!(7, stats.compile_requests);
-    assert_eq!(30, stats.requests_executed);
-    assert_eq!(8, stats.cache_hits.all());
-    assert_eq!(15, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&2, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&4, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&4, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&4, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&2, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&4, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        Path::new(test_2299_src_name),       // relative path for input
+        &build_dir.join(test_2299_out_name), // relative path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(2),
+            compilations: Some(3),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 
     // Recompile the original version again to ensure only cache hits
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_1);
     trace!("compile test_2299.cu (3)");
-    client
-        .cmd()
-        .args(compile_cuda_cmdline(
-            name,
-            exe,
-            "-c",
-            // relative path for input
-            test_2299_src_name,
-            // relative path for output
-            test_2299_out_file
-                .file_name()
-                .unwrap()
-                .to_string_lossy()
-                .as_ref(),
-            Vec::new(),
-        ))
-        .current_dir(tempdir)
-        .envs(env_vars.clone())
-        .assert()
-        .success();
-    assert!(fs::metadata(&test_2299_out_file)
-        .map(|m| m.len() > 0)
-        .unwrap());
-    fs::remove_file(&test_2299_out_file).unwrap();
-    trace!("compile test_2299.cu request stats (3)");
-    let stats = client.stats().unwrap();
-    assert_eq!(8, stats.compile_requests);
-    assert_eq!(35, stats.requests_executed);
-    assert_eq!(12, stats.cache_hits.all());
-    assert_eq!(15, stats.cache_misses.all());
-    assert_eq!(&2, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&2, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&3, stats.cache_hits.get("PTX").unwrap());
-    assert_eq!(&5, stats.cache_hits.get("CUBIN").unwrap());
-    assert_eq!(&4, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&4, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&5, stats.cache_misses.get("PTX").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    let adv_cudafe_key = adv_key_kind("cudafe++", compiler.name);
-    let adv_ptx_key = adv_key_kind("ptx", compiler.name);
-    let adv_cubin_key = adv_key_kind("cubin", compiler.name);
-    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&2, stats.cache_hits.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&3, stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&5, stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
-    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&4, stats.cache_misses.get_adv(&adv_cudafe_key).unwrap());
-    assert_eq!(&5, stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
+    run_cuda_test(
+        "-c",
+        &tempdir.join(test_2299_src_name), // absolute path for input
+        &tempdir.join(&build_dir).join(test_2299_out_name), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            compilations: Some(1),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 }
 
 fn test_nvcc_proper_lang_stat_tracking(
@@ -1142,6 +1006,10 @@ fn test_nvcc_proper_lang_stat_tracking(
     compiler: &Compiler,
     tempdir: &Path,
 ) {
+    let mut stats = client.stats().unwrap();
+
+    let extra_args = vec![];
+
     let Compiler {
         name,
         exe,
@@ -1153,7 +1021,8 @@ fn test_nvcc_proper_lang_stat_tracking(
     copy_to_tempdir(&[INPUT_FOR_CUDA_C, INPUT], tempdir);
 
     let out_file = tempdir.join(OUTPUT);
-    trace!("compile CUDA A");
+
+    trace!("compile CUDA C");
     client
         .cmd()
         .args(compile_cmdline(
@@ -1168,7 +1037,35 @@ fn test_nvcc_proper_lang_stat_tracking(
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
-    trace!("compile CUDA A");
+
+    stats.cache_writes += 3;
+    stats.compilations += 4;
+    stats.compile_requests += 1;
+    stats.requests_executed += 5;
+    stats.non_cacheable_compilations += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Nvcc), &Language::Cuda);
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::CudaFE), &Language::CudaFE);
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Cicc), &Language::Ptx);
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Ptxas), &Language::Cubin);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
+    trace!("compile CUDA C");
     client
         .cmd()
         .args(compile_cmdline(
@@ -1176,56 +1073,118 @@ fn test_nvcc_proper_lang_stat_tracking(
             exe,
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            Vec::new(),
+            extra_args.clone(),
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
-    trace!("compile C++ A");
+
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 5;
+    stats.non_cacheable_compilations += 1;
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Nvcc), &Language::Cuda);
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::CudaFE), &Language::CudaFE);
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Cicc), &Language::Ptx);
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Ptxas), &Language::Cubin);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
+    trace!("compile C++");
     client
         .cmd()
-        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
+        .args(compile_cmdline(
+            name,
+            exe,
+            INPUT,
+            OUTPUT,
+            extra_args.clone(),
+        ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
-    trace!("compile C++ A");
+
+    stats.cache_writes += 1;
+    stats.compilations += 2;
+    stats.compile_requests += 1;
+    stats.requests_executed += 2;
+    stats.non_cacheable_compilations += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Nvcc), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
+    trace!("compile C++");
     client
         .cmd()
-        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
+        .args(compile_cmdline(
+            name,
+            exe,
+            INPUT,
+            OUTPUT,
+            extra_args.clone(),
+        ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
 
-    trace!("request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(4, stats.compile_requests);
-    assert_eq!(14, stats.requests_executed);
-    assert_eq!(6, stats.cache_hits.all());
-    assert_eq!(4, stats.cache_misses.all());
-    assert!(stats.cache_hits.get("C/C++").is_none());
-    assert_eq!(&2, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA (Device code)").unwrap());
-    assert_eq!(&2, stats.cache_hits.get("CUBIN").unwrap());
-    assert!(stats.cache_misses.get("C/C++").is_none());
-    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA (Device code)").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("PTX").unwrap());
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 2;
+    stats.non_cacheable_compilations += 1;
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Nvcc), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
 }
 
 fn run_sccache_nvcc_cuda_command_tests(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
-    client.zero_stats();
     test_nvcc_cuda_compiles(client, &compiler, tempdir);
-    client.zero_stats();
     test_nvcc_proper_lang_stat_tracking(client, &compiler, tempdir);
 }
 
 fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
+    let mut stats = client.stats().unwrap();
+
+    let extra_args = vec![];
+
     let Compiler {
         name,
         exe,
@@ -1245,24 +1204,32 @@ fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir
             "-c",
             INPUT_FOR_CUDA_A,
             OUTPUT,
-            Vec::new(),
+            &extra_args,
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    trace!("request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(1, stats.compile_requests);
-    assert_eq!(1, stats.requests_executed);
-    assert_eq!(0, stats.cache_hits.all());
-    assert_eq!(1, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
-    trace!("compile A");
     fs::remove_file(&out_file).unwrap();
+    stats.cache_writes += 1;
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
+    trace!("compile A");
     client
         .cmd()
         .args(compile_cuda_cmdline(
@@ -1271,24 +1238,29 @@ fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir
             "-c",
             INPUT_FOR_CUDA_A,
             OUTPUT,
-            Vec::new(),
+            &extra_args,
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    trace!("request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(2, stats.compile_requests);
-    assert_eq!(2, stats.requests_executed);
-    assert_eq!(1, stats.cache_hits.all());
-    assert_eq!(1, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&1, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    fs::remove_file(&out_file).unwrap();
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
     // By compiling another input source we verify that the pre-processor
     // phase is correctly running and outputting text
     trace!("compile B");
@@ -1300,24 +1272,30 @@ fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir
             "-c",
             INPUT_FOR_CUDA_B,
             OUTPUT,
-            Vec::new(),
+            &extra_args,
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap());
-    trace!("request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(3, stats.compile_requests);
-    assert_eq!(3, stats.requests_executed);
-    assert_eq!(1, stats.cache_hits.all());
-    assert_eq!(2, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&2, stats.cache_misses.get("CUDA").unwrap());
-    let adv_cuda_key = adv_key_kind("cuda", compiler.name);
-    assert_eq!(&1, stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
-    assert_eq!(&2, stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
+    fs::remove_file(&out_file).unwrap();
+    stats.cache_writes += 1;
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
 }
 
 fn test_clang_proper_lang_stat_tracking(
@@ -1325,6 +1303,8 @@ fn test_clang_proper_lang_stat_tracking(
     compiler: &Compiler,
     tempdir: &Path,
 ) {
+    let mut stats = client.stats().unwrap();
+
     let Compiler {
         name,
         exe,
@@ -1345,13 +1325,30 @@ fn test_clang_proper_lang_stat_tracking(
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            Vec::new(),
+            &[],
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
+    stats.cache_writes += 1;
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
     trace!("compile CUDA A");
     client
         .cmd()
@@ -1361,13 +1358,28 @@ fn test_clang_proper_lang_stat_tracking(
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            Vec::new(),
+            &[],
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cuda);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
     trace!("compile C++ A");
     client
         .cmd()
@@ -1377,6 +1389,23 @@ fn test_clang_proper_lang_stat_tracking(
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
+    stats.cache_writes += 1;
+    stats.compilations += 1;
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_misses
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cxx);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
+
     trace!("compile C++ A");
     client
         .cmd()
@@ -1386,17 +1415,20 @@ fn test_clang_proper_lang_stat_tracking(
         .assert()
         .success();
     fs::remove_file(&out_file).unwrap();
-
-    trace!("request stats");
-    let stats = client.stats().unwrap();
-    assert_eq!(4, stats.compile_requests);
-    assert_eq!(4, stats.requests_executed);
-    assert_eq!(2, stats.cache_hits.all());
-    assert_eq!(2, stats.cache_misses.all());
-    assert_eq!(&1, stats.cache_hits.get("C/C++").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("C/C++").unwrap());
-    assert_eq!(&1, stats.cache_hits.get("CUDA").unwrap());
-    assert_eq!(&1, stats.cache_misses.get("CUDA").unwrap());
+    stats.compile_requests += 1;
+    stats.requests_executed += 1;
+    stats
+        .cache_hits
+        .increment(&CompilerKind::C(CCompilerKind::Clang), &Language::Cxx);
+    assert_eq!(
+        stats,
+        ServerStats {
+            cache_write_duration: stats.cache_write_duration,
+            cache_read_hit_duration: stats.cache_read_hit_duration,
+            compiler_write_duration: stats.compiler_write_duration,
+            ..client.stats().unwrap()
+        }
+    );
 }
 
 fn run_sccache_clang_cuda_command_tests(
@@ -1404,9 +1436,7 @@ fn run_sccache_clang_cuda_command_tests(
     compiler: Compiler,
     tempdir: &Path,
 ) {
-    client.zero_stats();
     test_clang_cuda_compiles(client, &compiler, tempdir);
-    client.zero_stats();
     test_clang_proper_lang_stat_tracking(client, &compiler, tempdir);
 }
 

From 702c0e9921990e6b720be00ce780662bbb2b9f5a Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 18:56:08 +0000
Subject: [PATCH 3/9] Rework nvcc name rewriting to ensure more cache hits

These changes ensure cache hits for compilations that are subsets of previously cached compilations.

* Normalize cudafe++, ptx, and cubin names regardless of whether the compilation flag is `-c`, `-ptx`, `-cubin`, or whether there are one or many `-gencode` flags
* Include the compiler `hash_key` in the output dir for internal nvcc files to guarantee stability and uniqueness
* Fix cache error due to hash collision from not hashing all the PTX and cubin flags
---
 Cargo.lock                 |  45 +-
 Cargo.toml                 |   1 +
 src/compiler/c.rs          |   4 +
 src/compiler/cicc.rs       |  77 ++--
 src/compiler/clang.rs      |   1 +
 src/compiler/compiler.rs   |  10 +-
 src/compiler/cudafe.rs     |   3 +-
 src/compiler/diab.rs       |   1 +
 src/compiler/gcc.rs        |  20 +-
 src/compiler/msvc.rs       |  17 +-
 src/compiler/nvcc.rs       | 913 +++++++++++++++++++++++--------------
 src/compiler/nvhpc.rs      |   1 +
 src/compiler/ptxas.rs      |   1 +
 src/compiler/rust.rs       |   1 +
 src/compiler/tasking_vx.rs |   1 +
 15 files changed, 696 insertions(+), 400 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b7403e209..ff7bc3cbf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2200,7 +2200,7 @@ dependencies = [
  "aho-corasick",
  "memchr",
  "regex-automata",
- "regex-syntax",
+ "regex-syntax 0.8.2",
 ]
 
 [[package]]
@@ -2211,15 +2211,55 @@ checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax",
+ "regex-syntax 0.8.2",
 ]
 
+[[package]]
+name = "regex-syntax"
+version = "0.6.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
+
 [[package]]
 name = "regex-syntax"
 version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
 
+[[package]]
+name = "regex_static"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6126d61c5e4b41929098f73b42fc1d257116cc95d19739248c51591f77cc0021"
+dependencies = [
+ "once_cell",
+ "regex",
+ "regex_static_macro",
+]
+
+[[package]]
+name = "regex_static_impl"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3755019886a70e772e6360b0b58501d75cf7dc17a53e08aa97e59ecb2c2bc5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "regex-syntax 0.6.29",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "regex_static_macro"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79b15495fd034158635bc8b762a132dfc83864d6992aeda1ffabf01b03b611a1"
+dependencies = [
+ "proc-macro2",
+ "regex_static_impl",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "reqsign"
 version = "0.16.1"
@@ -2609,6 +2649,7 @@ dependencies = [
  "predicates",
  "rand",
  "regex",
+ "regex_static",
  "reqsign",
  "reqwest 0.12.5",
  "retry",
diff --git a/Cargo.toml b/Cargo.toml
index 097f8455a..5cf5d625b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,6 +69,7 @@ opendal = { version = "0.52.0", optional = true, default-features = false }
 openssl = { version = "0.10.72", optional = true }
 rand = "0.8.4"
 regex = "1.10.3"
+regex_static = "0.1.1"
 reqsign = { version = "0.16.0", optional = true }
 reqwest = { version = "0.12", features = [
   "json",
diff --git a/src/compiler/c.rs b/src/compiler/c.rs
index 9476178a5..da018fefe 100644
--- a/src/compiler/c.rs
+++ b/src/compiler/c.rs
@@ -200,6 +200,7 @@ pub trait CCompilerImpl: Clone + fmt::Debug + Send + Sync + 'static {
         T: CommandCreatorSync;
     /// Generate a command that can be used to invoke the C compiler to perform
     /// the compilation.
+    #[allow(clippy::too_many_arguments)]
     fn generate_compile_commands<T>(
         &self,
         path_transformer: &mut dist::PathTransformer,
@@ -208,6 +209,7 @@ pub trait CCompilerImpl: Clone + fmt::Debug + Send + Sync + 'static {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         rewrite_includes_only: bool,
+        hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -1157,6 +1159,7 @@ impl<T: CommandCreatorSync, I: CCompilerImpl> Compilation<T> for CCompilation<I>
         &self,
         path_transformer: &mut dist::PathTransformer,
         rewrite_includes_only: bool,
+        hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -1178,6 +1181,7 @@ impl<T: CommandCreatorSync, I: CCompilerImpl> Compilation<T> for CCompilation<I>
             cwd,
             env_vars,
             rewrite_includes_only,
+            hash_key,
         )
     }
 
diff --git a/src/compiler/cicc.rs b/src/compiler/cicc.rs
index 41ba1e8ef..0e16c3cec 100644
--- a/src/compiler/cicc.rs
+++ b/src/compiler/cicc.rs
@@ -85,6 +85,7 @@ impl CCompilerImpl for Cicc {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -118,7 +119,7 @@ where
     let mut take_next = false;
     let mut outputs = HashMap::new();
     let mut extra_dist_files = vec![];
-    let mut gen_module_id_file = false;
+    // let mut gen_module_id_file = false;
     let mut module_id_file_name = Option::<PathBuf>::None;
 
     let mut common_args = vec![];
@@ -128,6 +129,20 @@ where
         match arg {
             Ok(arg) => {
                 let args = match arg.get_data() {
+                    Some(ExtraOutput(o)) => {
+                        take_next = false;
+                        let path = cwd.join(o);
+                        if let Some(flag) = arg.flag_str() {
+                            outputs.insert(
+                                flag,
+                                ArtifactDescriptor {
+                                    path,
+                                    optional: false,
+                                },
+                            );
+                        }
+                        &mut common_args
+                    }
                     Some(PassThrough(_)) => {
                         take_next = false;
                         &mut common_args
@@ -146,7 +161,7 @@ where
                     }
                     Some(GenModuleIdFileFlag) => {
                         take_next = false;
-                        gen_module_id_file = true;
+                        // gen_module_id_file = true;
                         &mut common_args
                     }
                     Some(ModuleIdFileName(o)) => {
@@ -158,24 +173,6 @@ where
                         take_next = false;
                         &mut unhashed_args
                     }
-                    Some(UnhashedOutput(o)) => {
-                        take_next = false;
-                        let path = cwd.join(o);
-                        if let Some(flag) = arg.flag_str() {
-                            outputs.insert(
-                                flag,
-                                ArtifactDescriptor {
-                                    path,
-                                    optional: false,
-                                },
-                            );
-                        }
-                        &mut unhashed_args
-                    }
-                    Some(UnhashedFlag) => {
-                        take_next = false;
-                        &mut unhashed_args
-                    }
                     None => match arg {
                         Argument::Raw(ref p) => {
                             if take_next {
@@ -200,17 +197,16 @@ where
     }
 
     if let Some(module_id_path) = module_id_file_name {
-        if gen_module_id_file {
-            outputs.insert(
-                "--module_id_file_name",
-                ArtifactDescriptor {
-                    path: module_id_path,
-                    optional: true,
-                },
-            );
-        } else {
-            extra_dist_files.push(module_id_path);
+        if module_id_path.exists() {
+            extra_dist_files.push(module_id_path.clone());
         }
+        outputs.insert(
+            "--module_id_file_name",
+            ArtifactDescriptor {
+                path: module_id_path,
+                optional: false,
+            },
+        );
     }
 
     CompilerArguments::Ok(ParsedArguments {
@@ -236,13 +232,7 @@ where
 }
 
 pub async fn preprocess(cwd: &Path, parsed_args: &ParsedArguments) -> Result<process::Output> {
-    // cicc and ptxas expect input to be an absolute path
-    let input = if parsed_args.input.is_absolute() {
-        parsed_args.input.clone()
-    } else {
-        cwd.join(&parsed_args.input)
-    };
-    std::fs::read(input)
+    std::fs::read(cwd.join(&parsed_args.input))
         .map_err(anyhow::Error::new)
         .map(|s| process::Output {
             status: process::ExitStatus::default(),
@@ -329,23 +319,22 @@ pub fn generate_compile_commands(
 }
 
 ArgData! { pub
-    Output(PathBuf),
-    PassThrough(OsString),
-    UnhashedFlag,
+    ExtraOutput(PathBuf),
     GenModuleIdFileFlag,
     ModuleIdFileName(PathBuf),
+    Output(PathBuf),
+    PassThrough(OsString),
     UnhashedPassThrough(OsString),
-    UnhashedOutput(PathBuf),
 }
 
 use self::ArgData::*;
 
 counted_array!(pub static ARGS: [ArgInfo<ArgData>; _] = [
-    take_arg!("--gen_c_file_name", PathBuf, Separated, UnhashedOutput),
-    take_arg!("--gen_device_file_name", PathBuf, Separated, UnhashedOutput),
+    take_arg!("--gen_c_file_name", PathBuf, Separated, ExtraOutput),
+    take_arg!("--gen_device_file_name", PathBuf, Separated, ExtraOutput),
     flag!("--gen_module_id_file", GenModuleIdFileFlag),
     take_arg!("--include_file_name", OsString, Separated, PassThrough),
     take_arg!("--module_id_file_name", PathBuf, Separated, ModuleIdFileName),
-    take_arg!("--stub_file_name", PathBuf, Separated, UnhashedOutput),
+    take_arg!("--stub_file_name", PathBuf, Separated, ExtraOutput),
     take_arg!("-o", PathBuf, Separated, Output),
 ]);
diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs
index edbfe5434..9689860e8 100644
--- a/src/compiler/clang.rs
+++ b/src/compiler/clang.rs
@@ -154,6 +154,7 @@ impl CCompilerImpl for Clang {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs
index 2ae421679..a73d51213 100644
--- a/src/compiler/compiler.rs
+++ b/src/compiler/compiler.rs
@@ -577,6 +577,7 @@ where
                     compilation,
                     weak_toolchain_key,
                     out_pretty.clone(),
+                    &key,
                 )
                 .await?;
                 let duration_compilation = start.elapsed();
@@ -671,6 +672,7 @@ where
 }
 
 #[cfg(not(feature = "dist-client"))]
+#[allow(clippy::too_many_arguments)]
 async fn dist_or_local_compile<T>(
     service: &server::SccacheService<T>,
     _dist_client: Option<Arc<dyn dist::Client>>,
@@ -679,13 +681,14 @@ async fn dist_or_local_compile<T>(
     compilation: Box<dyn Compilation<T>>,
     _weak_toolchain_key: String,
     out_pretty: String,
+    hash_key: &str,
 ) -> Result<(Cacheable, DistType, process::Output)>
 where
     T: CommandCreatorSync,
 {
     let mut path_transformer = dist::PathTransformer::new();
     let (compile_cmd, _dist_compile_cmd, cacheable) = compilation
-        .generate_compile_commands(&mut path_transformer, true)
+        .generate_compile_commands(&mut path_transformer, true, hash_key)
         .context("Failed to generate compile commands")?;
 
     debug!("[{}]: Compiling locally", out_pretty);
@@ -696,6 +699,7 @@ where
 }
 
 #[cfg(feature = "dist-client")]
+#[allow(clippy::too_many_arguments)]
 async fn dist_or_local_compile<T>(
     service: &server::SccacheService<T>,
     dist_client: Option<Arc<dyn dist::Client>>,
@@ -704,6 +708,7 @@ async fn dist_or_local_compile<T>(
     compilation: Box<dyn Compilation<T>>,
     weak_toolchain_key: String,
     out_pretty: String,
+    hash_key: &str,
 ) -> Result<(Cacheable, DistType, process::Output)>
 where
     T: CommandCreatorSync,
@@ -716,7 +721,7 @@ where
     };
     let mut path_transformer = dist::PathTransformer::new();
     let (compile_cmd, dist_compile_cmd, cacheable) = compilation
-        .generate_compile_commands(&mut path_transformer, rewrite_includes_only)
+        .generate_compile_commands(&mut path_transformer, rewrite_includes_only, hash_key)
         .context("Failed to generate compile commands")?;
 
     let dist_client = match dist_compile_cmd.clone().and(dist_client) {
@@ -921,6 +926,7 @@ where
         &self,
         path_transformer: &mut dist::PathTransformer,
         rewrite_includes_only: bool,
+        hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/cudafe.rs b/src/compiler/cudafe.rs
index 9ca1eb663..004372158 100644
--- a/src/compiler/cudafe.rs
+++ b/src/compiler/cudafe.rs
@@ -86,6 +86,7 @@ impl CCompilerImpl for CudaFE {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -182,7 +183,7 @@ pub fn generate_compile_commands(
 use cicc::ArgData::*;
 
 counted_array!(pub static ARGS: [ArgInfo<cicc::ArgData>; _] = [
-    take_arg!("--gen_c_file_name", PathBuf, Separated, UnhashedOutput),
+    take_arg!("--gen_c_file_name", PathBuf, Separated, ExtraOutput),
     flag!("--gen_module_id_file", GenModuleIdFileFlag),
     take_arg!("--module_id_file_name", PathBuf, Separated, Output),
     take_arg!("--stub_file_name", OsString, Separated, UnhashedPassThrough),
diff --git a/src/compiler/diab.rs b/src/compiler/diab.rs
index 6f16c526d..befc1ffc8 100644
--- a/src/compiler/diab.rs
+++ b/src/compiler/diab.rs
@@ -87,6 +87,7 @@ impl CCompilerImpl for Diab {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs
index aefd7cd54..d4598bdaa 100644
--- a/src/compiler/gcc.rs
+++ b/src/compiler/gcc.rs
@@ -104,6 +104,7 @@ impl CCompilerImpl for Gcc {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -905,10 +906,21 @@ where
             let mut language: Option<String> =
                 language_to_arg(parsed_args.language).map(|lang| lang.into());
             if !rewrite_includes_only {
-                match parsed_args.language {
-                    Language::C => language = Some("cpp-output".into()),
-                    Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
-                    _ => language.as_mut()?.push_str("-cpp-output"),
+                if let CCompilerKind::Nvhpc = kind {
+                    // -x=c|cpp|c++|i|cpp-output|asm|assembler|ASM|assembler-with-cpp|none
+                    // Specify the language for any following input files, instead of letting
+                    // the compiler choose based on suffix. Turn off with -x none
+                    match parsed_args.language {
+                        Language::C | Language::Cxx => language = Some("cpp-output".into()),
+                        Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
+                        _ => *(language.as_mut()?) = "none".into(),
+                    }
+                } else {
+                    match parsed_args.language {
+                        Language::C => language = Some("cpp-output".into()),
+                        Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
+                        _ => language.as_mut()?.push_str("-cpp-output"),
+                    }
                 }
             }
 
diff --git a/src/compiler/msvc.rs b/src/compiler/msvc.rs
index fcfe66bdb..77c27c81a 100644
--- a/src/compiler/msvc.rs
+++ b/src/compiler/msvc.rs
@@ -101,6 +101,7 @@ impl CCompilerImpl for Msvc {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -971,7 +972,7 @@ where
     let includes_prefix = includes_prefix.to_string();
     let cwd = cwd.to_owned();
 
-    let output = run_input_output(cmd, None).await?;
+    let mut output = run_input_output(cmd, None).await?;
 
     if !is_clang {
         return Ok(output);
@@ -989,13 +990,8 @@ where
         encode_path(&mut f, &parsed_args.input)
             .with_context(|| format!("Couldn't encode input filename: '{:?}'", objfile))?;
         write!(f, " ")?;
-        let process::Output {
-            status,
-            stdout,
-            stderr: stderr_bytes,
-        } = output;
         let stderr =
-            from_local_codepage(&stderr_bytes).context("Failed to convert preprocessor stderr")?;
+            from_local_codepage(&output.stderr).context("Failed to convert preprocessor stderr")?;
         let mut deps = HashSet::new();
         let mut stderr_bytes = vec![];
         for line in stderr.lines() {
@@ -1025,11 +1021,8 @@ where
                 writeln!(f, "{}:", dep)?;
             }
         }
-        Ok(process::Output {
-            status,
-            stdout,
-            stderr: stderr_bytes,
-        })
+        output.stderr = stderr_bytes;
+        Ok(output)
     } else {
         Ok(output)
     }
diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs
index c9d4ecd59..6f6dc303e 100644
--- a/src/compiler/nvcc.rs
+++ b/src/compiler/nvcc.rs
@@ -33,6 +33,7 @@ use fs_err as fs;
 use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use log::Level::Trace;
+use once_cell::sync::Lazy;
 use regex::Regex;
 use std::collections::HashMap;
 use std::ffi::{OsStr, OsString};
@@ -41,11 +42,18 @@ use std::io::{self, BufRead, Read, Write};
 #[cfg(unix)]
 use std::os::unix::process::ExitStatusExt;
 use std::path::{Path, PathBuf};
-use std::process;
+use std::{env, process};
 use which::which_in;
 
 use crate::errors::*;
 
+static IS_VALID_LINE_RE: Lazy<Regex> = regex_static::lazy_regex!(r"^#\$ (.*)$");
+static IS_ENVVAR_LINE_RE: Lazy<Regex> = regex_static::lazy_regex!(r"^([_A-Z]+)=(.*)$");
+static HAS_SM_IN_NAME_RE: Lazy<Regex> = regex_static::lazy_regex!(r"^(.*).sm_([0-9A-Za-z]+).(.*)$");
+static HAS_COMPUTE_IN_NAME_RE: Lazy<Regex> =
+    regex_static::lazy_regex!(r"^(.*).compute_([0-9A-Za-z]+).(.*)$");
+static ARG_HAS_FILE_WITH_EXTENSION_RE: Lazy<Regex> = regex_static::lazy_regex!(r"-.*=(.*)");
+
 /// A unit struct on which to implement `CCompilerImpl`.
 #[derive(Clone, Debug)]
 pub enum NvccHostCompiler {
@@ -214,18 +222,18 @@ impl CCompilerImpl for Nvcc {
                     .collect::<Vec<_>>(),
             );
             if log_enabled!(Trace) {
-                let output_file_name = &parsed_args
-                    .outputs
-                    .get("obj")
-                    .context("Missing object file output")
-                    .unwrap()
-                    .path
-                    .file_name()
-                    .unwrap();
+                let output_path = cwd.join(
+                    &parsed_args
+                        .outputs
+                        .get("obj")
+                        .context("Missing object file output")
+                        .unwrap()
+                        .path,
+                );
 
                 trace!(
                     "[{}]: dependencies command: {:?}",
-                    output_file_name.to_string_lossy(),
+                    output_path.display(),
                     dependency_cmd
                 );
             }
@@ -245,18 +253,18 @@ impl CCompilerImpl for Nvcc {
                 NvccHostCompiler::Gcc => "-Xcompiler=-P",
             });
             if log_enabled!(Trace) {
-                let output_file_name = &parsed_args
-                    .outputs
-                    .get("obj")
-                    .context("Missing object file output")
-                    .unwrap()
-                    .path
-                    .file_name()
-                    .unwrap();
+                let output_path = cwd.join(
+                    &parsed_args
+                        .outputs
+                        .get("obj")
+                        .context("Missing object file output")
+                        .unwrap()
+                        .path,
+                );
 
                 trace!(
                     "[{}]: preprocessor command: {:?}",
-                    output_file_name.to_string_lossy(),
+                    output_path.display(),
                     preprocess_cmd
                 );
             }
@@ -279,6 +287,7 @@ impl CCompilerImpl for Nvcc {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         rewrite_includes_only: bool,
+        hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
@@ -287,20 +296,27 @@ impl CCompilerImpl for Nvcc {
     where
         T: CommandCreatorSync,
     {
-        generate_compile_commands(parsed_args, executable, cwd, env_vars, &self.host_compiler).map(
-            |(command, dist_command, cacheable)| {
-                (CCompileCommand::new(command), dist_command, cacheable)
-            },
+        generate_compile_commands(
+            parsed_args,
+            executable,
+            cwd,
+            env_vars,
+            &self.host_compiler,
+            hash_key,
         )
+        .map(|(command, dist_command, cacheable)| {
+            (CCompileCommand::new(command), dist_command, cacheable)
+        })
     }
 }
 
-pub fn generate_compile_commands(
+fn generate_compile_commands(
     parsed_args: &ParsedArguments,
     executable: &Path,
     cwd: &Path,
     env_vars: &[(OsString, OsString)],
     host_compiler: &NvccHostCompiler,
+    hash_key: &str,
 ) -> Result<(NvccCompileCommand, Option<dist::CompileCommand>, Cacheable)> {
     let mut unhashed_args = parsed_args.unhashed_args.clone();
 
@@ -314,11 +330,7 @@ pub fn generate_compile_commands(
                 .position(|x| x == "-keep-dir" || x == "--keep-dir")
             {
                 let dir = PathBuf::from(unhashed_args[idx + 1].as_os_str());
-                let dir = if dir.is_absolute() {
-                    dir
-                } else {
-                    cwd.join(dir)
-                };
+                let dir = cwd.join(dir);
                 unhashed_args.splice(idx..(idx + 2), []);
                 keep_dir = Some(dir);
                 continue;
@@ -382,12 +394,6 @@ pub fn generate_compile_commands(
         .cloned()
         .collect::<Vec<_>>();
 
-    let temp_dir = tempfile::Builder::new()
-        .prefix("sccache_nvcc")
-        .tempdir()
-        .unwrap()
-        .into_path();
-
     let mut arguments = vec![];
 
     if let Some(lang) = gcc::language_to_gcc_arg(parsed_args.language) {
@@ -401,6 +407,34 @@ pub fn generate_compile_commands(
         .unwrap()
         .path;
 
+    // Build nvcc's internal files in `$TMPDIR/sccache_nvcc/<digest>`, where
+    // `<digest>` is derived from `hash_key`, so the paths are stable across
+    // compilations. These paths end up in the preprocessed output, so using
+    // random tmpdir paths leads to erroneous cache misses.
+    let out_dir = env::temp_dir().join("sccache_nvcc").join({
+        // Combine `hash_key` with the output path in case
+        // the same file is concurrently built to separate
+        // output paths.
+        let mut m = crate::util::Digest::new();
+        m.update(hash_key.as_bytes());
+        m.update(cwd.join(output).as_os_str().as_encoded_bytes());
+        m.finish()
+    });
+    fs::create_dir_all(&out_dir).ok();
+
+    let compile_flag = match parsed_args.compilation_flag.to_str() {
+        Some("") // compile to executable
+        | Some("-c") | Some("--compile") // compile to object
+        | Some("-dc") | Some("--device-c") // compile to object with -rdc=true
+        | Some("-dw") | Some("--device-w") // compile to object with -rdc=false
+        => NvccCompileFlag::Device,
+        Some("-cubin") | Some("--cubin") => NvccCompileFlag::Cubin,
+        Some("-ptx") | Some("--ptx") => NvccCompileFlag::Ptx,
+        Some("-cuda") | Some("--cuda") => NvccCompileFlag::Preprocess,
+        Some("-fatbin") | Some("--fatbin") => NvccCompileFlag::Fatbin,
+        _ => unreachable!()
+    };
+
     arguments.extend(vec![
         "-o".into(),
         // Canonicalize the output path if the compile flag indicates we won't
@@ -408,18 +442,10 @@ pub fn generate_compile_commands(
         // but we run the host compiler in `cwd` (the dir from which sccache was
         // executed), cicc/ptxas `-o` argument should point at the real out path
         // that's potentially relative to `cwd`.
-        match parsed_args.compilation_flag.to_str() {
-            Some("-c") | Some("--compile") // compile to object
-            | Some("-dc") | Some("--device-c") // compile to object with -rdc=true
-            | Some("-dw") | Some("--device-w") // compile to object with -rdc=false
-            => output.clone().into(),
-            _ => {
-                if output.is_absolute() {
-                    output.clone().into()
-                } else {
-                    cwd.join(output).into()
-                }
-            }
+        if compile_flag == NvccCompileFlag::Device {
+            output.clone().into()
+        } else {
+            cwd.join(output).into()
         },
     ]);
 
@@ -438,28 +464,21 @@ pub fn generate_compile_commands(
     // transforms, its cache key is sensitive to the preprocessor output. The
     // preprocessor embeds the name of the input file in comments, so without
     // canonicalizing here, cicc will get cache misses on otherwise identical
-    // input that should produce a cache hit.
-    arguments.push(
-        (if parsed_args.input.is_absolute() {
-            parsed_args.input.clone()
-        } else {
-            cwd.join(&parsed_args.input).canonicalize().unwrap()
-        })
-        .into(),
-    );
+    // inputs that should produce cache hits.
+    arguments.push(cwd.join(&parsed_args.input).canonicalize().unwrap().into());
 
     let command = NvccCompileCommand {
-        temp_dir,
+        out_dir,
         keep_dir,
         num_parallel,
         executable: executable.to_owned(),
         arguments,
-        compilation_flag: parsed_args.compilation_flag.clone(),
+        compile_flag,
         env_vars,
         cwd: cwd.to_owned(),
         host_compiler: host_compiler.clone(),
         // Only here so we can include it in logs
-        output_file_name: output.file_name().unwrap().to_owned(),
+        output_path: cwd.join(output),
     };
 
     Ok((
@@ -478,18 +497,27 @@ pub fn generate_compile_commands(
     ))
 }
 
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum NvccCompileFlag {
+    Cubin,
+    Device,
+    Fatbin,
+    Preprocess,
+    Ptx,
+}
+
 #[derive(Clone, Debug)]
-pub struct NvccCompileCommand {
-    pub temp_dir: PathBuf,
+struct NvccCompileCommand {
+    pub out_dir: PathBuf,
     pub keep_dir: Option<PathBuf>,
     pub num_parallel: usize,
     pub executable: PathBuf,
     pub arguments: Vec<OsString>,
-    pub compilation_flag: OsString,
+    pub compile_flag: NvccCompileFlag,
     pub env_vars: Vec<(OsString, OsString)>,
     pub cwd: PathBuf,
     pub host_compiler: NvccHostCompiler,
-    pub output_file_name: OsString,
+    pub output_path: PathBuf,
 }
 
 #[async_trait]
@@ -516,58 +544,59 @@ impl CompileCommandImpl for NvccCompileCommand {
         T: CommandCreatorSync,
     {
         let NvccCompileCommand {
-            temp_dir,
+            out_dir,
             keep_dir,
             num_parallel,
             executable,
             arguments,
-            compilation_flag,
+            compile_flag,
             env_vars,
             cwd,
             host_compiler,
-            output_file_name,
+            output_path,
         } = self;
 
-        let nvcc_subcommand_groups = group_nvcc_subcommands_by_compilation_stage(
-            creator,
-            executable,
-            arguments,
-            compilation_flag,
-            cwd,
-            temp_dir.as_path(),
-            keep_dir.clone(),
-            env_vars,
-            host_compiler,
-            output_file_name,
-        )
-        .await?;
-
-        let maybe_keep_temps_then_clean = || {
-            // If the caller passed `-keep` or `-keep-dir`, copy the
-            // temp files to the requested location. We do this because we
-            // override `-keep` and `-keep-dir` in our `nvcc --dryrun` call.
-            let maybe_keep_temps = keep_dir.as_ref().and_then(|dst| {
-                fs::create_dir_all(dst)
-                    .and_then(|_| fs::read_dir(temp_dir))
-                    .and_then(|files| {
-                        files
-                            .filter_map(|path| path.ok())
-                            .filter_map(|path| {
-                                path.file_name()
-                                    .to_str()
-                                    .map(|file| (path.path(), file.to_owned()))
-                            })
-                            .try_fold((), |res, (path, file)| fs::rename(path, dst.join(file)))
-                    })
-                    .ok()
-            });
+        let (mut nvcc_internal_files, nvcc_subcommand_groups) =
+            group_nvcc_subcommands_by_compilation_stage(
+                creator,
+                executable,
+                arguments,
+                compile_flag,
+                cwd,
+                out_dir.as_path(),
+                keep_dir.clone(),
+                env_vars,
+                host_compiler,
+                output_path,
+            )
+            .await?;
 
-            maybe_keep_temps
-                .map_or_else(
-                    || fs::remove_dir_all(temp_dir).ok(),
-                    |_| fs::remove_dir_all(temp_dir).ok(),
-                )
-                .unwrap_or(());
+        let mut maybe_keep_temps_then_clean = || {
+            // Move and/or delete nvcc's internal files.
+            //
+            // If the caller passed `-keep` or `-keep-dir`, move the internal
+            // files to the requested location. We do this because we override
+            // `-keep` and `-keep-dir` in our `nvcc --dryrun` call.
+            //
+            // Renames the files back to the original names nvcc gave them.
+            if let Some(dst) = keep_dir {
+                if fs::create_dir_all(dst).is_ok() {
+                    nvcc_internal_files
+                        .drain()
+                        .filter_map(|(orig, path)| {
+                            let path = out_dir.join(path);
+                            if path.exists() {
+                                PathBuf::from(orig)
+                                    .file_name()
+                                    .map(|name| (path, name.to_owned()))
+                            } else {
+                                None
+                            }
+                        })
+                        .try_fold((), |_, (src, name)| fs::rename(src, dst.join(name)).ok());
+                }
+            }
+            fs::remove_dir_all(out_dir).ok();
         };
 
         let mut output = process::Output {
@@ -591,7 +620,7 @@ impl CompileCommandImpl for NvccCompileCommand {
         ] {
             for command_groups in command_group_chunks {
                 let results = futures::future::join_all(command_groups.iter().map(|commands| {
-                    run_nvcc_subcommands_group(service, creator, cwd, commands, output_file_name)
+                    run_nvcc_subcommands_group(service, creator, cwd, commands, output_path)
                 }))
                 .await;
 
@@ -629,14 +658,14 @@ async fn group_nvcc_subcommands_by_compilation_stage<T>(
     creator: &T,
     executable: &Path,
     arguments: &[OsString],
-    compilation_flag: &OsStr,
+    compile_flag: &NvccCompileFlag,
     cwd: &Path,
-    tmp: &Path,
+    out: &Path,
     keep_dir: Option<PathBuf>,
     env_vars: &[(OsString, OsString)],
     host_compiler: &NvccHostCompiler,
-    output_file_name: &OsStr,
-) -> Result<Vec<Vec<NvccGeneratedSubcommand>>>
+    output_path: &Path,
+) -> Result<(HashMap<String, String>, Vec<Vec<NvccGeneratedSubcommand>>)>
 where
     T: CommandCreatorSync,
 {
@@ -650,7 +679,7 @@ where
     // All the host compiler invocations are run in the original `cwd` where
     // sccache was invoked. Arguments will be relative to the cwd, except
     // any arguments that reference nvcc-generated files should be absolute
-    // to the temp dir, e.g. `gcc -E [...] x.cu -o /tmp/dir/x.cpp4.ii`
+    // to the temp dir, e.g. `gcc -E [...] x.cu -o /out/dir/x.cpp4.ii`
 
     // Roughly equivalent to:
     // ```shell
@@ -658,7 +687,7 @@ where
     //       | nl -n ln -s ' ' -w 1                                       \
     //       | grep -P    "^[0-9]+ (cicc|ptxas|cudafe|nvlink|fatbinary)") \
     //                                                                    \
-    //       <(nvcc --dryrun --keep --keep-dir /tmp/dir                   \
+    //       <(nvcc --dryrun --keep --keep-dir /out/dir                   \
     //       | nl -n ln -s ' ' -w 1                                       \
     //       | grep -P -v "^[0-9]+ (cicc|ptxas|cudafe|nvlink|fatbinary)") \
     //                                                                    \
@@ -671,30 +700,28 @@ where
     let is_nvcc_exe =
         |exe: &str| matches!(exe, "cicc" | "ptxas" | "cudafe++" | "nvlink" | "fatbinary");
 
-    let (nvcc_commands, host_commands) = futures::future::try_join(
-        // Get the nvcc compile command lines with paths relative to `tmp`
+    let (mut nvcc_commands, mut host_commands) = futures::future::try_join(
+        // Get the nvcc compile command lines with paths relative to `out`
         select_nvcc_subcommands(
             creator,
             executable,
             cwd,
             &mut env_vars_1,
-            keep_dir.is_none(),
             arguments,
             is_nvcc_exe,
             host_compiler,
-            output_file_name,
+            output_path,
         ),
-        // Get the host compile command lines with paths relative to `cwd` and absolute paths to `tmp`
+        // Get the host compile command lines with paths relative to `cwd` and absolute paths to `out`
         select_nvcc_subcommands(
             creator,
             executable,
             cwd,
             &mut env_vars_2,
-            keep_dir.is_none(),
-            &[arguments, &["--keep-dir".into(), tmp.into()][..]].concat(),
+            &[arguments, &["--keep-dir".into(), out.into()][..]].concat(),
             |exe| !is_nvcc_exe(exe),
             host_compiler,
-            output_file_name,
+            output_path,
         ),
     )
     .await?;
@@ -702,19 +729,94 @@ where
     drop(env_vars_2);
     let env_vars = env_vars_1;
 
+    //
+    // Remap nvcc's generated file names to deterministic names.
+    // nvcc generates different file names depending on whether it's compiling one vs. many archs.
+    //
+    // For example, both of these commands generate PTX for sm60, so we should get a cicc cache hit:
+    // 1. `nvcc -x cu -c x.cu -o x.cu.o -gencode=arch=compute_60,code=[compute_60,sm_60]`
+    // 2. `nvcc -x cu -c x.cu -o x.cu.o -gencode=arch=compute_60,code=[sm_60] -gencode=arch=compute_70,code=[compute_70,sm_70]`
+    //
+    // The first command generates:
+    // ```
+    // cicc --gen_c_file_name x.cudafe1.c \
+    //      --stub_file_name x.cudafe1.stub.c \
+    //      --gen_device_file_name x.cudafe1.gpu \
+    //      -o x.ptx
+    // ```
+    //
+    // The second command generates:
+    // ```
+    // cicc --gen_c_file_name x.compute_60.cudafe1.c \
+    //      --stub_file_name x.compute_60.cudafe1.stub.c \
+    //      --gen_device_file_name x.compute_60.cudafe1.gpu \
+    //      -o x.compute_60.ptx
+    // ```
+    //
+    // The second command yields a false-positive cache miss because the names are different.
+    //
+    // This matters because CI jobs will often compile .cu files in "many-arch" mode, but devs
+    // who have just one GPU locally prefer to only compile for their specific GPU arch. It is
+    // preferable if they can reuse the PTX populated by "many-arch" CI jobs.
+    //
+    // So to avoid this, we detect these "single-arch" compilations and rewrite the names to
+    // match what nvcc generates for "many-arch" compilations.
+    //
+    let mut nvcc_internal_files = HashMap::<String, String>::new();
+    if let Some(arch) = find_last_compute_arch(&nvcc_commands) {
+        host_commands = remap_generated_filenames(
+            &arch,
+            compile_flag,
+            &host_commands,
+            &mut nvcc_internal_files,
+        );
+        nvcc_commands = remap_generated_filenames(
+            &arch,
+            compile_flag,
+            &nvcc_commands,
+            &mut nvcc_internal_files,
+        );
+    }
+
+    let gen_module_id_file_flag = "--gen_module_id_file".to_owned();
+    let gen_c_file_name_flag = "--gen_c_file_name".to_owned();
+    let gen_device_file_name_flag = "--gen_device_file_name".to_owned();
+    let module_id_file_name_flag = "--module_id_file_name".to_owned();
+    let stub_file_name_flag = "--stub_file_name".to_owned();
+
+    let mut cudafe_has_gen_module_id_file_flag = false;
+
     // Now zip the two lists of commands again by sorting on original line index.
     // Transform to tuples that include the dir in which each command should run.
-    let all_commands = nvcc_commands
+    let mut all_commands = nvcc_commands
         .iter()
-        // Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `tmp`
-        .map(|(idx, exe, args)| (idx, tmp, exe, args))
+        // Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `out`
+        .map(|(idx, exe, args)| (idx, out, exe, args))
         .chain(
             host_commands
                 .iter()
                 // Run host preprocessing and compilation steps in `cwd`
                 .map(|(idx, exe, args)| (idx, cwd, exe, args)),
         )
-        .sorted_by(|a, b| Ord::cmp(&a.0, &b.0));
+        .sorted_by(|a, b| Ord::cmp(&a.0, &b.0))
+        .map(|(_, dir, exe, args)| (dir.to_owned(), exe.to_owned(), args.to_owned()))
+        .collect::<Vec<_>>();
+
+    // First pass over commands because in CTK < 12.0, `cudafe++` is at the end of the commands list,
+    // but we need to set `cudafe_has_gen_module_id_file_flag` in order to adjust the cicc commands.
+    for (dir, exe, args) in all_commands.iter_mut() {
+        if let Some("cudafe++") = exe.file_stem().and_then(|s| s.to_str()) {
+            // Fix for CTK < 12.0:
+            // Add `--gen_module_id_file` if the cudafe++ args include `--module_id_file_name`
+            if let Some(idx) = args.iter().position(|x| x == &module_id_file_name_flag) {
+                if !args.contains(&gen_module_id_file_flag) {
+                    // Insert `--gen_module_id_file` just before `--module_id_file_name` to match nvcc behavior
+                    args.splice(idx..idx, [gen_module_id_file_flag.clone()]);
+                }
+            }
+            cudafe_has_gen_module_id_file_flag = args.contains(&gen_module_id_file_flag);
+        }
+    }
 
     // Create groups of commands that should be run sequential relative to each other,
     // but can optionally be run in parallel to other groups if the user requested via
@@ -726,14 +828,11 @@ where
     }
     .to_owned();
 
-    let gen_module_id_file_flag = "--gen_module_id_file".to_owned();
     let mut cuda_front_end_group = Vec::<NvccGeneratedSubcommand>::new();
     let mut final_assembly_group = Vec::<NvccGeneratedSubcommand>::new();
     let mut device_compile_groups = HashMap::<String, Vec<NvccGeneratedSubcommand>>::new();
 
-    for (_, dir, exe, args) in all_commands {
-        let mut args = args.clone();
-
+    for (dir, exe, args) in all_commands.iter_mut() {
         if let (env_vars, cacheable, Some(group)) = match exe.file_stem().and_then(|s| s.to_str()) {
             // fatbinary and nvlink are not cacheable
             Some("fatbinary") | Some("nvlink") => (
@@ -743,19 +842,38 @@ where
             ),
             // cicc and ptxas are cacheable
             Some("cicc") => {
-                match compilation_flag.to_str() {
-                    // Fix for CTK < 12.8:
-                    // If `nvcc` is invoked with `-c` (or any of its variants), remove the
-                    // `--gen_module_id_file` flag. In this mode, we instruct `cudafe++`
-                    // to generate this file, so cicc shouldn't generate it again.
-                    Some("-c") | Some("--compile") | Some("-dc") | Some("--device-c")
-                    | Some("-dw") | Some("--device-w") => {
-                        if let Some(idx) = args.iter().position(|x| x == &gen_module_id_file_flag) {
-                            args.splice(idx..(idx + 1), []);
-                        }
+                // Fix for CTK < 12.0:
+                // Remove `--gen_module_id_file` if cudafe++ already does it
+                if cudafe_has_gen_module_id_file_flag {
+                    if let Some(idx) = args.iter().position(|x| x == &gen_module_id_file_flag) {
+                        args.splice(idx..idx + 1, []);
                     }
-                    _ => {}
                 }
+                // Add these flags if they're missing:
+                // * `--gen_c_file_name test_a.compute_XX.cudafe1.c`
+                // * `--stub_file_name test_a.compute_XX.cudafe1.stub.c`
+                // * `--gen_device_file_name test_a.compute_XX.cudafe1.gpu`
+                //
+                // This ensures the same `cicc` command is generated regardless
+                // of whether the compilation flag is `-c`, `-ptx`, or `-cubin`
+
+                // e.g. test_a.compute_XX.cpp1.ii
+                let mut nidx = args.len() - 3;
+                let name = args[nidx].clone();
+                // test_a.compute_XX.cpp1.ii -> test_a.compute_XX
+                let name = name.split(".cpp1.ii").next().unwrap();
+
+                for (flag, name) in [
+                    (&gen_c_file_name_flag, format!("{name}.cudafe1.c")),
+                    (&stub_file_name_flag, format!("{name}.cudafe1.stub.c")),
+                    (&gen_device_file_name_flag, format!("{name}.cudafe1.gpu")),
+                ] {
+                    if !args.contains(flag) {
+                        args.splice(nidx..nidx, [flag.clone(), name]);
+                        nidx = args.len() - 3;
+                    }
+                }
+
                 let group = device_compile_groups.get_mut(&args[args.len() - 3]);
                 (env_vars.clone(), Cacheable::Yes, group)
             }
@@ -771,31 +889,24 @@ where
                 (env_vars.clone(), Cacheable::Yes, group)
             }
             // cudafe++ _must be_ cached, because the `.module_id` file is unique to each invocation (new in CTK 12.8)
-            Some("cudafe++") => {
-                // Fix for CTK < 12.0:
-                // Add `--gen_module_id_file` if the cudafe++ args include `--module_id_file_name`
-                if !args.contains(&gen_module_id_file_flag) {
-                    if let Some(idx) = args.iter().position(|x| x == "--module_id_file_name") {
-                        // Insert `--gen_module_id_file` just before `--module_id_file_name` to match nvcc behavior
-                        args.splice(idx..idx, [gen_module_id_file_flag.clone()]);
-                    }
-                }
-                (
-                    env_vars.clone(),
-                    Cacheable::Yes,
-                    Some(&mut cuda_front_end_group),
-                )
-            }
+            Some("cudafe++") => (
+                env_vars.clone(),
+                Cacheable::Yes,
+                Some(&mut cuda_front_end_group),
+            ),
             _ => {
                 // All generated host compiler commands include one of these defines.
                 // If one of these isn't present, this command is either a new binary
                 // in the CTK that we don't know about, or a line like `rm x_dlink.reg.c`
                 // that nvcc generates in certain cases.
                 if !args.iter().any(|arg| {
-                    arg.starts_with("-D__CUDACC__")
-                        || arg.starts_with("-D__NVCC__")
-                        || arg.starts_with("-D__CUDA_ARCH__")
+                    arg.starts_with("-D__CUDA_ARCH__")
                         || arg.starts_with("-D__CUDA_ARCH_LIST__")
+                        || arg.starts_with("-D__CUDACC__")
+                        || arg.starts_with("-D__CUDACC_VER")
+                        || arg.starts_with("-D__NVCC__")
+                        || arg.starts_with("-lcudart")
+                        || arg.starts_with("-lcudadevrt")
                 }) {
                     continue;
                 }
@@ -878,7 +989,7 @@ where
             if log_enabled!(log::Level::Trace) {
                 trace!(
                     "[{}]: transformed nvcc command: \"{}\"",
-                    output_file_name.to_string_lossy(),
+                    output_path.display(),
                     [
                         &[format!("cd {} &&", dir.to_string_lossy()).to_string()],
                         &[exe.to_str().unwrap_or_default().to_string()][..],
@@ -892,7 +1003,7 @@ where
             group.push(NvccGeneratedSubcommand {
                 exe: exe.clone(),
                 args: args.clone(),
-                cwd: dir.into(),
+                cwd: dir.to_owned(),
                 env_vars,
                 cacheable,
             });
@@ -905,7 +1016,7 @@ where
     command_groups.extend(device_compile_groups.into_values());
     command_groups.push(final_assembly_group);
 
-    Ok(command_groups)
+    Ok((nvcc_internal_files, command_groups))
 }
 
 #[allow(clippy::too_many_arguments)]
@@ -914,11 +1025,10 @@ async fn select_nvcc_subcommands<T, F>(
     executable: &Path,
     cwd: &Path,
     env_vars: &mut Vec<(OsString, OsString)>,
-    remap_filenames: bool,
     arguments: &[OsString],
     select_subcommand: F,
     host_compiler: &NvccHostCompiler,
-    output_file_name: &OsStr,
+    output_path: &Path,
 ) -> Result<Vec<(usize, PathBuf, Vec<String>)>>
 where
     F: Fn(&str) -> bool,
@@ -927,7 +1037,7 @@ where
     if log_enabled!(log::Level::Trace) {
         trace!(
             "[{}]: nvcc dryrun command: {:?}",
-            output_file_name.to_string_lossy(),
+            output_path.display(),
             [
                 &[executable.to_str().unwrap_or_default().to_string()][..],
                 &dist::osstrings_to_strings(arguments).unwrap_or_default()[..],
@@ -948,11 +1058,6 @@ where
 
     let nvcc_dryrun_output = run_input_output(nvcc_dryrun_cmd, None).await?;
 
-    let mut ext_counts = HashMap::<String, i32>::new();
-    let mut old_to_new = HashMap::<String, String>::new();
-    let is_valid_line_re = Regex::new(r"^#\$ (.*)$").unwrap();
-    let is_envvar_line_re = Regex::new(r"^([_A-Z]+)=(.*)$").unwrap();
-
     let mut dryrun_env_vars = Vec::<(OsString, OsString)>::new();
     let mut dryrun_env_vars_re_map = HashMap::<String, regex::Regex>::new();
 
@@ -966,7 +1071,7 @@ where
     for pair in reader.lines().enumerate() {
         let (idx, line) = pair;
         // Select lines that match the `#$ ` prefix from nvcc --dryrun
-        let line = match select_valid_dryrun_lines(&is_valid_line_re, &line?) {
+        let line = match select_valid_dryrun_lines(&IS_VALID_LINE_RE, &line?) {
             Ok(line) => line,
             // Ignore lines that don't start with `#$ `. For some reason, nvcc
             // on Windows prints the name of the input file without the prefix
@@ -974,7 +1079,7 @@ where
         };
 
         let maybe_exe_and_args = fold_env_vars_or_split_into_exe_and_args(
-            &is_envvar_line_re,
+            &IS_ENVVAR_LINE_RE,
             &mut dryrun_env_vars,
             &mut dryrun_env_vars_re_map,
             cwd,
@@ -982,16 +1087,11 @@ where
             host_compiler,
         )?;
 
-        let (exe, mut args) = match maybe_exe_and_args {
+        let (exe, args) = match maybe_exe_and_args {
             Some(exe_and_args) => exe_and_args,
             _ => continue,
         };
 
-        // Remap nvcc's generated file names to deterministic names
-        if remap_filenames {
-            args = remap_generated_filenames(&args, &mut old_to_new, &mut ext_counts);
-        }
-
         match exe.file_stem().and_then(|s| s.to_str()) {
             None => continue,
             Some(exe_name) => {
@@ -1114,104 +1214,262 @@ fn fold_env_vars_or_split_into_exe_and_args(
     Ok(Some((exe.clone(), args.to_vec())))
 }
 
-fn remap_generated_filenames(
-    args: &[String],
-    old_to_new: &mut HashMap<String, String>,
-    ext_counts: &mut HashMap<String, i32>,
-) -> Vec<String> {
-    args.iter()
-        .map(|arg| {
-            // Special case for MSVC's preprocess output file name flag
-            let arg_is_msvc_preprocessor_output = arg.starts_with("-Fi");
-
-            let arg = if arg_is_msvc_preprocessor_output {
-                arg.trim_start_matches("-Fi").to_owned()
-            } else {
-                arg.to_owned()
-            };
-
-            // If the argument doesn't start with `-` and is a file that
-            // ends in one of the below extensions, rename the file to an
-            // auto-incrementing stable name
-            let maybe_extension = (!arg.starts_with('-'))
-                .then(|| {
-                    [
-                        ".cpp1.ii",
-                        ".cpp4.ii",
-                        ".cudafe1.c",
-                        ".cudafe1.cpp",
-                        ".cudafe1.stub.c",
-                    ]
-                    .iter()
-                    .find(|ext| arg.ends_with(*ext))
-                    .copied()
-                })
-                .unwrap_or(None);
-
-            // If the argument is a file that ends in one of the above extensions:
-            // * If it's our first time seeing this file, create a unique name for it
-            // * If we've seen this file before, lookup its unique name in the hash map
-            //
-            // This ensures stable names are in cudafe++ output and #include directives,
-            // eliminating one source of false-positive cache misses.
-            let arg = match maybe_extension {
-                Some(extension) => {
-                    old_to_new
-                        .entry(arg)
-                        .or_insert_with_key(|arg| {
-                            // Initialize or update the number of files with a given extension:
-                            // compute_70.cudafe1.stub.c -> x_0.cudafe1.stub.c
-                            // compute_60.cudafe1.stub.c -> x_1.cudafe1.stub.c
-                            // etc.
-                            let count = ext_counts
-                                .entry(extension.into())
-                                .and_modify(|c| *c += 1)
-                                .or_insert(0)
-                                .to_string();
-                            // Return `/tmp/dir/x_{count}.{ext}` as the new name, i.e. `/tmp/dir/x_0.cudafe1.stub.c`
-                            PathBuf::from(arg)
-                                .parent()
-                                .unwrap_or(Path::new(""))
-                                // Don't use the count as the first character of the file name, because the file name
-                                // may be used as an identifier (via the __FILE__ macro) and identifiers with leading
-                                // digits are not valid in C/C++, i.e. `x_0.cudafe1.cpp` instead of `0.cudafe1.cpp`.
-                                .join("x_".to_owned() + &count + extension)
-                                .to_string_lossy()
-                                .to_string()
-                        })
-                        .to_owned()
+fn find_last_compute_arch(lines: &[(usize, PathBuf, Vec<String>)]) -> Option<String> {
+    for (_, _, args) in lines.iter().rev() {
+        if let Some(idx) = args.iter().position(|arg| arg == "-arch") {
+            if let Some(val) = args.get(idx + 1) {
+                if let Some((_, arch)) = val.split_once('_') {
+                    return Some(arch.to_owned());
                 }
-                None => {
-                    // If the argument isn't a file name with one of our extensions,
-                    // it may _reference_ files we've renamed. Go through and replace
-                    // all old names with their new stable names.
-                    //
-                    // Sort by string length descending so we don't accidentally replace
-                    // `zzz.cudafe1.cpp` with the new name for `zzz.cudafe1.c`.
-                    //
-                    // For example, if we have these renames:
-                    //
-                    //   compute_70.cudafe1.cpp -> x_0.cudafe1.cpp
-                    //   compute_70.cudafe1.c   -> x_2.cudafe1.c
-                    //
-                    // `compute_70.cudafe1.cpp` should be replaced with `x_0.cudafe1.cpp`, not `x_2.cudafe1.c`
-                    //
-                    let mut arg = arg.clone();
-                    for (old, new) in old_to_new
-                        .iter()
-                        .sorted_by(|a, b| b.0.len().cmp(&a.0.len()))
-                    {
-                        arg = arg.replace(old, new);
-                    }
-                    arg
-                }
-            };
-
-            if arg_is_msvc_preprocessor_output {
-                format!("-Fi{}", arg)
-            } else {
-                arg
             }
+        }
+    }
+    None
+}
+
+fn remap_generated_filenames(
+    last_arch: &str,
+    compile_flag: &NvccCompileFlag,
+    lines: &[(usize, PathBuf, Vec<String>)],
+    nvcc_internal_files: &mut HashMap<String, String>,
+) -> Vec<(usize, PathBuf, Vec<String>)> {
+    let extensions = [
+        "_dlink.fatbin.c",
+        "_dlink.fatbin",
+        "_dlink.o",
+        "_dlink.reg.c",
+        ".cpp1.ii",
+        ".cpp4.ii",
+        ".cubin",
+        ".cudafe1.c",
+        ".cudafe1.cpp",
+        ".cudafe1.gpu",
+        ".cudafe1.stub.c",
+        ".fatbin.c",
+        ".fatbin",
+        ".module_id",
+        ".ptx",
+    ];
+
+    let should_rename = compile_flag != &NvccCompileFlag::Preprocess;
+    let mut extensions_to_rename = vec![
+        ".cpp1.ii",
+        ".cudafe1.c",
+        ".cudafe1.cpp",
+        ".cudafe1.gpu",
+        ".cudafe1.stub.c",
+    ];
+
+    match compile_flag {
+        // Rewrite PTX names if the compile flag is `-cubin`
+        NvccCompileFlag::Cubin => {
+            extensions_to_rename.push(".ptx");
+        }
+        // Rewrite both PTX and cubin names if the compile flag is `-c` or `-fatbin`
+        NvccCompileFlag::Device | NvccCompileFlag::Fatbin => {
+            extensions_to_rename.push(".ptx");
+            extensions_to_rename.push(".cubin");
+        }
+        _ => {}
+    }
+
+    lines
+        .iter()
+        .map(|(idx, exe, args)| {
+            (
+                *idx,
+                exe.clone(),
+                args.iter()
+                    .map(|arg| {
+                        // Special case for MSVC's preprocess output file name flag
+                        let arg_is_msvc_preprocessor_output = arg.starts_with("-Fi");
+
+                        let arg = if arg_is_msvc_preprocessor_output {
+                            arg.trim_start_matches("-Fi").to_owned()
+                        } else {
+                            arg.to_owned()
+                        };
+
+                        // If the argument doesn't start with `-` and is a file that
+                        // ends in one of these extensions, rename the file to a
+                        // stable name that includes the compute architecture.
+                        let maybe_extension = if !arg.starts_with('-') {
+                            extensions.iter().find(|ext| arg.ends_with(*ext)).copied()
+                        } else {
+                            None
+                        };
+
+                        // If the argument is a file that ends in one of the above extensions:
+                        // * If it's our first time seeing this file, compute a stable name for it
+                        // * If we've seen this file before, lookup its stable name in the hash map
+                        //
+                        // This ensures stable names are in cudafe++ output and #include directives,
+                        // eliminating one source of false-positive cache misses.
+                        let arg = match maybe_extension {
+                            // nvcc generates different cubin names under different conditions:
+                            // 1. `x.cubin` with one `-gencode` arg and *not* embedding sm_XX PTX
+                            // 2. `x.sm_XX.cubin` with one `-gencode` arg and embedding sm_XX PTX
+                            // 3. `x.compute_XX.cubin` with multiple `-gencode` args and *not* embedding sm_XX PTX
+                            // 4. `x.compute_XX.sm_XX.cubin` with multiple `-gencode` args and embedding sm_XX PTX
+                            //
+                            // Since the output cubin is identical, rewrite the first three forms
+                            // to match the fourth form so we get more cache hits.
+                            Some(".cubin") => {
+                                nvcc_internal_files
+                                    .entry(arg)
+                                    .or_insert_with_key(|arg| {
+                                        if !should_rename
+                                            || !extensions_to_rename
+                                                .iter()
+                                                .any(|ext| arg.ends_with(*ext))
+                                        {
+                                            return arg.to_owned();
+                                        }
+                                        let mut path = PathBuf::from(arg);
+
+                                        let cubin_arch = path
+                                            .file_name()
+                                            .and_then(|name| name.to_str())
+                                            .and_then(|name| {
+                                                let mut pair = if HAS_SM_IN_NAME_RE.is_match(name) {
+                                                    name.split(".sm_")
+                                                } else if HAS_COMPUTE_IN_NAME_RE.is_match(name) {
+                                                    name.split(".compute_")
+                                                } else {
+                                                    return None;
+                                                };
+                                                // Ignore everything before `.sm_` or `.compute_`
+                                                let _ = pair.next().unwrap();
+                                                // Take everything after `.sm_` or `.compute_`, i.e. `{arch}.cubin`
+                                                let s = pair.next().unwrap();
+                                                // This is the arch number
+                                                let (s, _) = s.split_once('.').unwrap();
+                                                Some(s)
+                                            })
+                                            .unwrap_or(last_arch)
+                                            .to_owned();
+
+                                        // Add the `sm_{arch}` component if necessary
+                                        if let Some(name) = path
+                                            .file_name()
+                                            .and_then(|name| name.to_str())
+                                            .and_then(|name| {
+                                                (!HAS_SM_IN_NAME_RE.is_match(name)).then_some(name)
+                                            })
+                                        {
+                                            // test_a.cubin -> test_a
+                                            // test_a.compute_60.cubin -> test_a.compute_60
+                                            let name = name.strip_suffix(".cubin").unwrap();
+                                            // test_a.cubin -> test_a.sm_60.cubin
+                                            // test_a.compute_60.cubin -> test_a.compute_60.sm_60.cubin
+                                            let name = format!("{name}.sm_{cubin_arch}.cubin");
+                                            path.set_file_name(name);
+                                        }
+
+                                        // Add the `compute_{arch}` component if necessary
+                                        if let Some(name) = path
+                                            .file_name()
+                                            .and_then(|name| name.to_str())
+                                            .and_then(|name| {
+                                                (!HAS_COMPUTE_IN_NAME_RE.is_match(name))
+                                                    .then_some(name)
+                                            })
+                                        {
+                                            // test_a.sm_60.cubin -> test_a
+                                            let ext = format!(".sm_{cubin_arch}.cubin");
+                                            let name = name.strip_suffix(&ext).unwrap();
+                                            // test_a.sm_60.cubin -> test_a.compute_60.sm_60.cubin
+                                            let name = format!("{name}.compute_{cubin_arch}{ext}");
+                                            path.set_file_name(name);
+                                        }
+                                        path.into_os_string().into_string().unwrap()
+                                    })
+                                    .to_owned()
+                            }
+                            Some(ext) => {
+                                nvcc_internal_files
+                                    .entry(arg)
+                                    .or_insert_with_key(|arg| {
+                                        if !should_rename
+                                            || !extensions_to_rename
+                                                .iter()
+                                                .any(|ext| arg.ends_with(*ext))
+                                        {
+                                            return arg.to_owned();
+                                        }
+                                        let mut path = PathBuf::from(arg);
+                                        // Add the `compute_{arch}` component if necessary
+                                        if let Some(name) = path
+                                            .file_name()
+                                            .and_then(|name| name.to_str())
+                                            .and_then(|name| {
+                                                (!HAS_COMPUTE_IN_NAME_RE.is_match(name))
+                                                    .then_some(name)
+                                            })
+                                        {
+                                            // test_a.cudafe1.c -> test_a
+                                            let name = name.strip_suffix(ext).unwrap();
+                                            // test_a.cudafe1.c -> test_a.compute_60.cudafe1.c
+                                            let name = format!("{name}.compute_{last_arch}{ext}");
+                                            path.set_file_name(name);
+                                        }
+                                        path.into_os_string().into_string().unwrap()
+                                    })
+                                    .to_owned()
+                            }
+                            None => {
+                                // If the argument isn't a file name with one of our extensions,
+                                // it may _reference_ files we've renamed. Go through and replace
+                                // all old names with their new stable names.
+                                //
+                                // Sort by string length descending so we don't accidentally replace
+                                // `test_a.cudafe1.cpp` with the new name for `test_a.cudafe1.c`.
+                                //
+                                // For example, if we have these renames:
+                                //
+                                //   test_a.cudafe1.cpp -> test_a.compute_70.cudafe1.cpp
+                                //   test_a.cudafe1.c   -> test_a.compute_70.cudafe1.c
+                                //
+                                // `test_a.cudafe1.cpp` should be replaced with
+                                // `test_a.compute_70.cudafe1.cpp`, not
+                                // `test_a.compute_70.cudafe1.c`
+                                //
+                                let mut arg = arg.clone();
+                                for (old, new) in nvcc_internal_files
+                                    .iter()
+                                    .sorted_by(|a, b| b.0.len().cmp(&a.0.len()))
+                                {
+                                    arg = arg.replace(old, new);
+                                }
+                                arg
+                            }
+                        };
+
+                        if arg_is_msvc_preprocessor_output {
+                            format!("-Fi{}", arg)
+                        } else {
+                            arg
+                        }
+                    })
+                    .collect::<Vec<_>>()
+                    .iter()
+                    .map(|arg| {
+                        // If the argument matches the form `-<arg>=<val>` and `val` is a
+                        // filename or path that ends in one of these extensions, extract
+                        // and track the filename as an internal nvcc file
+                        if let Some(groups) = ARG_HAS_FILE_WITH_EXTENSION_RE.captures(arg.as_str())
+                        {
+                            let (_, [name]) = groups.extract();
+                            if !nvcc_internal_files.contains_key(name)
+                                && extensions.iter().any(|ext| name.ends_with(*ext))
+                            {
+                                nvcc_internal_files.insert(name.to_owned(), name.to_owned());
+                            }
+                        }
+                        arg.to_owned()
+                    })
+                    .collect::<Vec<_>>(),
+            )
         })
         .collect::<Vec<_>>()
 }
@@ -1221,7 +1479,7 @@ async fn run_nvcc_subcommands_group<T>(
     creator: &T,
     cwd: &Path,
     commands: &[NvccGeneratedSubcommand],
-    output_file_name: &OsStr,
+    output_path: &Path,
 ) -> Result<process::Output>
 where
     T: CommandCreatorSync,
@@ -1232,6 +1490,30 @@ where
         stderr: vec![],
     };
 
+    async fn run_subcommand<T>(cmd: &NvccGeneratedSubcommand, creator: &T) -> process::Output
+    where
+        T: CommandCreatorSync,
+    {
+        let NvccGeneratedSubcommand {
+            exe,
+            args,
+            cwd,
+            env_vars,
+            cacheable,
+        } = cmd;
+
+        let mut cmd = creator.clone().new_command_sync(exe);
+
+        cmd.args(args)
+            .current_dir(cwd)
+            .env_clear()
+            .envs(env_vars.to_vec());
+
+        run_input_output(cmd, None)
+            .await
+            .unwrap_or_else(error_to_output)
+    }
+
     for cmd in commands {
         let NvccGeneratedSubcommand {
             exe,
@@ -1244,7 +1526,7 @@ where
         if log_enabled!(log::Level::Trace) {
             trace!(
                 "[{}]: run_commands_sequential cwd={:?}, cmd=\"{}\"",
-                output_file_name.to_string_lossy(),
+                output_path.display(),
                 cwd,
                 [
                     vec![exe.clone().into_os_string().into_string().unwrap()],
@@ -1256,18 +1538,7 @@ where
         }
 
         let out = match cacheable {
-            Cacheable::No => {
-                let mut cmd = creator.clone().new_command_sync(exe);
-
-                cmd.args(args)
-                    .current_dir(cwd)
-                    .env_clear()
-                    .envs(env_vars.to_vec());
-
-                run_input_output(cmd, None)
-                    .await
-                    .unwrap_or_else(error_to_output)
-            }
+            Cacheable::No => run_subcommand(cmd, creator).await,
             Cacheable::Yes => {
                 let srvc = service.clone();
                 let args = dist::strings_to_osstrings(args);
@@ -1278,30 +1549,31 @@ where
                 {
                     Err(err) => error_to_output(err),
                     Ok(compiler) => match compiler.parse_arguments(&args, cwd, env_vars) {
-                        CompilerArguments::NotCompilation => Err(anyhow!("Not compilation")),
-                        CompilerArguments::CannotCache(why, extra_info) => Err(extra_info
-                            .map_or_else(
+                        CompilerArguments::NotCompilation => run_subcommand(cmd, creator).await,
+                        CompilerArguments::CannotCache(why, extra_info) => {
+                            error_to_output(extra_info.map_or_else(
                                 || anyhow!("Cannot cache({}): {:?} {:?}", why, exe, args),
                                 |desc| {
                                     anyhow!("Cannot cache({}, {}): {:?} {:?}", why, desc, exe, args)
                                 },
-                            )),
-                        CompilerArguments::Ok(hasher) => {
-                            srvc.start_compile_task(
+                            ))
+                        }
+                        CompilerArguments::Ok(hasher) => srvc
+                            .start_compile_task(
                                 compiler,
                                 hasher,
                                 args,
                                 cwd.to_owned(),
                                 env_vars
                                     .iter()
+                                    .filter(|(key, _)| key != "SCCACHE_DIRECT")
                                     .chain([("SCCACHE_DIRECT".into(), "false".into())].iter())
                                     .cloned()
                                     .collect::<Vec<_>>(),
                             )
                             .await
-                        }
-                    }
-                    .map_or_else(error_to_output, |res| compile_result_to_output(exe, res)),
+                            .map_or_else(error_to_output, result_to_output),
+                    },
                 }
             }
         };
@@ -1318,10 +1590,10 @@ where
 
 fn aggregate_output(lhs: process::Output, rhs: process::Output) -> process::Output {
     process::Output {
-        status: exit_status(
-            std::cmp::max(status_to_code(lhs.status), status_to_code(rhs.status))
-                as ExitStatusValue,
-        ),
+        status: exit_status(std::cmp::max(
+            !lhs.status.success() as ExitStatusValue,
+            !rhs.status.success() as ExitStatusValue,
+        )),
         stdout: [lhs.stdout, rhs.stdout].concat(),
         stderr: [lhs.stderr, rhs.stderr].concat(),
     }
@@ -1338,48 +1610,18 @@ fn error_to_output(err: Error) -> process::Output {
     }
 }
 
-fn compile_result_to_output(exe: &Path, res: protocol::CompileFinished) -> process::Output {
-    if let Some(signal) = res.signal {
-        return process::Output {
-            status: exit_status(signal as ExitStatusValue),
-            stdout: res.stdout,
-            stderr: [
-                format!(
-                    "{} terminated (signal: {})",
-                    exe.file_stem().unwrap().to_string_lossy(),
-                    signal
-                )
-                .as_bytes(),
-                &res.stderr,
-            ]
-            .concat(),
-        };
-    }
+fn result_to_output(res: protocol::CompileFinished) -> process::Output {
     process::Output {
-        status: exit_status(res.retcode.unwrap_or(0) as ExitStatusValue),
+        status: exit_status(
+            res.retcode
+                .or(res.signal)
+                .map_or(0, |c| (c != 0) as ExitStatusValue),
+        ),
         stdout: res.stdout,
         stderr: res.stderr,
     }
 }
 
-#[cfg(unix)]
-fn status_to_code(res: process::ExitStatus) -> ExitStatusValue {
-    if res.success() {
-        0 as ExitStatusValue
-    } else {
-        res.signal().or(res.code()).unwrap_or(1) as ExitStatusValue
-    }
-}
-
-#[cfg(windows)]
-fn status_to_code(res: process::ExitStatus) -> ExitStatusValue {
-    if res.success() {
-        0 as ExitStatusValue
-    } else {
-        res.code().unwrap_or(1) as ExitStatusValue
-    }
-}
-
 counted_array!(pub static ARGS: [ArgInfo<gcc::ArgData>; _] = [
     //todo: refactor show_includes into dependency_args
     take_arg!("--Werror", OsString, CanBeSeparated('='), PreprocessorArgument),
@@ -1391,6 +1633,7 @@ counted_array!(pub static ARGS: [ArgInfo<gcc::ArgData>; _] = [
     take_arg!("--default-stream", OsString, CanBeSeparated('='), PassThrough),
     flag!("--device-c", DoCompilation),
     flag!("--device-w", DoCompilation),
+    take_arg!("--diag-suppress", OsString, CanBeSeparated('='), PassThrough),
     flag!("--expt-extended-lambda", PreprocessorArgumentFlag),
     flag!("--expt-relaxed-constexpr", PreprocessorArgumentFlag),
     flag!("--extended-lambda", PreprocessorArgumentFlag),
diff --git a/src/compiler/nvhpc.rs b/src/compiler/nvhpc.rs
index 0eb2c924a..67454e63e 100644
--- a/src/compiler/nvhpc.rs
+++ b/src/compiler/nvhpc.rs
@@ -165,6 +165,7 @@ impl CCompilerImpl for Nvhpc {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/ptxas.rs b/src/compiler/ptxas.rs
index 97e0c1061..4e566fcdd 100644
--- a/src/compiler/ptxas.rs
+++ b/src/compiler/ptxas.rs
@@ -86,6 +86,7 @@ impl CCompilerImpl for Ptxas {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/rust.rs b/src/compiler/rust.rs
index 771896ff1..7368cbfe4 100644
--- a/src/compiler/rust.rs
+++ b/src/compiler/rust.rs
@@ -1684,6 +1684,7 @@ impl<T: CommandCreatorSync> Compilation<T> for RustCompilation {
         &self,
         path_transformer: &mut dist::PathTransformer,
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,
diff --git a/src/compiler/tasking_vx.rs b/src/compiler/tasking_vx.rs
index ce07857be..82d87645d 100644
--- a/src/compiler/tasking_vx.rs
+++ b/src/compiler/tasking_vx.rs
@@ -98,6 +98,7 @@ impl CCompilerImpl for TaskingVX {
         cwd: &Path,
         env_vars: &[(OsString, OsString)],
         _rewrite_includes_only: bool,
+        _hash_key: &str,
     ) -> Result<(
         Box<dyn CompileCommand<T>>,
         Option<dist::CompileCommand>,

From 1103c1651be54e196791d328823fca1cdd6b0b85 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 19:00:26 +0000
Subject: [PATCH 4/9] Add more CUDA multi-device-arch tests

Add more multi-arch tests to ensure that combining cached and newly compiled PTX and cubins doesn't produce corrupted objects.
---
 tests/system.rs      | 489 +++++++++++++++++++++++++++++++++++++++----
 tests/test_a.cu      |  21 +-
 tests/test_a_copy.cu |  23 ++
 tests/test_b.cu      |  21 +-
 4 files changed, 502 insertions(+), 52 deletions(-)
 create mode 100644 tests/test_a_copy.cu

diff --git a/tests/system.rs b/tests/system.rs
index 2a9795e95..136b15477 100644
--- a/tests/system.rs
+++ b/tests/system.rs
@@ -194,6 +194,7 @@ const INPUT_ERR: &str = "test_err.c";
 const INPUT_MACRO_EXPANSION: &str = "test_macro_expansion.c";
 const INPUT_WITH_DEFINE: &str = "test_with_define.c";
 const INPUT_FOR_CUDA_A: &str = "test_a.cu";
+const INPUT_FOR_CUDA_A_COPY: &str = "test_a_copy.cu";
 const INPUT_FOR_CUDA_B: &str = "test_b.cu";
 const INPUT_FOR_CUDA_C: &str = "test_c.cu";
 const INPUT_FOR_HIP_A: &str = "test_a.hip";
@@ -735,6 +736,7 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
     let mut stats = client.stats().unwrap();
 
     let extra_args = vec![];
+    let with_debug_flags = false;
 
     let Compiler {
         name,
@@ -743,7 +745,10 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
     } = compiler;
     println!("test_nvcc_cuda_compiles: {}", name);
     // Compile multiple source files.
-    copy_to_tempdir(&[INPUT_FOR_CUDA_A, INPUT_FOR_CUDA_B], tempdir);
+    copy_to_tempdir(
+        &[INPUT_FOR_CUDA_A, INPUT_FOR_CUDA_A_COPY, INPUT_FOR_CUDA_B],
+        tempdir,
+    );
 
     let build_dir = PathBuf::from("build");
     fs::create_dir_all(tempdir.join(&build_dir)).unwrap();
@@ -798,8 +803,6 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
         assert_eq!(
             stats,
             ServerStats {
-                // TODO: Fix this in the next PR
-                cache_errors: stats.cache_errors.clone(),
                 cache_write_duration: stats.cache_write_duration,
                 cache_read_hit_duration: stats.cache_read_hit_duration,
                 compiler_write_duration: stats.compiler_write_duration,
@@ -808,6 +811,39 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
         );
     };
 
+    trace!("compile A ptx");
+    run_cuda_test(
+        "-ptx",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join("test.ptx"), // relative path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(1),
+            compilations: Some(2),
+            compile_requests: Some(1),
+            requests_executed: Some(2),
+            cache_misses: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
+            ..Default::default()
+        },
+    );
+
+    trace!("compile A cubin");
+    run_cuda_test(
+        "-cubin",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &tempdir.join(&build_dir).join("test.cubin"), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            cache_writes: Some(1),
+            compilations: Some(2),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_hits: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
+            cache_misses: Some(vec![(CCompilerKind::Ptxas, Language::Cubin, 1)]),
+            ..Default::default()
+        },
+    );
+
     trace!("compile A");
     run_cuda_test(
         "-c",
@@ -815,99 +851,141 @@ fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir:
         &build_dir.join(OUTPUT),     // relative path for output
         &extra_args,
         AdditionalStats {
-            cache_writes: Some(4),
-            compilations: Some(5),
+            cache_writes: Some(3 + with_debug_flags as u64),
+            compilations: Some(4 + with_debug_flags as u64),
             compile_requests: Some(1),
             requests_executed: Some(5),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
             cache_misses: Some(vec![
-                (CCompilerKind::CudaFE, Language::CudaFE, 1),
                 (CCompilerKind::Cicc, Language::Ptx, 1),
-                (CCompilerKind::Ptxas, Language::Cubin, 1),
                 (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    with_debug_flags as u64,
+                ),
             ]),
             ..Default::default()
         },
     );
 
-    trace!("compile A");
+    trace!("compile A (cached)");
     run_cuda_test(
         "-c",
-        Path::new(INPUT_FOR_CUDA_A),            // relative path for input
+        &tempdir.join(INPUT_FOR_CUDA_A), // absolute path for input
         &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
         &extra_args,
         AdditionalStats {
-            cache_writes: Some(0),
             compilations: Some(1),
             compile_requests: Some(1),
             requests_executed: Some(5),
             cache_hits: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
                 (CCompilerKind::CudaFE, Language::CudaFE, 1),
                 (CCompilerKind::Cicc, Language::Ptx, 1),
                 (CCompilerKind::Ptxas, Language::Cubin, 1),
-                (CCompilerKind::Nvcc, Language::Cuda, 1),
             ]),
             ..Default::default()
         },
     );
 
-    // By compiling another input source we verify that the pre-processor
-    // phase is correctly running and outputting text
-    trace!("compile B");
+    // Compile a copy of `test_a.cu` to ensure we get cache hits for identical PTX across different files.
+    trace!("compile A (copy)");
     run_cuda_test(
         "-c",
-        &tempdir.join(INPUT_FOR_CUDA_B), // absolute path for input
-        &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
+        Path::new(INPUT_FOR_CUDA_A_COPY), // relative path for input
+        &build_dir.join(OUTPUT),          // relative path for output
         &extra_args,
+        // Since `test_a_copy.cu` is a copy of `test_a.cu`, its PTX will be identical when *not* using -G.
+        // But -G causes cudafe++ and cicc to embed the source path in their output, and we get cache misses.
         AdditionalStats {
-            cache_writes: Some(3),
-            compilations: Some(4),
+            cache_writes: Some(3 + with_debug_flags as u64),
+            compilations: Some(4 + with_debug_flags as u64),
             compile_requests: Some(1),
             requests_executed: Some(5),
-            cache_hits: Some(vec![(CCompilerKind::Ptxas, Language::Cubin, 1)]),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
             cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
                 (CCompilerKind::CudaFE, Language::CudaFE, 1),
                 (CCompilerKind::Cicc, Language::Ptx, 1),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    trace!("compile A (copy) (cached)");
+    run_cuda_test(
+        "-c",
+        &tempdir.join(INPUT_FOR_CUDA_A_COPY), // absolute path for input
+        &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
+        &extra_args,
+        AdditionalStats {
+            compilations: Some(1),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
                 (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
             ]),
             ..Default::default()
         },
     );
 
-    trace!("compile ptx");
+    // By compiling another input source we verify that the pre-processor
+    // phase is correctly running and outputting text
+    trace!("compile B");
     run_cuda_test(
-        "-ptx",
-        Path::new(INPUT_FOR_CUDA_A), // relative path for input
-        &build_dir.join("test.ptx"), // relative path for output
+        "-c",
+        Path::new(INPUT_FOR_CUDA_B), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
         &extra_args,
         AdditionalStats {
-            cache_writes: Some(1),
-            compilations: Some(2),
+            cache_writes: Some(4),
+            compilations: Some(5),
             compile_requests: Some(1),
-            requests_executed: Some(2),
-            cache_misses: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
+            requests_executed: Some(5),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
             ..Default::default()
         },
     );
 
-    trace!("compile cubin");
+    trace!("compile B (cached)");
     run_cuda_test(
-        "-cubin",
-        Path::new(INPUT_FOR_CUDA_A), // relative path for input
-        &tempdir.join(&build_dir).join("test.cubin"), // absolute path for output
+        "-c",
+        &tempdir.join(INPUT_FOR_CUDA_B), // absolute path for input
+        &tempdir.join(&build_dir).join(OUTPUT), // absolute path for output
         &extra_args,
         AdditionalStats {
-            cache_writes: Some(1),
-            compilations: Some(2),
+            compilations: Some(1),
             compile_requests: Some(1),
-            requests_executed: Some(3),
+            requests_executed: Some(5),
             cache_hits: Some(vec![
-                // TODO: Fix this in the next PR
-                // (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 1),
                 (CCompilerKind::Ptxas, Language::Cubin, 1),
             ]),
-            // TODO: Should not be a cache miss.
-            //       Fix this in the next PR
-            cache_misses: Some(vec![(CCompilerKind::Cicc, Language::Ptx, 1)]),
             ..Default::default()
         },
     );
@@ -933,7 +1011,6 @@ int main(int argc, char** argv) {
 }
 ";
     write_source(tempdir, test_2299_src_name, test_2299_cu_src_1);
-    trace!("compile test_2299.cu (1)");
     run_cuda_test(
         "-c",
         Path::new(test_2299_src_name),       // relative path for input
@@ -999,6 +1076,330 @@ int main(int argc, char** argv) {
             ..Default::default()
         },
     );
+
+    // Precompile sm_86 PTX and cubin so their cache entries potentially have a different .module_id file
+    trace!("compile A cubin sm_86");
+    run_cuda_test(
+        "-cubin",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_86,code=[sm_86]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(2),
+            compilations: Some(3),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_misses: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test compiling a file whose PTX yields a cache hit for a cubin from another file (`test_a.cu`)
+    trace!("compile B cubin sm_86");
+    run_cuda_test(
+        "-cubin",
+        Path::new(INPUT_FOR_CUDA_B), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_86,code=[sm_86]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(1 + with_debug_flags as u64),
+            compilations: Some(2 + with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test compiling a multiarch object where the PTX and cubin for one of the archs is cached
+    trace!("compile A sm_80,sm_86");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &[
+                "-gencode=arch=compute_80,code=[sm_80]".into(),
+                "-gencode=arch=compute_86,code=[compute_86,sm_86]".into(),
+            ],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(5 + with_debug_flags as u64),
+            compilations: Some(6 + with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(7),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 2),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    1 + with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test compiling a multiarch object of a different source file, but
+    // whose device code is the same as a previously compiled file's
+    trace!("compile A (copy) sm_80,sm_86");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A_COPY), // relative path for input
+        &build_dir.join(OUTPUT),          // relative path for output
+        &[
+            extra_args.as_slice(),
+            &[
+                "-gencode=arch=compute_80,code=[sm_80]".into(),
+                "-gencode=arch=compute_86,code=[compute_86,sm_86]".into(),
+            ],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(4 + 2 * with_debug_flags as u64),
+            compilations: Some(5 + 2 * with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(7),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                2 * !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 2),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    2 * with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    trace!("compile B sm_80,sm_86");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_B), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &[
+                "-gencode=arch=compute_80,code=[sm_80]".into(),
+                "-gencode=arch=compute_86,code=[compute_86,sm_86]".into(),
+            ],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(5 + with_debug_flags as u64),
+            compilations: Some(6 + with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(7),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+                (CCompilerKind::Cicc, Language::Ptx, 2),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    1 + with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test that compiling a single-arch object where the arch is a subset of
+    // a previous multi-arch compilation produces cache hits on the underlying
+    // PTX and cubin compilations.
+    trace!("compile A sm_80");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_80,code=[compute_80,sm_80]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(2),
+            compilations: Some(3),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    trace!("compile B sm_80");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_B), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_80,code=[compute_80,sm_80]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(2),
+            compilations: Some(3),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test that compiling a single-arch cubin where the arch is a subset of
+    // a previous multi-arch compilation produces cache hits on the underlying
+    // PTX and cubin compilations.
+    trace!("compile A cubin sm_80");
+    run_cuda_test(
+        "-cubin",
+        &tempdir.join(INPUT_FOR_CUDA_A), // absolute path for input
+        &tempdir.join(&build_dir).join("test.cubin"), // absolute path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_80,code=[sm_80]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(1 + with_debug_flags as u64),
+            compilations: Some(2 + with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    trace!("compile B cubin sm_80");
+    run_cuda_test(
+        "-cubin",
+        &tempdir.join(INPUT_FOR_CUDA_B), // absolute path for input
+        &tempdir.join(&build_dir).join("test.cubin"), // absolute path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_80,code=[sm_80]".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(1 + with_debug_flags as u64),
+            compilations: Some(2 + with_debug_flags as u64),
+            compile_requests: Some(1),
+            requests_executed: Some(3),
+            cache_hits: Some(vec![(
+                CCompilerKind::Ptxas,
+                Language::Cubin,
+                !with_debug_flags as u64,
+            )]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (
+                    CCompilerKind::Ptxas,
+                    Language::Cubin,
+                    with_debug_flags as u64,
+                ),
+            ]),
+            ..Default::default()
+        },
+    );
+
+    // Test that compiling sm80 PTX and assembling as an sm86 cubin
+    // yields a cache hit for the PTX and cache miss for the cubin
+    trace!("compile A compute_80,sm_86");
+    run_cuda_test(
+        "-c",
+        Path::new(INPUT_FOR_CUDA_A), // relative path for input
+        &build_dir.join(OUTPUT),     // relative path for output
+        &[
+            extra_args.as_slice(),
+            &["-gencode=arch=compute_80,code=sm_86".into()],
+        ]
+        .concat(),
+        AdditionalStats {
+            cache_writes: Some(2),
+            compilations: Some(3),
+            compile_requests: Some(1),
+            requests_executed: Some(5),
+            cache_hits: Some(vec![
+                (CCompilerKind::Cicc, Language::Ptx, 1),
+                (CCompilerKind::CudaFE, Language::CudaFE, 1),
+            ]),
+            cache_misses: Some(vec![
+                (CCompilerKind::Nvcc, Language::Cuda, 1),
+                (CCompilerKind::Ptxas, Language::Cubin, 1),
+            ]),
+            ..Default::default()
+        },
+    );
 }
 
 fn test_nvcc_proper_lang_stat_tracking(
@@ -1030,7 +1431,7 @@ fn test_nvcc_proper_lang_stat_tracking(
             exe,
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            Vec::new(),
+            extra_args.clone(),
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
@@ -1038,8 +1439,8 @@ fn test_nvcc_proper_lang_stat_tracking(
         .success();
     fs::remove_file(&out_file).unwrap();
 
-    stats.cache_writes += 3;
-    stats.compilations += 4;
+    stats.cache_writes += 4;
+    stats.compilations += 5;
     stats.compile_requests += 1;
     stats.requests_executed += 5;
     stats.non_cacheable_compilations += 1;
@@ -1053,7 +1454,7 @@ fn test_nvcc_proper_lang_stat_tracking(
         .cache_misses
         .increment(&CompilerKind::C(CCompilerKind::Cicc), &Language::Ptx);
     stats
-        .cache_hits
+        .cache_misses
         .increment(&CompilerKind::C(CCompilerKind::Ptxas), &Language::Cubin);
     assert_eq!(
         stats,
diff --git a/tests/test_a.cu b/tests/test_a.cu
index c6d61ec3d..2ad4df746 100644
--- a/tests/test_a.cu
+++ b/tests/test_a.cu
@@ -1,10 +1,23 @@
-
 #include <stdio.h>
 #include "cuda_runtime.h"
 
-__global__ void cuda_entry_point(int*, int*) {}
-__device__ void cuda_device_func(int*, int*) {}
+__device__ void cuda_device_func(int* a) {
+  #  if __CUDA_ARCH__ < 860
+  a[0] = 1;
+  #  else
+  a[0] = 2;
+  #  endif
+}
+
+__global__ void cuda_entry_point(int* a) {
+  cuda_device_func(a);
+}
 
 int main() {
-  printf("%s says hello world\n", __FILE__);
+  int* a;
+  cudaMalloc(&a, sizeof(int));
+  cuda_entry_point<<<1,1>>>(a);
+  int b;
+  cudaMemcpy(&b, a, sizeof(int), cudaMemcpyDeviceToHost);
+  printf("%s says hello world, result=%d\n", __FILE__, b);
 }
diff --git a/tests/test_a_copy.cu b/tests/test_a_copy.cu
new file mode 100644
index 000000000..2ad4df746
--- /dev/null
+++ b/tests/test_a_copy.cu
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include "cuda_runtime.h"
+
+__device__ void cuda_device_func(int* a) {
+  #  if __CUDA_ARCH__ < 860
+  a[0] = 1;
+  #  else
+  a[0] = 2;
+  #  endif
+}
+
+__global__ void cuda_entry_point(int* a) {
+  cuda_device_func(a);
+}
+
+int main() {
+  int* a;
+  cudaMalloc(&a, sizeof(int));
+  cuda_entry_point<<<1,1>>>(a);
+  int b;
+  cudaMemcpy(&b, a, sizeof(int), cudaMemcpyDeviceToHost);
+  printf("%s says hello world, result=%d\n", __FILE__, b);
+}
diff --git a/tests/test_b.cu b/tests/test_b.cu
index c6d61ec3d..71a877288 100644
--- a/tests/test_b.cu
+++ b/tests/test_b.cu
@@ -1,10 +1,23 @@
-
 #include <stdio.h>
 #include "cuda_runtime.h"
 
-__global__ void cuda_entry_point(int*, int*) {}
-__device__ void cuda_device_func(int*, int*) {}
+__device__ void cuda_device_func(int* a) {
+  #  if __CUDA_ARCH__ < 860
+  a[0] = 3;
+  #  else
+  a[0] = 2;
+  #  endif
+}
+
+__global__ void cuda_entry_point(int* a) {
+  cuda_device_func(a);
+}
 
 int main() {
-  printf("%s says hello world\n", __FILE__);
+  int* a;
+  cudaMalloc(&a, sizeof(int));
+  cuda_entry_point<<<1,1>>>(a);
+  int b;
+  cudaMemcpy(&b, a, sizeof(int), cudaMemcpyDeviceToHost);
+  printf("%s says hello world, result=%d\n", __FILE__, b);
 }

From 3d1ea7a41aac66e9dd9028c2a32eb7d9ba91655e Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 12:13:57 -0700
Subject: [PATCH 5/9] bump GHA runners to ubuntu-22.04 due to ubuntu-20.04
 brown-out and pending deprecation

---
 .github/workflows/ci.yml | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 48d3e107a..87b750428 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,6 +2,17 @@ name: ci
 on: [ push, pull_request ]
 
 jobs:
+  checks:  # reports whether the CODECOV_TOKEN secret is available to this workflow run
+    name: checks
+    runs-on: ubuntu-latest
+    outputs:
+      has-codecov-token: ${{ steps.has-codecov-token.outputs.defined }}
+    steps:
+      - id: has-codecov-token
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        if: "${{ env.CODECOV_TOKEN != '' }}"  # skipped when the secret is empty, leaving the output unset
+        run: echo "defined=true" >> "$GITHUB_OUTPUT"
   lint:
     name: ${{ matrix.component }} ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
@@ -101,11 +112,11 @@ jobs:
           # # M1 CPU
           - os: macos-14
           - os: windows-2019
-            cuda: "11.1"
+            cuda: "11.8"
             # Oldest supported version, keep in sync with README.md
             rustc: "1.75.0"
             extra_args: --no-fail-fast
-            extra_desc: cuda11.1
+            extra_desc: cuda11.8
           - os: windows-2019
             cuda: "11.8"
             rustc: nightly
@@ -141,6 +152,7 @@ jobs:
       BINARY_DIR: "target/debug"
       GRCOV_IGNORE_OPTION: '--ignore build.rs --ignore "/*" --ignore "[a-zA-Z]:/*"'
       GRCOV_EXCLUDE_OPTION: '--excl-br-line "^\s*((debug_)?assert(_eq|_ne)?!|#\[derive\()"'
+      SCCACHE_DEBUG: 1
     steps:
       - uses: ilammy/msvc-dev-cmd@v1
 
@@ -282,7 +294,7 @@ jobs:
         if: ${{ !matrix.container }}
 
       - name: Build
-        run: cargo build --locked --release --bin ${{ matrix.binary || 'sccache' }} --target ${{ matrix.target }} --features=openssl/vendored ${{ matrix.extra_args }}
+        run: cargo build --locked --release --bin ${{ matrix.binary || 'sccache' }} --target ${{ matrix.target }} --features=vendored-openssl ${{ matrix.extra_args }}
         env:
           MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macosx_deployment_target }}
           DEVELOPER_DIR: ${{ matrix.developer_dir }}
@@ -322,7 +334,7 @@ jobs:
           mem: 8192
           usesh: true
           sync: rsync
-          copyback: false
+          copyback: true
           prepare: pkg install -y ca_root_nss curl gmake gtar pot sudo
           run: |
             #####################################################################################

From a6fb6adced6e2d72d14f881d306d25ff17d58c72 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 29 Apr 2025 08:43:46 -0700
Subject: [PATCH 6/9] only do coverage steps if repo has codecov token

---
 .github/workflows/ci.yml | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 87b750428..f142b2228 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -70,6 +70,7 @@ jobs:
 
 
   test:
+    needs: [checks]
     name: test ${{ matrix.os }} rust ${{ matrix.rustc || 'stable' }} ${{ matrix.extra_desc }}
     runs-on: ${{ matrix.os }}
     continue-on-error: ${{ matrix.allow_failure || false }}
@@ -83,40 +84,50 @@ jobs:
             rustc: "1.75.0"
             extra_desc: dist-tests
             extra_args: --no-default-features --features=dist-tests test_dist_ -- --test-threads 1
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             rustc: stable
             extra_desc: dist-server
             extra_args: --features=dist-server
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             rustc: stable
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             rustc: beta
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             rustc: nightly
             allow_failure: true
             extra_args: --features=unstable
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             extra_desc: no-default-features
             extra_args: --no-default-features
             allow_failure: true
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-22.04
             cuda: "11.8"
             extra_desc: cuda11.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: ubuntu-24.04
             cuda: "12.8"
             # Oldest supported version, keep in sync with README.md
             rustc: "1.75.0"
             extra_desc: cuda12.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: macos-13
             no_coverage: true
           # # M1 CPU
           - os: macos-14
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: windows-2019
             cuda: "11.8"
             # Oldest supported version, keep in sync with README.md
             rustc: "1.75.0"
             extra_args: --no-fail-fast
             extra_desc: cuda11.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: windows-2019
             cuda: "11.8"
             rustc: nightly
@@ -128,18 +139,21 @@ jobs:
             cuda: "11.8"
             rustc: beta
             extra_desc: cuda11.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: windows-2022
             cuda: "12.8"
             # Oldest supported version, keep in sync with README.md
             rustc: "1.75.0"
             extra_args: --no-fail-fast
             extra_desc: cuda12.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: windows-2022
             cuda: "12.8"
             rustc: nightly
             allow_failure: true
             extra_args: --features=unstable
             extra_desc: cuda12.8
+            no_coverage: ${{ needs.checks.outputs.has-codecov-token != 'true' }}
           - os: windows-2022
             cuda: "12.8"
             rustc: beta
@@ -199,7 +213,6 @@ jobs:
           echo '[env]
           LLVM_PROFILE_FILE = { value = "target/debug/coverage/default-%p-%8m.profraw", relative = true }' >> .cargo/config.toml
 
-
       - name: Execute tests
         run: cargo test --locked --all-targets ${{ matrix.extra_args }}
         env:

From 9ac88a8459520269c74dd7c4ddd477754bc06eed Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 29 Apr 2025 08:46:54 -0700
Subject: [PATCH 7/9] remove dead code

---
 src/compiler/cicc.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/compiler/cicc.rs b/src/compiler/cicc.rs
index 0e16c3cec..ec8640bfd 100644
--- a/src/compiler/cicc.rs
+++ b/src/compiler/cicc.rs
@@ -119,7 +119,6 @@ where
     let mut take_next = false;
     let mut outputs = HashMap::new();
     let mut extra_dist_files = vec![];
-    // let mut gen_module_id_file = false;
     let mut module_id_file_name = Option::<PathBuf>::None;
 
     let mut common_args = vec![];
@@ -161,7 +160,6 @@ where
                     }
                     Some(GenModuleIdFileFlag) => {
                         take_next = false;
-                        // gen_module_id_file = true;
                         &mut common_args
                     }
                     Some(ModuleIdFileName(o)) => {

From 497c505a4fb995fb01be683a8e29403391e14833 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 29 Apr 2025 15:54:19 -0700
Subject: [PATCH 8/9] revert unrelated changes

---
 src/compiler/gcc.rs  | 19 ++++---------------
 src/compiler/msvc.rs | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs
index d4598bdaa..00cd28836 100644
--- a/src/compiler/gcc.rs
+++ b/src/compiler/gcc.rs
@@ -906,21 +906,10 @@ where
             let mut language: Option<String> =
                 language_to_arg(parsed_args.language).map(|lang| lang.into());
             if !rewrite_includes_only {
-                if let CCompilerKind::Nvhpc = kind {
-                    // -x=c|cpp|c++|i|cpp-output|asm|assembler|ASM|assembler-with-cpp|none
-                    // Specify the language for any following input files, instead of letting
-                    // the compiler choose based on suffix. Turn off with -x none
-                    match parsed_args.language {
-                        Language::C | Language::Cxx => language = Some("cpp-output".into()),
-                        Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
-                        _ => *(language.as_mut()?) = "none".into(),
-                    }
-                } else {
-                    match parsed_args.language {
-                        Language::C => language = Some("cpp-output".into()),
-                        Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
-                        _ => language.as_mut()?.push_str("-cpp-output"),
-                    }
+                match parsed_args.language {
+                    Language::C => language = Some("cpp-output".into()),
+                    Language::GenericHeader | Language::CHeader | Language::CxxHeader => {}
+                    _ => language.as_mut()?.push_str("-cpp-output"),
                 }
             }
 
diff --git a/src/compiler/msvc.rs b/src/compiler/msvc.rs
index 77c27c81a..83daa8010 100644
--- a/src/compiler/msvc.rs
+++ b/src/compiler/msvc.rs
@@ -972,7 +972,7 @@ where
     let includes_prefix = includes_prefix.to_string();
     let cwd = cwd.to_owned();
 
-    let mut output = run_input_output(cmd, None).await?;
+    let output = run_input_output(cmd, None).await?;
 
     if !is_clang {
         return Ok(output);
@@ -990,8 +990,13 @@ where
         encode_path(&mut f, &parsed_args.input)
             .with_context(|| format!("Couldn't encode input filename: '{:?}'", objfile))?;
         write!(f, " ")?;
+        let process::Output {
+            status,
+            stdout,
+            stderr: stderr_bytes,
+        } = output;
         let stderr =
-            from_local_codepage(&output.stderr).context("Failed to convert preprocessor stderr")?;
+            from_local_codepage(&stderr_bytes).context("Failed to convert preprocessor stderr")?;
         let mut deps = HashSet::new();
         let mut stderr_bytes = vec![];
         for line in stderr.lines() {
@@ -1021,8 +1026,11 @@ where
                 writeln!(f, "{}:", dep)?;
             }
         }
-        output.stderr = stderr_bytes;
-        Ok(output)
+        Ok(process::Output {
+            status,
+            stdout,
+            stderr: stderr_bytes,
+        })
     } else {
         Ok(output)
     }

From 064ca8b8f3fefe1fea49285bf31a437803665973 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 15 Apr 2025 19:47:03 +0000
Subject: [PATCH 9/9] support nvcc --device-debug

---
 src/compiler/clang.rs |  6 ++-
 src/compiler/nvcc.rs  | 63 +++++++++++++++++++++++++++
 tests/system.rs       | 99 ++++++++++++++++++++++++++++++++++---------
 3 files changed, 147 insertions(+), 21 deletions(-)

diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs
index 9689860e8..22c2ae6fb 100644
--- a/src/compiler/clang.rs
+++ b/src/compiler/clang.rs
@@ -413,6 +413,7 @@ mod test {
             "-x",
             "cuda",
             "--cuda-gpu-arch=sm_50",
+            "--cuda-noopt-device-debug",
             "-o",
             "foo.o"
         );
@@ -429,7 +430,10 @@ mod test {
             )
         );
         assert!(a.preprocessor_args.is_empty());
-        assert_eq!(ovec!["--cuda-gpu-arch=sm_50"], a.common_args);
+        assert_eq!(
+            ovec!["--cuda-gpu-arch=sm_50", "--cuda-noopt-device-debug"],
+            a.common_args
+        );
 
         let b = parses!(
             "-c",
diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs
index 6f6dc303e..648b3453d 100644
--- a/src/compiler/nvcc.rs
+++ b/src/compiler/nvcc.rs
@@ -1632,6 +1632,7 @@ counted_array!(pub static ARGS: [ArgInfo<gcc::ArgData>; _] = [
     flag!("--cubin", DoCompilation),
     take_arg!("--default-stream", OsString, CanBeSeparated('='), PassThrough),
     flag!("--device-c", DoCompilation),
+    flag!("--device-debug", PassThroughFlag),
     flag!("--device-w", DoCompilation),
     take_arg!("--diag-suppress", OsString, CanBeSeparated('='), PassThrough),
     flag!("--expt-extended-lambda", PreprocessorArgumentFlag),
@@ -1664,6 +1665,7 @@ counted_array!(pub static ARGS: [ArgInfo<gcc::ArgData>; _] = [
     take_arg!("--time=", OsString, Concatenated, TooHard),
     take_arg!("--x", OsString, CanBeSeparated('='), Language),
 
+    flag!("-G", PassThroughFlag),
     take_arg!("-Werror", OsString, CanBeSeparated('='), PreprocessorArgument),
     take_arg!("-Xarchive", OsString, CanBeSeparated('='), PassThrough),
     take_arg!("-Xcompiler", OsString, CanBeSeparated('='), PreprocessorArgument),
@@ -2282,4 +2284,65 @@ mod test {
             );
         }
     }
+
+    #[test]
+    fn test_parse_device_debug_flag_short_cu() { // parses a `-x cu` compile that passes the short `-G` flag
+        let a = parses!(
+            "-x", "cu", "-c", "foo.c", "-G", "-MD", "-MT", "foo.o", "-MF", "foo.o.d", "-o", "foo.o"
+        );
+        assert_eq!(Some("foo.c"), a.input.to_str());
+        assert_eq!(Language::Cuda, a.language); // `-x cu` is passed; the input is foo.c
+        assert_eq!(Some("-c"), a.compilation_flag.to_str());
+        assert_map_contains!(
+            a.outputs,
+            (
+                "obj",
+                ArtifactDescriptor {
+                    path: "foo.o".into(),
+                    optional: false
+                }
+            )
+        );
+        assert_eq!(
+            ovec!["-MD", "-MF", "foo.o.d", "-MT", "foo.o"],
+            a.dependency_args
+        );
+        assert_eq!(ovec!["-G", "-c"], a.common_args); // `-G` must be kept in common_args
+    }
+
+    #[test]
+    fn test_parse_device_debug_flag_long_cu() { // same as the short-flag test, but with `--device-debug`
+        let a = parses!(
+            "-x",
+            "cu",
+            "-c",
+            "foo.c",
+            "--device-debug",
+            "-MD",
+            "-MT",
+            "foo.o",
+            "-MF",
+            "foo.o.d",
+            "-o",
+            "foo.o"
+        );
+        assert_eq!(Some("foo.c"), a.input.to_str());
+        assert_eq!(Language::Cuda, a.language); // `-x cu` is passed; the input is foo.c
+        assert_eq!(Some("-c"), a.compilation_flag.to_str());
+        assert_map_contains!(
+            a.outputs,
+            (
+                "obj",
+                ArtifactDescriptor {
+                    path: "foo.o".into(),
+                    optional: false
+                }
+            )
+        );
+        assert_eq!(
+            ovec!["-MD", "-MF", "foo.o.d", "-MT", "foo.o"],
+            a.dependency_args
+        );
+        assert_eq!(ovec!["--device-debug", "-c"], a.common_args); // flag must be kept in common_args
+    }
 }
diff --git a/tests/system.rs b/tests/system.rs
index 136b15477..7f01e84f3 100644
--- a/tests/system.rs
+++ b/tests/system.rs
@@ -732,11 +732,19 @@ struct AdditionalStats {
     cache_misses: Option<Vec<(CCompilerKind, Language, u64)>>,
 }
 
-fn test_nvcc_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
+fn test_nvcc_cuda_compiles(
+    client: &SccacheClient,
+    compiler: &Compiler,
+    tempdir: &Path,
+    with_debug_flags: bool,
+) {
     let mut stats = client.stats().unwrap();
 
-    let extra_args = vec![];
-    let with_debug_flags = false;
+    let extra_args = if with_debug_flags {
+        vec!["-G".into()]
+    } else {
+        vec![]
+    };
 
     let Compiler {
         name,
@@ -1406,10 +1414,15 @@ fn test_nvcc_proper_lang_stat_tracking(
     client: &SccacheClient,
     compiler: &Compiler,
     tempdir: &Path,
+    with_debug_flags: bool,
 ) {
     let mut stats = client.stats().unwrap();
 
-    let extra_args = vec![];
+    let extra_args = if with_debug_flags {
+        vec!["-G".into()]
+    } else {
+        vec![]
+    };
 
     let Compiler {
         name,
@@ -1576,15 +1589,29 @@ fn test_nvcc_proper_lang_stat_tracking(
     );
 }
 
-fn run_sccache_nvcc_cuda_command_tests(client: &SccacheClient, compiler: Compiler, tempdir: &Path) {
-    test_nvcc_cuda_compiles(client, &compiler, tempdir);
-    test_nvcc_proper_lang_stat_tracking(client, &compiler, tempdir);
+fn run_sccache_nvcc_cuda_command_tests(
+    client: &SccacheClient,
+    compiler: Compiler,
+    tempdir: &Path,
+    with_debug_flags: bool,
+) {
+    test_nvcc_cuda_compiles(client, &compiler, tempdir, with_debug_flags);
+    test_nvcc_proper_lang_stat_tracking(client, &compiler, tempdir, with_debug_flags);
 }
 
-fn test_clang_cuda_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
+fn test_clang_cuda_compiles(
+    client: &SccacheClient,
+    compiler: &Compiler,
+    tempdir: &Path,
+    with_debug_flags: bool,
+) {
     let mut stats = client.stats().unwrap();
 
-    let extra_args = vec![];
+    let extra_args = if with_debug_flags {
+        vec!["-g".into(), "--cuda-noopt-device-debug".into()]
+    } else {
+        vec![]
+    };
 
     let Compiler {
         name,
@@ -1703,9 +1730,16 @@ fn test_clang_proper_lang_stat_tracking(
     client: &SccacheClient,
     compiler: &Compiler,
     tempdir: &Path,
+    with_debug_flags: bool,
 ) {
     let mut stats = client.stats().unwrap();
 
+    let extra_args = if with_debug_flags {
+        vec!["-g".into(), "--cuda-noopt-device-debug".into()]
+    } else {
+        vec![]
+    };
+
     let Compiler {
         name,
         exe,
@@ -1726,7 +1760,7 @@ fn test_clang_proper_lang_stat_tracking(
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            &[],
+            &extra_args,
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
@@ -1759,7 +1793,7 @@ fn test_clang_proper_lang_stat_tracking(
             "-c",
             INPUT_FOR_CUDA_C,
             OUTPUT,
-            &[],
+            &extra_args,
         ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
@@ -1784,7 +1818,13 @@ fn test_clang_proper_lang_stat_tracking(
     trace!("compile C++ A");
     client
         .cmd()
-        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
+        .args(compile_cmdline(
+            name,
+            exe,
+            INPUT,
+            OUTPUT,
+            extra_args.clone(),
+        ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
@@ -1810,7 +1850,13 @@ fn test_clang_proper_lang_stat_tracking(
     trace!("compile C++ A");
     client
         .cmd()
-        .args(compile_cmdline(name, exe, INPUT, OUTPUT, Vec::new()))
+        .args(compile_cmdline(
+            name,
+            exe,
+            INPUT,
+            OUTPUT,
+            extra_args.clone(),
+        ))
         .current_dir(tempdir)
         .envs(env_vars.clone())
         .assert()
@@ -1836,9 +1882,10 @@ fn run_sccache_clang_cuda_command_tests(
     client: &SccacheClient,
     compiler: Compiler,
     tempdir: &Path,
+    with_debug_flags: bool,
 ) {
-    test_clang_cuda_compiles(client, &compiler, tempdir);
-    test_clang_proper_lang_stat_tracking(client, &compiler, tempdir);
+    test_clang_cuda_compiles(client, &compiler, tempdir, with_debug_flags);
+    test_clang_proper_lang_stat_tracking(client, &compiler, tempdir, with_debug_flags);
 }
 
 fn test_hip_compiles(client: &SccacheClient, compiler: &Compiler, tempdir: &Path) {
@@ -2303,10 +2350,12 @@ fn test_stats_no_server() {
     );
 }
 
-#[test_case(true ; "with preprocessor cache")]
-#[test_case(false ; "without preprocessor cache")]
+#[test_case(true, false ; "preprocessor_cache=true, device_debug=false")]
+#[test_case(false, false ; "preprocessor_cache=false, device_debug=false")]
+#[test_case(true, true ; "preprocessor_cache=true, device_debug=true")]
+#[test_case(false, true ; "preprocessor_cache=false, device_debug=true")]
 #[cfg(any(unix, target_env = "msvc"))]
-fn test_cuda_sccache_command(preprocessor_cache_mode: bool) {
+fn test_cuda_sccache_command(preprocessor_cache_mode: bool, with_debug_flags: bool) {
     let _ = env_logger::try_init();
     let compilers = find_cuda_compilers();
     println!(
@@ -2325,8 +2374,18 @@ fn test_cuda_sccache_command(preprocessor_cache_mode: bool) {
 
     for compiler in compilers {
         match compiler.name {
-            "nvcc" => run_sccache_nvcc_cuda_command_tests(&client, compiler, &tempdir_path),
-            "clang++" => run_sccache_clang_cuda_command_tests(&client, compiler, &tempdir_path),
+            "nvcc" => run_sccache_nvcc_cuda_command_tests(
+                &client,
+                compiler,
+                &tempdir_path,
+                with_debug_flags,
+            ),
+            "clang++" => run_sccache_clang_cuda_command_tests(
+                &client,
+                compiler,
+                &tempdir_path,
+                with_debug_flags,
+            ),
             _ => {}
         }
     }