From 460da8c8aad61a8222f67fac6ffff6264756b895 Mon Sep 17 00:00:00 2001 From: Erin Howard Date: Wed, 28 May 2025 01:14:39 -0700 Subject: [PATCH] Make raw deletion more thorough. --- python/activator/activator.py | 2 +- python/activator/middleware_interface.py | 27 ++++++++++-------------- tests/test_middleware_interface.py | 2 +- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/python/activator/activator.py b/python/activator/activator.py index b59247fe..4d87174d 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -878,7 +878,7 @@ def process_visit(expected_visit: FannedOutVisit): _get_local_repo().name, _get_local_cache()) # TODO: pipeline execution requires a clean run until DM-38041. - cleanups.callback(mwi.clean_local_repo, expid_set) + cleanups.callback(mwi.clean_local_repo) # Copy calibrations for this detector/visit mwi.prep_butler() diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index c7dfdfa8..a5817094 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -1720,28 +1720,23 @@ def _query_datasets_by_storage_class(self, butler, exposure_ids, collections, st ) for t in matching_types ) - def clean_local_repo(self, exposure_ids: set[int]) -> None: + def clean_local_repo(self) -> None: """Remove local repo content that is only needed for a single visit. This includes raws and pipeline outputs. - - Parameter - --------- - exposure_ids : `set` [`int`] - Identifiers of the exposures to be removed. """ with lsst.utils.timer.time_this(_log, msg="clean_local_repo", level=logging.DEBUG): self.butler.registry.refresh() - if exposure_ids: - raws = self.butler.query_datasets( - 'raw', - collections=self.instrument.makeDefaultRawIngestRunName(), - where=f"exposure in ({', '.join(str(x) for x in exposure_ids)})", - explain=False, # Raws might not have been ingested. - instrument=self.visit.instrument, - detector=self.visit.detector, - ) - self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True) + # Clean out raws + raws = self.butler.query_datasets( + 'raw', + collections=self.instrument.makeDefaultRawIngestRunName(), + explain=False, # Raws might not have been ingested. + instrument=self.visit.instrument, + detector=self.visit.detector, + ) + self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True) + # Outputs are all in their own runs, so just drop them. preload_run = runs.get_preload_run(self.instrument, self._deployment, self._day_obs) _remove_run_completely(self.butler, preload_run) diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py index b517e045..13801f15 100644 --- a/tests/test_middleware_interface.py +++ b/tests/test_middleware_interface.py @@ -1019,7 +1019,7 @@ def test_clean_local_repo(self): self._assert_in_collection(butler, "*", "bias", calib_data_id_2) self._assert_in_collection(butler, "*", "bias", calib_data_id_3) - self.interface.clean_local_repo({raw_data_id["exposure"]}) + self.interface.clean_local_repo() self._assert_not_in_collection(butler, "*", "raw", raw_data_id) self._assert_not_in_collection(butler, "*", "src", processed_data_id) self._assert_not_in_collection(butler, "*", "calexp", processed_data_id)