Add unit tests; make remove_scores cascade to derived scores

JPatrickPett · JPatrickPett · commit aa3c052fc8a2 · 2025-02-18T18:01:37.000Z
diff --git a/snp2cell/__init__.py b/snp2cell/__init__.py
@@ -1,4 +1,4 @@
-from snp2cell.snp2cell_class import SNP2CELL, NCPU
+from snp2cell.snp2cell_class import SNP2CELL, SUFFIX, NCPU
 from snp2cell import util
 from snp2cell import cli
 from snp2cell import recipes
diff --git a/snp2cell/snp2cell_class.py b/snp2cell/snp2cell_class.py
@@ -34,7 +34,11 @@ class SUFFIX(Enum):
 
 
 class SNP2CELL:
-    def __init__(self, path: Optional[Union[str, os.PathLike]] = None, seed: Optional[int] = RANDOM_SEED) -> None:
+    def __init__(
+        self,
+        path: Optional[Union[str, os.PathLike]] = None,
+        seed: Optional[int] = RANDOM_SEED,
+    ) -> None:
         """
         Initialize the SNP2CELL object.
 
@@ -127,7 +131,7 @@ def _add_de_groups(self, groupby: str, groups: List[str]) -> None:
             shared_keys = set(groups) & set(v)
             if shared_keys:
                 raise ValueError(f"Groups {shared_keys} already exist in {k}")
-        
+
         self.de_groups[groupby] = groups
 
     def _scale_score(
@@ -212,7 +216,7 @@ def _get_perturbed_stats(self, score_key: str, suffix: SUFFIX) -> pd.DataFrame:
         """
         if suffix not in [s.value for s in SUFFIX]:
             raise ValueError(
-                f"Invalid suffix. Must be one of {[s.value for s in SUFFIX]}."
+                f"Invalid suffix. Must be one of {[s.value for s in SUFFIX]}. Got '{suffix}'."
             )
         score = self.scores_rand[score_key]
         if suffix == SUFFIX.ZSCORE:
@@ -407,14 +411,16 @@ def add_grn_from_pandas(self, adjacency_df: pd.DataFrame) -> None:
         """
         raise NotImplementedError("This method is not yet implemented.")
 
-    def add_grn_from_networkx(self, nx_grn: nx.Graph, overwrite: bool = False) -> None:
+    def add_grn_from_networkx(
+        self, nx_grn: Union[nx.Graph, str, Path], overwrite: bool = False
+    ) -> None:
         """
         Add GRN from networkx object to snp2cell object.
 
         Parameters
         ----------
-        nx_grn : nx.Graph
-            Networkx object.
+        nx_grn : Union[nx.Graph, str, Path]
+            Networkx object or path to a pickled networkx object.
         overwrite : bool, optional
             Whether to overwrite existing networkx object, by default False.
 
@@ -423,7 +429,7 @@ def add_grn_from_networkx(self, nx_grn: nx.Graph, overwrite: bool = False) -> No
         IndexError
             If existing scores are found and overwrite is False.
         """
-        if self.scores and not overwrite:
+        if self.scores is not None and not overwrite:
             raise IndexError(
                 "existing scores found, set overwrite=True to discard them."
             )
@@ -506,7 +512,12 @@ def add_score(
                 )
             self.scores_prop[score_key] = self.scores_prop.index.map(p_scr_dct)  # type: ignore
             if statistics:
-                self.rand_sim(score_key=score_key, num_cores=num_cores, n=num_rand, reset_seed=reset_seed)
+                self.rand_sim(
+                    score_key=score_key,
+                    num_cores=num_cores,
+                    n=num_rand,
+                    reset_seed=reset_seed,
+                )
                 self.add_score_statistics(score_keys=score_key)
         self._defrag_pandas()
 
@@ -528,7 +539,7 @@ def propagate_score(self, score_key: str = "score") -> Tuple[str, Dict[str, floa
         scr_dct = self.scores[score_key].to_dict()  # type: ignore
         p_scr_dct = self._prop_scr(scr_dct)
         return score_key, p_scr_dct
-   
+
     @add_logger()
     def propagate_scores(
         self,
@@ -992,33 +1003,79 @@ def remove_scores(self, which: str = "propagated", **kwargs: Any) -> None:
         """
         Delete selected scores from the object.
 
+        By default, this will delete all scores of the selected type.
+        Set `**kwargs` to select specific columns to delete.
+
         Parameters
         ----------
         which : str, optional
-            Type of scores to retrieve. Can be "original" (before propagation), "propagated" (after propagation) or "perturbed" (random permutations), by default "propagated".
+            Type of scores to delete. Can be "original" (before propagation), "propagated" (after propagation), "perturbed" (random permutations) or "all" (all scores), by default "propagated".
         kwargs : Any
-            Options passed to `pd.filter(**kwargs)` for selecting columns to DROP.
+            Options passed to `pd.filter(**kwargs)` for selecting columns to DROP. If not set, all columns will be dropped.
+            Set `items=[]` to drop columns by name, `like=""` to drop columns by partial name, `regex=""` to drop columns by regex.
         """
         self._check_init()
-        if which == "perturbed":
-            self.scores_rand = {}
-        elif which == "all":
+        if which == "all":
             self._init_scores()
+        elif which == "perturbed":
+            if kwargs:
+                # remove columns from random / perturbed scores
+                keys_to_remove = self.scores_rand.keys()
+                if "items" in kwargs:
+                    keys_to_remove = kwargs["items"]
+                elif "like" in kwargs:
+                    keys_to_remove = [
+                        k for k in self.scores_rand if kwargs["like"] in k
+                    ]
+                elif "regex" in kwargs:
+                    keys_to_remove = [
+                        k for k in self.scores_rand if re.search(kwargs["regex"], k)
+                    ]
+                for key in keys_to_remove:
+                    self.scores_rand.pop(key, None)
+            else:
+                self.scores_rand = {}
         elif which == "propagated":
             if kwargs:
+                # remove columns from propagated scores
                 cols = self.scores_prop.filter(**kwargs).columns  # type: ignore
                 self.scores_prop = self.scores_prop.drop(columns=cols)  # type: ignore
+
+                # also remove columns from random / perturbed scores
+                for key in cols:
+                    if key in self.scores_rand:
+                        self.scores_rand.pop(key, None)
             else:
                 self.scores_prop = pd.DataFrame(index=list(self.grn.nodes))  # type: ignore
+                self.scores_rand = {}
         elif which == "original":
             if kwargs:
+                # remove columns from original scores
                 cols = self.scores.filter(**kwargs).columns  # type: ignore
                 self.scores = self.scores.drop(columns=cols)  # type: ignore
                 for k in self.de_groups:
                     self.de_groups[k] = [i for i in self.de_groups[k] if i not in cols]
+
+                # also remove columns from propagated scores
+                self.scores_prop = self.scores_prop.drop(columns=cols, errors="ignore")  # type: ignore
+                stat_cols = [
+                    col
+                    for col in self.scores_prop.columns
+                    if any(col.startswith(f"{c}__") for c in cols)
+                ]
+                self.scores_prop = self.scores_prop.drop(columns=stat_cols, errors="ignore")  # type: ignore
+
+                # also remove columns from random / perturbed scores
+                for key in cols:
+                    if key in self.scores_rand:
+                        self.scores_rand.pop(key, None)
             else:
                 self.scores = pd.DataFrame(index=list(self.grn.nodes))  # type: ignore
                 self.de_groups = {}
+                self.scores_prop = pd.DataFrame(index=list(self.grn.nodes))  # type: ignore
+                self.scores_rand = {}
+        else:
+            raise ValueError(f"unknown score type: {which}")
 
     def get_components(self, sel_nodes: List[str]) -> Tuple[nx.Graph, List[set]]:
         """
@@ -1215,5 +1272,11 @@ def plot_group_heatmap(
         sns.heatmap(plt_df, cmap="mako", yticklabels=False)
 
 
-SNP2CELL._get_perturbed_stats.__doc__ = SNP2CELL._get_perturbed_stats.__doc__.format(_SUFFIX_=str([e.value for e in SUFFIX]))
-SNP2CELL.adata_combine_de_scores.__doc__ = SNP2CELL.adata_combine_de_scores.__doc__.format(_SUFFIX_=str([e.value for e in SUFFIX]))
+SNP2CELL._get_perturbed_stats.__doc__ = SNP2CELL._get_perturbed_stats.__doc__.format(
+    _SUFFIX_=str([e.value for e in SUFFIX])
+)
+SNP2CELL.adata_combine_de_scores.__doc__ = (
+    SNP2CELL.adata_combine_de_scores.__doc__.format(
+        _SUFFIX_=str([e.value for e in SUFFIX])
+    )
+)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -3,7 +3,11 @@
 import pandas as pd
 import networkx as nx
 import scanpy as sc
+from snp2cell.snp2cell_class import SNP2CELL
 
+@pytest.fixture
+def snp2cell_instance():
+    return SNP2CELL(seed=42)
 
 @pytest.fixture(scope="session")
 def fake_grn():
diff --git a/tests/test_snp2cell.py b/tests/test_snp2cell.py
@@ -0,0 +1,196 @@
+import pytest
+import networkx as nx
+import pandas as pd
+import numpy as np
+import snp2cell
+
+snp2cell.util.set_num_cpu(1)
+
+
+def test_initialization_with_path(snp2cell_instance, tmp_path):
+    # Create a temporary file to simulate the path
+    path = tmp_path / "test_data.pkl"
+    snp2cell_instance.save_data(path=str(path))
+
+    s2c = snp2cell.SNP2CELL(path=str(path), seed=42)
+    assert s2c is not None, "snp2cell object was not created"
+    assert s2c.grn is None, "GRN should be None"
+    assert s2c.adata is None, "AnnData should be None"
+    assert s2c.scores is None, "Scores should be None"
+
+
+def test_init_scores(snp2cell_instance):
+    G = nx.Graph()
+    G.add_edges_from([(1, 2), (2, 3)])
+    snp2cell_instance._set_grn(G)
+    snp2cell_instance._init_scores()
+    assert snp2cell_instance.scores is not None, "Scores should be initialized"
+    assert (
+        snp2cell_instance.scores_prop is not None
+    ), "Propagated scores should be initialized"
+    assert snp2cell_instance.scores_rand == {}, "Random scores should be initialized"
+    assert snp2cell_instance.de_groups == {}, "DE groups should be initialized"
+
+
+def test_set_grn(snp2cell_instance):
+    G = nx.Graph()
+    G.add_edges_from([(1, 2), (2, 3)])
+    snp2cell_instance._set_grn(G)
+    assert snp2cell_instance.grn is not None, "GRN should be set"
+    assert list(snp2cell_instance.grn.edges) == [
+        (1, 2),
+        (2, 3),
+    ], "GRN edges should match"
+
+
+def test_add_de_groups(snp2cell_instance):
+    snp2cell_instance._add_de_groups("group1", ["A", "B"])
+    assert "group1" in snp2cell_instance.de_groups, "Group1 should be added"
+    assert snp2cell_instance.de_groups["group1"] == [
+        "A",
+        "B",
+    ], "Group1 values should match"
+
+    with pytest.raises(ValueError):
+        snp2cell_instance._add_de_groups("group1", ["C"])
+
+    snp2cell_instance._add_de_groups("group2", ["C"])
+    with pytest.raises(ValueError):
+        snp2cell_instance._add_de_groups("group2", ["A"])
+
+
+def test_get_perturbed_stats(snp2cell_instance):
+    snp2cell_instance.scores_rand["test_key"] = pd.DataFrame(np.random.randn(10, 3))
+
+    for suffix in snp2cell.SUFFIX:
+        result = snp2cell_instance._get_perturbed_stats("test_key", suffix.value)
+        assert isinstance(result, pd.DataFrame), "Result should be a DataFrame"
+
+
+def test_robust_z_score():
+    series = pd.Series([1, 2, 3, 4, 5])
+    result = snp2cell.SNP2CELL._robust_z_score(series)
+    assert isinstance(result, pd.Series), "Result should be a Series"
+    assert len(result) == 5, "Result length should match input length"
+
+
+def test_get_scores(snp2cell_instance):
+    # Add some scores to the instance
+    snp2cell_instance.add_grn_from_networkx(nx.from_edgelist([(1, 2), (2, 3)]))
+    snp2cell_instance.scores = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    snp2cell_instance.scores_prop = pd.DataFrame({"A": [7, 8, 9], "B": [10, 11, 12]})
+    snp2cell_instance.scores_rand = {"test_key": pd.DataFrame(np.random.randn(10, 3))}
+
+    # Test retrieving original and propagated scores
+    for which in ["original", "propagated"]:
+        scores = snp2cell_instance.get_scores(which=which)
+        assert scores is not None, "Scores should be retrieved"
+        assert isinstance(scores, pd.DataFrame), "Scores should be a DataFrame"
+        assert "A" in scores.columns, "Scores should have column 'A'"
+        assert "B" in scores.columns, "Scores should have column 'B'"
+
+    # Test retrieving perturbed scores
+    scores = snp2cell_instance.get_scores(which="perturbed")
+    assert scores is not None, "Scores should be retrieved"
+    assert isinstance(scores, dict), "Scores should be a dictionary"
+    assert "test_key" in scores, "Scores should have key 'test_key'"
+    assert isinstance(scores["test_key"], pd.DataFrame), "Scores should be a DataFrame"
+
+    # Test retrieving with query
+    scores = snp2cell_instance.get_scores(which="propagated", query="A > 7")
+    assert len(scores) == 2, "Query should filter the DataFrame"
+
+    # Test retrieving with sort_key
+    scores = snp2cell_instance.get_scores(which="propagated", sort_key="A")
+    assert scores.iloc[0]["A"] == 9, "Scores should be sorted in descending order"
+
+
+def test_remove_scores(snp2cell_instance):
+    snp2cell_instance.add_grn_from_networkx(nx.from_edgelist([(1, 2), (2, 3)]))
+
+    # Add some scores to the instance
+    snp2cell_instance.scores = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    snp2cell_instance.scores_prop = pd.DataFrame(
+        {"A": [7, 8, 9], "A__pval": [7, 8, 9], "B": [10, 11, 12]}
+    )
+    snp2cell_instance.scores_rand = {"A": pd.DataFrame(np.random.randn(10, 3))}
+
+    # Test removing non-existing scores (should not raise an error)
+    snp2cell_instance.remove_scores(which="original", items=["C"])
+    assert snp2cell_instance.scores is not None, "Original scores should not be removed"
+    assert (
+        snp2cell_instance.scores.shape[1] == 2
+    ), "Original scores should not be removed"
+    assert (
+        snp2cell_instance.scores_prop is not None
+    ), "Propagated scores should not be removed"
+    assert (
+        snp2cell_instance.scores_prop.shape[1] == 3
+    ), "Propagated scores should not be removed"
+    assert "A" in snp2cell_instance.scores_rand, "Random scores should not be removed"
+
+    # Test removing original scores
+    snp2cell_instance.remove_scores(which="original", items=["A"])
+    assert (
+        "A" not in snp2cell_instance.scores.columns
+    ), "Original scores should be removed"
+    assert (
+        "A" not in snp2cell_instance.scores_prop.columns
+    ), "Propagated scores should also be removed"
+    assert (
+        "A__pval" not in snp2cell_instance.scores_prop.columns
+    ), "Corresponding statistics should also be removed"
+    assert (
+        "A" not in snp2cell_instance.scores_rand
+    ), "Random scores should also be removed"
+
+    # Add scores to the instance
+    snp2cell_instance.scores = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    snp2cell_instance.scores_prop = pd.DataFrame(
+        {"A": [7, 8, 9], "A__pval": [7, 8, 9], "B": [10, 11, 12]}
+    )
+    snp2cell_instance.scores_rand = {"A": pd.DataFrame(np.random.randn(10, 3))}
+
+    # Test removing propagated scores
+    snp2cell_instance.remove_scores(which="propagated", items=["A"])
+    assert (
+        "A" in snp2cell_instance.scores.columns
+    ), "Original scores should not be removed"
+    assert (
+        "A" not in snp2cell_instance.scores_prop.columns
+    ), "Propagated scores should be removed"
+    assert (
+        "A" not in snp2cell_instance.scores_rand
+    ), "Random scores should also be removed"
+
+    # Add scores to the instance
+    snp2cell_instance.scores = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    snp2cell_instance.scores_prop = pd.DataFrame(
+        {"A": [7, 8, 9], "A__pval": [7, 8, 9], "B": [10, 11, 12]}
+    )
+    snp2cell_instance.scores_rand = {"A": pd.DataFrame(np.random.randn(10, 3))}
+
+    # Test removing random scores
+    snp2cell_instance.remove_scores(which="perturbed", items=["A"])
+    assert (
+        "A" in snp2cell_instance.scores.columns
+    ), "Original scores should not be removed"
+    assert (
+        "A" in snp2cell_instance.scores_prop.columns
+    ), "Propagated scores should not be removed"
+    assert "A" not in snp2cell_instance.scores_rand, "Random scores should be removed"
+
+    # Test removing all propagated scores
+    snp2cell_instance.remove_scores(which="propagated")
+    assert (
+        snp2cell_instance.scores_prop.shape[1] == 0
+    ), "All propagated scores should be removed"
+    assert (
+        len(snp2cell_instance.scores_rand) == 0
+    ), "All random scores should also be removed"
+
+    # Test removing all original scores
+    snp2cell_instance.remove_scores(which="original")
+    assert (
+        snp2cell_instance.scores.shape[1] == 0
+    ), "All original scores should be removed"

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from snp2cell.snp2cell_class import SNP2CELL, NCPU`
	`1`	`+from snp2cell.snp2cell_class import SNP2CELL, SUFFIX, NCPU`
`2`	`2`	`from snp2cell import util`
`3`	`3`	`from snp2cell import cli`
`4`	`4`	`from snp2cell import recipes`