scverse · flying-sheep · Mar 7, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,14 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+max_line_length = 88
+indent_size = 4
+indent_style = space
+
+[*.toml]
+indent_size = 2
+max_line_length = 120
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -33,5 +33,6 @@ repos:
           - dask
           - zarr
           - h5py
+          - anndata
 ci:
   skip: [mypy]  # too big
diff --git a/.taplo.toml b/.taplo.toml
@@ -0,0 +1,5 @@
+[formatting]
+array_auto_collapse = false
+column_width = 120
+compact_arrays = false
+indent_string = '  '
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,12 +1,20 @@
 {
-    "python.testing.pytestArgs": ["-vv", "--color=yes"],
-    "python.testing.pytestEnabled": true,
+    "[toml][json][jsonc][python]": {
+        "editor.formatOnSave": true,
+    },
+    "[toml]": {
+        "editor.defaultFormatter": "tamasfe.even-better-toml",
+    },
+    "[json][jsonc]": {
+        "editor.defaultFormatter": "biomejs.biome",
+    },
     "[python]": {
         "editor.defaultFormatter": "charliermarsh.ruff",
-        "editor.formatOnSave": true,
         "editor.codeActionsOnSave": {
             "source.fixAll": "explicit",
             "source.organizeImports": "explicit",
         },
     },
+    "python.testing.pytestArgs": ["-vv", "--color=yes"],
+    "python.testing.pytestEnabled": true,
 }
diff --git a/biome.jsonc b/biome.jsonc
@@ -1,9 +1,6 @@
 {
-    "$schema": "https://biomejs.dev/schemas/1.8.3/schema.json",
-    "formatter": {
-        "indentStyle": "space",
-        "indentWidth": 4,
-    },
+    "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+    "formatter": { "useEditorconfig": true },
     "overrides": [
         {
             "include": ["./.vscode/*.json", "**/*.jsonc"],

diff --git a/docs/index.rst b/docs/index.rst
@@ -22,10 +22,3 @@
 
 .. automodule:: fast_array_utils.stats
    :members:
-
-
-``fast_array_utils.types``
---------------------------
-
-.. automodule:: fast_array_utils.types
-   :members: OutOfCoreDataset
diff --git a/pyproject.toml b/pyproject.toml
@@ -33,7 +33,14 @@ optional-dependencies.doc = [
 ]
 optional-dependencies.full = [ "dask", "fast-array-utils[sparse]", "h5py", "zarr" ]
 optional-dependencies.sparse = [ "scipy>=1.8" ]
-optional-dependencies.test = [ "coverage[toml]", "pytest", "pytest-codspeed" ]
+optional-dependencies.test = [
+  "anndata",
+  "coverage[toml]",
+  "packaging",
+  "pytest",
+  "pytest-codspeed",
+  "zarr<3",          # anndata needs this
+]
 urls.'Documentation' = "https://icb-fast-array-utils.readthedocs-hosted.com/"
 urls.'Issue Tracker' = "https://github.com/scverse/fast-array-utils/issues"
 urls.'Source Code' = "https://github.com/scverse/fast-array-utils"

diff --git a/src/fast_array_utils/conv/_to_dense.py b/src/fast_array_utils/conv/_to_dense.py
@@ -2,46 +2,29 @@
 from __future__ import annotations
 
 from functools import singledispatch
-from typing import TYPE_CHECKING, overload
+from typing import TYPE_CHECKING, cast, overload
 
 import numpy as np
 
 from .. import types
 
 
 if TYPE_CHECKING:
-    from typing import Any, Literal
+    from typing import Any, Literal, TypeAlias
 
     from numpy.typing import NDArray
 
-    Array = (
-        NDArray[Any]
-        | types.CSBase
-        | types.CupyArray
-        | types.CupySparseMatrix
-        | types.DaskArray
-        | types.OutOfCoreDataset[Any]
-        | types.H5Dataset
-        | types.ZarrArray
+    MemDiskArray: TypeAlias = (
+        NDArray[Any] | types.CSBase | types.H5Dataset | types.ZarrArray | types.CSDataset
     )
+    Array: TypeAlias = MemDiskArray | types.CupyArray | types.CupySparseMatrix | types.DaskArray
 
 
 __all__ = ["to_dense"]
 
 
 @overload
-def to_dense(
-    x: (
-        NDArray[Any]
-        | types.CSBase
-        | types.OutOfCoreDataset[Any]
-        | types.H5Dataset
-        | types.ZarrArray
-    ),
-    /,
-    *,
-    to_memory: bool = False,
-) -> NDArray[Any]: ...
+def to_dense(x: MemDiskArray, /, *, to_memory: bool = False) -> NDArray[Any]: ...
 
 
 @overload
@@ -103,19 +86,17 @@ def _to_dense_dask(
 ) -> NDArray[Any] | types.DaskArray:
     import dask.array as da
 
-    x = da.map_blocks(to_dense, x)  # type: ignore[arg-type]
+    x = da.map_blocks(to_dense, x)
     return x.compute() if to_memory else x  # type: ignore[return-value]
 
 
-@_to_dense.register(types.OutOfCoreDataset)
-def _to_dense_ooc(
-    x: types.OutOfCoreDataset[types.CSBase | NDArray[Any]], /, *, to_memory: bool = False
-) -> NDArray[Any]:
+@_to_dense.register(types.CSDataset)
+def _to_dense_ooc(x: types.CSDataset, /, *, to_memory: bool = False) -> NDArray[Any]:
     if not to_memory:
-        msg = "to_memory must be True if x is an OutOfCoreDataset"
+        msg = "to_memory must be True if x is an CS{R,C}Dataset"
         raise ValueError(msg)
     # TODO(flying-sheep): why is to_memory of type Any?  # noqa: TD003
-    return to_dense(x.to_memory())
+    return to_dense(cast("types.CSBase", x.to_memory()))
 
 
 @_to_dense.register(types.CupyArray | types.CupySparseMatrix)  # type: ignore[call-overload,misc]

diff --git a/src/fast_array_utils/stats/_mean.py b/src/fast_array_utils/stats/_mean.py
@@ -16,7 +16,7 @@
 
     from .. import types
 
-    # all supported types except Dask and OutOfCoreDataset (TODO)
+    # all supported types except Dask and CSDataset (TODO)
     NonDaskArray = (
         NDArray[Any]
         | types.CSBase

diff --git a/src/fast_array_utils/stats/_power.py b/src/fast_array_utils/stats/_power.py
@@ -12,7 +12,7 @@
 
     from numpy.typing import NDArray
 
-    # All supported array types except for disk ones and OutOfCoreDataset
+    # All supported array types except for disk ones and CSDataset
     Array = NDArray[Any] | types.CSBase | types.CupyArray | types.CupySparseMatrix | types.DaskArray
 
     _Arr = TypeVar("_Arr", bound=Array)

diff --git a/src/fast_array_utils/stats/_sum.py b/src/fast_array_utils/stats/_sum.py
@@ -16,27 +16,24 @@
     from numpy._typing._array_like import _ArrayLikeFloat_co as ArrayLike
     from numpy.typing import DTypeLike, NDArray
 
-    # all supported types except CSBase, Dask and OutOfCoreDataset (TODO)
+    # all supported types except Dask and CSDataset (TODO)
     Array = (
-        NDArray[Any] | types.H5Dataset | types.ZarrArray | types.CupyArray | types.CupySparseMatrix
+        NDArray[Any]
+        | types.CSBase
+        | types.H5Dataset
+        | types.ZarrArray
+        | types.CupyArray
+        | types.CupySparseMatrix
     )
 
 
 @overload
 def sum(
-    x: ArrayLike | Array | types.CSBase,
-    /,
-    *,
-    axis: None = None,
-    dtype: DTypeLike | None = None,
+    x: ArrayLike | Array, /, *, axis: None = None, dtype: DTypeLike | None = None
 ) -> np.number[Any]: ...
 @overload
 def sum(
-    x: ArrayLike | Array | types.CSBase,
-    /,
-    *,
-    axis: Literal[0, 1],
-    dtype: DTypeLike | None = None,
+    x: ArrayLike | Array, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None
 ) -> NDArray[Any]: ...
 @overload
 def sum(
@@ -45,7 +42,7 @@ def sum(
 
 
 def sum(
-    x: ArrayLike | Array | types.CSBase | types.DaskArray,
+    x: ArrayLike | Array | types.DaskArray,
     /,
     *,
     axis: Literal[0, 1, None] = None,
@@ -69,7 +66,7 @@ def sum(
 
 @singledispatch
 def _sum(
-    x: ArrayLike | Array | types.CSBase | types.DaskArray,
+    x: ArrayLike | Array | types.DaskArray,
     /,
     *,
     axis: Literal[0, 1, None] = None,

diff --git a/src/fast_array_utils/types.py b/src/fast_array_utils/types.py
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 from importlib.util import find_spec
-from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, runtime_checkable
+from typing import TYPE_CHECKING, TypeVar
 
 
 __all__ = [
@@ -13,7 +13,6 @@
     "CupySparseMatrix",
     "DaskArray",
     "H5Dataset",
-    "OutOfCoreDataset",
     "ZarrArray",
 ]
 
@@ -22,14 +21,10 @@
 
 # scipy sparse
 if TYPE_CHECKING:
-    from typing import Any
-
-    import numpy as np
     from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix
 
     CSArray = csr_array | csc_array
     CSMatrix = csr_matrix | csc_matrix
-    CSBase = CSMatrix | CSArray
 else:
     try:  # cs?_array isn’t available in older scipy versions
         from scipy.sparse import csc_array, csr_array
@@ -44,8 +39,7 @@
         CSMatrix = csr_matrix | csc_matrix
     except ImportError:  # pragma: no cover
         CSMatrix = type("CSMatrix", (), {})
-
-    CSBase = CSMatrix | CSArray
+CSBase = CSMatrix | CSArray
 
 
 if TYPE_CHECKING or find_spec("cupy"):
@@ -70,23 +64,23 @@
 
 if TYPE_CHECKING or find_spec("h5py"):
     from h5py import Dataset as H5Dataset
+    from h5py import Group as H5Group
 else:  # pragma: no cover
     H5Dataset = type("Dataset", (), {})
+    H5Group = type("Group", (), {})
 
 
 if TYPE_CHECKING or find_spec("zarr"):
     from zarr import Array as ZarrArray
+    from zarr import Group as ZarrGroup
 else:  # pragma: no cover
     ZarrArray = type("Array", (), {})
+    ZarrGroup = type("Group", (), {})
 
 
-@runtime_checkable
-class OutOfCoreDataset(Protocol, Generic[T_co]):
-    """An out-of-core dataset."""
-
-    shape: tuple[int, int]
-    dtype: np.dtype[Any]
-
-    def to_memory(self) -> T_co:
-        """Load data into memory."""
-        ...
+if TYPE_CHECKING or find_spec("anndata"):
+    from anndata.abc import CSCDataset, CSRDataset
+else:  # pragma: no cover
+    CSRDataset = type("CSRDataset", (), {})
+    CSCDataset = type("CSCDataset", (), {})
+CSDataset = CSRDataset | CSCDataset
diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py
@@ -9,7 +9,12 @@
 
 
 if TYPE_CHECKING:
-    from ._array_type import Array, MemArray, ToArray  # noqa: TC004
+    from ._array_type import (
+        Array,  # noqa: TC004
+        InnerArrayDask,
+        InnerArrayDisk,
+        ToArray,  # noqa: TC004
+    )
 
 
 __all__ = [
@@ -34,12 +39,17 @@
     ),
 )
 _TP_DASK = tuple(
-    ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t)
-    for t in cast("tuple[ArrayType[MemArray, None], ...]", _TP_MEM)
+    ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t)  # type: ignore[type-var]
+    for t in cast("tuple[ArrayType[InnerArrayDask, None], ...]", _TP_MEM)
 )
-_TP_DISK = tuple(
+_TP_DISK_DENSE = tuple(
     ArrayType(m, n, Flags.Any | Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")]
 )
+_TP_DISK_SPARSE = tuple(
+    ArrayType("anndata.abc", n, Flags.Any | Flags.Disk | Flags.Sparse, inner=t)  # type: ignore[type-var]
+    for t in cast("tuple[ArrayType[InnerArrayDisk, None], ...]", _TP_DISK_DENSE)
+    for n in ["CSRDataset", "CSCDataset"]
+)
 
-SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK)
+SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK_DENSE, *_TP_DISK_SPARSE)
 """All supported array types."""
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -33,5 +33,6 @@ repos:
           - dask
           - zarr
           - h5py
+          - anndata
 ci:
   skip: [mypy]  # too big