Skip to content

anndata.abc.CS{RC}Dataset support #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
root = true

[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
max_line_length = 88
indent_size = 4
indent_style = space

[*.toml]
indent_size = 2
max_line_length = 120
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ repos:
- dask
- zarr
- h5py
- anndata
ci:
skip: [mypy] # too big
5 changes: 5 additions & 0 deletions .taplo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[formatting]
array_auto_collapse = false
column_width = 120
compact_arrays = false
indent_string = ' '
14 changes: 11 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
{
"python.testing.pytestArgs": ["-vv", "--color=yes"],
"python.testing.pytestEnabled": true,
"[toml][json][jsonc][python]": {
"editor.formatOnSave": true,
},
"[toml]": {
"editor.defaultFormatter": "tamasfe.even-better-toml",
},
"[json][jsonc]": {
"editor.defaultFormatter": "biomejs.biome",
},
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit",
},
},
"python.testing.pytestArgs": ["-vv", "--color=yes"],
"python.testing.pytestEnabled": true,
}
7 changes: 2 additions & 5 deletions biome.jsonc
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
{
"$schema": "https://biomejs.dev/schemas/1.8.3/schema.json",
"formatter": {
"indentStyle": "space",
"indentWidth": 4,
},
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"formatter": { "useEditorconfig": true },
"overrides": [
{
"include": ["./.vscode/*.json", "**/*.jsonc"],
Expand Down
7 changes: 0 additions & 7 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,3 @@

.. automodule:: fast_array_utils.stats
:members:


``fast_array_utils.types``
--------------------------

.. automodule:: fast_array_utils.types
:members: OutOfCoreDataset
9 changes: 8 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ optional-dependencies.doc = [
]
optional-dependencies.full = [ "dask", "fast-array-utils[sparse]", "h5py", "zarr" ]
optional-dependencies.sparse = [ "scipy>=1.8" ]
optional-dependencies.test = [ "coverage[toml]", "pytest", "pytest-codspeed" ]
optional-dependencies.test = [
"anndata",
"coverage[toml]",
"packaging",
"pytest",
"pytest-codspeed",
"zarr<3", # anndata needs this
]
urls.'Documentation' = "https://icb-fast-array-utils.readthedocs-hosted.com/"
urls.'Issue Tracker' = "https://github.com/scverse/fast-array-utils/issues"
urls.'Source Code' = "https://github.com/scverse/fast-array-utils"
Expand Down
41 changes: 11 additions & 30 deletions src/fast_array_utils/conv/_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,29 @@
from __future__ import annotations

from functools import singledispatch
from typing import TYPE_CHECKING, overload
from typing import TYPE_CHECKING, cast, overload

import numpy as np

from .. import types


if TYPE_CHECKING:
from typing import Any, Literal
from typing import Any, Literal, TypeAlias

from numpy.typing import NDArray

Array = (
NDArray[Any]
| types.CSBase
| types.CupyArray
| types.CupySparseMatrix
| types.DaskArray
| types.OutOfCoreDataset[Any]
| types.H5Dataset
| types.ZarrArray
MemDiskArray: TypeAlias = (
NDArray[Any] | types.CSBase | types.H5Dataset | types.ZarrArray | types.CSDataset
)
Array: TypeAlias = MemDiskArray | types.CupyArray | types.CupySparseMatrix | types.DaskArray


__all__ = ["to_dense"]


@overload
def to_dense(
x: (
NDArray[Any]
| types.CSBase
| types.OutOfCoreDataset[Any]
| types.H5Dataset
| types.ZarrArray
),
/,
*,
to_memory: bool = False,
) -> NDArray[Any]: ...
def to_dense(x: MemDiskArray, /, *, to_memory: bool = False) -> NDArray[Any]: ...


@overload
Expand Down Expand Up @@ -103,19 +86,17 @@ def _to_dense_dask(
) -> NDArray[Any] | types.DaskArray:
import dask.array as da

x = da.map_blocks(to_dense, x) # type: ignore[arg-type]
x = da.map_blocks(to_dense, x)
return x.compute() if to_memory else x # type: ignore[return-value]


@_to_dense.register(types.OutOfCoreDataset)
def _to_dense_ooc(
x: types.OutOfCoreDataset[types.CSBase | NDArray[Any]], /, *, to_memory: bool = False
) -> NDArray[Any]:
@_to_dense.register(types.CSDataset)
def _to_dense_ooc(x: types.CSDataset, /, *, to_memory: bool = False) -> NDArray[Any]:
if not to_memory:
msg = "to_memory must be True if x is an OutOfCoreDataset"
msg = "to_memory must be True if x is a CS{R,C}Dataset"
raise ValueError(msg)
# TODO(flying-sheep): why is to_memory of type Any? # noqa: TD003
return to_dense(x.to_memory())
return to_dense(cast("types.CSBase", x.to_memory()))


@_to_dense.register(types.CupyArray | types.CupySparseMatrix) # type: ignore[call-overload,misc]
Expand Down
2 changes: 1 addition & 1 deletion src/fast_array_utils/stats/_mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from .. import types

# all supported types except Dask and OutOfCoreDataset (TODO)
# all supported types except Dask and CSDataset (TODO)
NonDaskArray = (
NDArray[Any]
| types.CSBase
Expand Down
2 changes: 1 addition & 1 deletion src/fast_array_utils/stats/_power.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from numpy.typing import NDArray

# All supported array types except for disk ones and OutOfCoreDataset
# All supported array types except for disk ones and CSDataset
Array = NDArray[Any] | types.CSBase | types.CupyArray | types.CupySparseMatrix | types.DaskArray

_Arr = TypeVar("_Arr", bound=Array)
Expand Down
25 changes: 11 additions & 14 deletions src/fast_array_utils/stats/_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,24 @@
from numpy._typing._array_like import _ArrayLikeFloat_co as ArrayLike
from numpy.typing import DTypeLike, NDArray

# all supported types except CSBase, Dask and OutOfCoreDataset (TODO)
# all supported types except Dask and CSDataset (TODO)
Array = (
NDArray[Any] | types.H5Dataset | types.ZarrArray | types.CupyArray | types.CupySparseMatrix
NDArray[Any]
| types.CSBase
| types.H5Dataset
| types.ZarrArray
| types.CupyArray
| types.CupySparseMatrix
)


@overload
def sum(
x: ArrayLike | Array | types.CSBase,
/,
*,
axis: None = None,
dtype: DTypeLike | None = None,
x: ArrayLike | Array, /, *, axis: None = None, dtype: DTypeLike | None = None
) -> np.number[Any]: ...
@overload
def sum(
x: ArrayLike | Array | types.CSBase,
/,
*,
axis: Literal[0, 1],
dtype: DTypeLike | None = None,
x: ArrayLike | Array, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None
) -> NDArray[Any]: ...
@overload
def sum(
Expand All @@ -45,7 +42,7 @@ def sum(


def sum(
x: ArrayLike | Array | types.CSBase | types.DaskArray,
x: ArrayLike | Array | types.DaskArray,
/,
*,
axis: Literal[0, 1, None] = None,
Expand All @@ -69,7 +66,7 @@ def sum(

@singledispatch
def _sum(
x: ArrayLike | Array | types.CSBase | types.DaskArray,
x: ArrayLike | Array | types.DaskArray,
/,
*,
axis: Literal[0, 1, None] = None,
Expand Down
30 changes: 12 additions & 18 deletions src/fast_array_utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from __future__ import annotations

from importlib.util import find_spec
from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, runtime_checkable
from typing import TYPE_CHECKING, TypeVar


__all__ = [
Expand All @@ -13,7 +13,6 @@
"CupySparseMatrix",
"DaskArray",
"H5Dataset",
"OutOfCoreDataset",
"ZarrArray",
]

Expand All @@ -22,14 +21,10 @@

# scipy sparse
if TYPE_CHECKING:
from typing import Any

import numpy as np
from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix

CSArray = csr_array | csc_array
CSMatrix = csr_matrix | csc_matrix
CSBase = CSMatrix | CSArray
else:
try: # cs?_array isn’t available in older scipy versions
from scipy.sparse import csc_array, csr_array
Expand All @@ -44,8 +39,7 @@
CSMatrix = csr_matrix | csc_matrix
except ImportError: # pragma: no cover
CSMatrix = type("CSMatrix", (), {})

CSBase = CSMatrix | CSArray
CSBase = CSMatrix | CSArray


if TYPE_CHECKING or find_spec("cupy"):
Expand All @@ -70,23 +64,23 @@

if TYPE_CHECKING or find_spec("h5py"):
from h5py import Dataset as H5Dataset
from h5py import Group as H5Group
else: # pragma: no cover
H5Dataset = type("Dataset", (), {})
H5Group = type("Group", (), {})


if TYPE_CHECKING or find_spec("zarr"):
from zarr import Array as ZarrArray
from zarr import Group as ZarrGroup
else: # pragma: no cover
ZarrArray = type("Array", (), {})
ZarrGroup = type("Group", (), {})


@runtime_checkable
class OutOfCoreDataset(Protocol, Generic[T_co]):
"""An out-of-core dataset."""

shape: tuple[int, int]
dtype: np.dtype[Any]

def to_memory(self) -> T_co:
"""Load data into memory."""
...
if TYPE_CHECKING or find_spec("anndata"):
from anndata.abc import CSCDataset, CSRDataset
else: # pragma: no cover
CSRDataset = type("CSRDataset", (), {})
CSCDataset = type("CSCDataset", (), {})
CSDataset = CSRDataset | CSCDataset
20 changes: 15 additions & 5 deletions src/testing/fast_array_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@


if TYPE_CHECKING:
from ._array_type import Array, MemArray, ToArray # noqa: TC004
from ._array_type import (
Array, # noqa: TC004
InnerArrayDask,
InnerArrayDisk,
ToArray, # noqa: TC004
)


__all__ = [
Expand All @@ -34,12 +39,17 @@
),
)
_TP_DASK = tuple(
ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t)
for t in cast("tuple[ArrayType[MemArray, None], ...]", _TP_MEM)
ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t) # type: ignore[type-var]
for t in cast("tuple[ArrayType[InnerArrayDask, None], ...]", _TP_MEM)
)
_TP_DISK = tuple(
_TP_DISK_DENSE = tuple(
ArrayType(m, n, Flags.Any | Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")]
)
_TP_DISK_SPARSE = tuple(
ArrayType("anndata.abc", n, Flags.Any | Flags.Disk | Flags.Sparse, inner=t) # type: ignore[type-var]
for t in cast("tuple[ArrayType[InnerArrayDisk, None], ...]", _TP_DISK_DENSE)
for n in ["CSRDataset", "CSCDataset"]
)

SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK)
SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK_DENSE, *_TP_DISK_SPARSE)
"""All supported array types."""
Loading