diff --git a/docs/conf.py b/docs/conf.py index abfd723..2b4f3cd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,12 +32,20 @@ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "scanpydoc.elegant_typehints", + "sphinx_autofixture", ] # API documentation when building nitpicky = True autosummary_generate = True autodoc_member_order = "bysource" +autodoc_default_options = { + "special-members": True, + # everything except __call__ really, to avoid having to write autosummary templates + "exclude-members": ( + "__setattr__,__delattr__,__repr__,__eq__,__or__,__ror__,__hash__,__weakref__,__init__,__new__" + ), +} napoleon_google_docstring = False napoleon_numpy_docstring = True todo_include_todos = False @@ -55,9 +63,11 @@ "np.dtype": "numpy.dtype", "np.number": "numpy.number", "np.integer": "numpy.integer", + "np.random.Generator": "numpy.random.Generator", "ArrayLike": "numpy.typing.ArrayLike", "DTypeLike": "numpy.typing.DTypeLike", "NDArray": "numpy.typing.NDArray", + "_pytest.fixtures.FixtureRequest": "pytest.FixtureRequest", **{ k: v for k_plain, v in { @@ -74,10 +84,17 @@ # If that doesn’t work, ignore them nitpick_ignore = { ("py:class", "fast_array_utils.types.T_co"), + ("py:class", "Arr"), + ("py:class", "testing.fast_array_utils._array_type.Arr"), + ("py:class", "testing.fast_array_utils._array_type.Inner"), + ("py:class", "_DTypeLikeFloat32"), + ("py:class", "_DTypeLikeFloat64"), # sphinx bugs, should be covered by `autodoc_type_aliases` above + ("py:class", "Array"), ("py:class", "ArrayLike"), ("py:class", "DTypeLike"), ("py:class", "NDArray"), + ("py:class", "_pytest.fixtures.FixtureRequest"), } # Options for HTML output diff --git a/docs/index.rst b/docs/index.rst index 697b5c6..db58c7a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,10 +1,15 @@ ``fast_array_utils`` ==================== +.. toctree:: + :hidden: + + fast-array-utils + testing + .. automodule:: fast_array_utils :members: - ``fast_array_utils.conv`` ------------------------- diff --git a/docs/testing.rst b/docs/testing.rst new file mode 100644 index 0000000..0b17228 --- /dev/null +++ b/docs/testing.rst @@ -0,0 +1,11 @@ +``testing.fast_array_utils`` +============================ + +.. automodule:: testing.fast_array_utils + :members: + +``testing.fast_array_utils.pytest`` +----------------------------------- + +.. automodule:: testing.fast_array_utils.pytest + :members: diff --git a/pyproject.toml b/pyproject.toml index d14ded6..2b3167e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,14 @@ classifiers = [ ] dynamic = [ "description", "version" ] dependencies = [ "numba", "numpy" ] -optional-dependencies.doc = [ "furo", "scanpydoc>=0.15.2", "sphinx>=8", "sphinx-autodoc-typehints" ] +optional-dependencies.doc = [ + "furo", + "pytest", + "scanpydoc>=0.15.2", + "sphinx>=8", + "sphinx-autodoc-typehints", + "sphinx-autofixture", +] optional-dependencies.full = [ "dask", "fast-array-utils[sparse]", "h5py", "zarr" ] optional-dependencies.sparse = [ "scipy>=1.8" ] optional-dependencies.test = [ "coverage[toml]", "pytest", "pytest-codspeed" ] @@ -31,12 +38,18 @@ urls.'Documentation' = "https://icb-fast-array-utils.readthedocs-hosted.com/" urls.'Issue Tracker' = "https://github.com/scverse/fast-array-utils/issues" urls.'Source Code' = "https://github.com/scverse/fast-array-utils" -[tool.hatch.metadata.hooks.docstring-description] +entry_points.pytest11.fast_array_utils = "testing.fast_array_utils.pytest" [tool.hatch.version] source = "vcs" raw-options = { local_scheme = "no-local-version" } # be able to publish dev version +# TODO: support setting main package in the plugin +# [tool.hatch.metadata.hooks.docstring-description] + +[tool.hatch.build.targets.wheel] +packages = [ "src/testing", "src/fast_array_utils" ] + [tool.hatch.envs.default] installer = "uv" @@ -85,6 +98,8 @@ lint.per-file-ignores."tests/**/test_*.py" = [ "S101", # tests use `assert` ] lint.allowed-confusables = [ "×", "’" ] +lint.flake8-bugbear.extend-immutable-calls = [ "testing.fast_array_utils.Flags" ] + lint.flake8-copyright.notice-rgx = "SPDX-License-Identifier: MPL-2\\.0" lint.flake8-type-checking.exempt-modules = [ ] lint.flake8-type-checking.strict = true diff --git a/src/fast_array_utils/conv/_asarray.py b/src/fast_array_utils/conv/_asarray.py index 1378d50..42f3e06 100644 --- a/src/fast_array_utils/conv/_asarray.py +++ b/src/fast_array_utils/conv/_asarray.py @@ -2,17 +2,16 @@ from __future__ import annotations from functools import singledispatch -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast import numpy as np +from numpy.typing import NDArray from .. import types if TYPE_CHECKING: - from typing import Any - - from numpy.typing import ArrayLike, NDArray + from numpy.typing import ArrayLike __all__ = ["asarray"] @@ -64,9 +63,9 @@ def _(x: types.OutOfCoreDataset[types.CSBase | NDArray[Any]]) -> NDArray[Any]: @asarray.register(types.CupyArray) def _(x: types.CupyArray) -> NDArray[Any]: - return x.get() # type: ignore[no-any-return] + return cast(NDArray[Any], x.get()) @asarray.register(types.CupySparseMatrix) def _(x: types.CupySparseMatrix) -> NDArray[Any]: - return x.toarray().get() # type: ignore[no-any-return] + return cast(NDArray[Any], x.toarray().get()) diff --git a/src/fast_array_utils/stats/_sum.py b/src/fast_array_utils/stats/_sum.py index 79660be..0d8ef56 100644 --- a/src/fast_array_utils/stats/_sum.py +++ b/src/fast_array_utils/stats/_sum.py @@ -2,26 +2,27 @@ from __future__ import annotations from functools import partial, singledispatch -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING, Any, cast, overload import numpy as np +from numpy.typing import NDArray from .. import types if TYPE_CHECKING: - from typing import Any, Literal + from typing import Literal - from numpy.typing import ArrayLike, DTypeLike, NDArray + from numpy.typing import ArrayLike, DTypeLike @overload def sum( - x: ArrayLike, /, *, axis: None = None, dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, /, *, axis: None = None, dtype: DTypeLike | None = None ) -> np.number[Any]: ... @overload def sum( - x: ArrayLike, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None ) -> NDArray[Any]: ... @overload def sum( @@ -30,7 +31,11 @@ def sum( def sum( - x: ArrayLike, /, *, axis: Literal[0, 1, None] = None, dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, + /, + *, + axis: Literal[0, 1, None] = None, + dtype: DTypeLike | None = None, ) -> NDArray[Any] | np.number[Any] | types.DaskArray: """Sum over both or one axis. @@ -56,7 +61,7 @@ def _sum( dtype: DTypeLike | None = None, ) -> NDArray[Any] | np.number[Any] | types.DaskArray: assert not isinstance(x, types.CSBase | types.DaskArray) - return np.sum(x, axis=axis, dtype=dtype) # type: ignore[no-any-return] + return cast(NDArray[Any] | np.number[Any], np.sum(x, axis=axis, dtype=dtype)) @_sum.register(types.CSBase) @@ -67,7 +72,7 @@ def _( if isinstance(x, types.CSMatrix): x = sp.csr_array(x) if x.format == "csr" else sp.csc_array(x) - return np.sum(x, axis=axis, dtype=dtype) # type: ignore[no-any-return] + return cast(NDArray[Any] | np.number[Any], np.sum(x, axis=axis, dtype=dtype)) @_sum.register(types.DaskArray) @@ -108,11 +113,14 @@ def sum_drop_keepdims( # Explicitly use numpy result dtype (e.g. `NDArray[bool].sum().dtype == int64`) dtype = np.zeros(1, dtype=x.dtype).sum().dtype - return reduction( # type: ignore[no-any-return,no-untyped-call] - x, - sum_drop_keepdims, - partial(np.sum, dtype=dtype), - axis=axis, - dtype=dtype, - meta=np.array([], dtype=dtype), + return cast( + types.DaskArray, + reduction( # type: ignore[no-untyped-call] + x, + sum_drop_keepdims, + partial(np.sum, dtype=dtype), + axis=axis, + dtype=dtype, + meta=np.array([], dtype=dtype), + ), ) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 01dcb79..e26852f 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -3,135 +3,42 @@ from __future__ import annotations -import re from typing import TYPE_CHECKING -import numpy as np +from ._array_type import ArrayType, ConversionContext, Flags, random_mat if TYPE_CHECKING: - from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias - - from numpy.typing import ArrayLike, DTypeLike, NDArray - - from fast_array_utils import types - from fast_array_utils.types import CSBase - - Array: TypeAlias = ( - NDArray[Any] - | types.CSBase - | types.CupyArray - | types.DaskArray - | types.H5Dataset - | types.ZarrArray - ) - - class ToArray(Protocol): - """Convert to a supported array.""" - - def __call__( # noqa: D102 - self, data: ArrayLike, /, *, dtype: DTypeLike | None = None - ) -> Array: ... - - _DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] - _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] - - -RE_ARRAY_QUAL = re.compile(r"(?P(?:\w+\.)*\w+)\.(?P[^\[]+)(?:\[(?P[\w.]+)\])?") - - -def get_array_cls(qualname: str) -> type[Array]: # noqa: PLR0911 - """Get a supported array class by qualname.""" - m = RE_ARRAY_QUAL.fullmatch(qualname) - assert m - match m["mod"], m["name"], m["inner"]: - case "numpy", "ndarray", None: - return np.ndarray - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - import scipy.sparse - - return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] - case "cupy", "ndarray", None: - import cupy as cp - - return cp.ndarray # type: ignore[no-any-return] - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - import cupyx.scipy.sparse as cu_sparse - - return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] - case "dask.array", cls_name, _: - if TYPE_CHECKING: - from dask.array.core import Array as DaskArray - else: - from dask.array import Array as DaskArray - - return DaskArray - case "h5py", "Dataset", _: - import h5py - - return h5py.Dataset # type: ignore[no-any-return] - case "zarr", "Array", _: - import zarr - - return zarr.Array - case _: - msg = f"Unknown array class: {qualname}" - raise ValueError(msg) - - -def random_mat( - shape: tuple[int, int], - *, - density: SupportsFloat = 0.01, - format: Literal["csr", "csc"] = "csr", # noqa: A002 - dtype: DTypeLike | None = None, - container: Literal["array", "matrix"] = "array", - gen: np.random.Generator | None = None, -) -> CSBase: - """Create a random matrix.""" - from scipy.sparse import random as random_spmat - from scipy.sparse import random_array as random_sparr - - m, n = shape - return ( - random_spmat(m, n, density=density, format=format, dtype=dtype, random_state=gen) - if container == "matrix" - else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) - ) - - -def random_array( - qualname: str, - shape: tuple[int, int], - *, - dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, - gen: np.random.Generator | None = None, -) -> Array: - """Create a random array.""" - gen = np.random.default_rng(gen) - - m = RE_ARRAY_QUAL.fullmatch(qualname) - assert m - match m["mod"], m["name"], m["inner"]: - case "numpy", "ndarray", None: - return gen.random(shape, dtype=dtype or np.float64) - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - fmt, container = cls_name.split("_") - return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] - case "cupy", "ndarray", None: - raise NotImplementedError - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - raise NotImplementedError - case "dask.array", cls_name, _: - raise NotImplementedError - case "h5py", "Dataset", _: - raise NotImplementedError - case "zarr", "Array", _: - raise NotImplementedError - case _: - msg = f"Unknown array class: {qualname}" - raise ValueError(msg) + from ._array_type import Array, ToArray # noqa: TC004 + + +__all__ = [ + "SUPPORTED_TYPES", + "Array", + "ArrayType", + "ConversionContext", + "Flags", + "ToArray", + "random_mat", +] + + +_TP_MEM = ( + ArrayType("numpy", "ndarray", Flags.Any), + ArrayType("cupy", "ndarray", Flags.Any | Flags.Gpu), + *( + ArrayType("scipy.sparse", n, Flags.Any | Flags.Sparse) + for n in ["csr_array", "csc_array", "csr_matrix", "csc_matrix"] + ), + *( + ArrayType("cupyx.scipy.sparse", n, Flags.Any | Flags.Gpu | Flags.Sparse) + for n in ["csr_matrix", "csc_matrix"] + ), +) +_TP_DASK = tuple(ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t) for t in _TP_MEM) +_TP_DISK = tuple( + ArrayType(m, n, Flags.Any | Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")] +) + +SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK) +"""All supported array types.""" diff --git a/src/testing/fast_array_utils/_array_type.py b/src/testing/fast_array_utils/_array_type.py new file mode 100644 index 0000000..e13e01f --- /dev/null +++ b/src/testing/fast_array_utils/_array_type.py @@ -0,0 +1,288 @@ +# SPDX-License-Identifier: MPL-2.0 +"""ArrayType class and helpers.""" + +from __future__ import annotations + +import enum +from dataclasses import KW_ONLY, dataclass, field +from functools import cached_property +from typing import TYPE_CHECKING, Generic, Literal, TypeVar, cast + +import numpy as np + + +if TYPE_CHECKING: + from typing import Any, Protocol, SupportsFloat, TypeAlias + + import h5py + from numpy.typing import ArrayLike, DTypeLike, NDArray + + from fast_array_utils import types + from fast_array_utils.types import CSBase + + Array: TypeAlias = ( + NDArray[Any] + | types.CSBase + | types.CupyArray + | types.DaskArray + | types.H5Dataset + | types.ZarrArray + ) + + Arr = TypeVar("Arr", bound=Array, default=Array) + Arr_co = TypeVar("Arr_co", bound=Array, covariant=True) + + Inner = TypeVar("Inner", bound="ArrayType[Any, None] | None", default=Any) + + class ToArray(Protocol, Generic[Arr_co]): + """Convert to a supported array.""" + + def __call__(self, data: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr_co: ... + + _DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] + _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] +else: + Arr = TypeVar("Arr") + Inner = TypeVar("Inner") + ToArray = list # needs to have 1 type parameter + + +__all__ = ["ArrayType", "ConversionContext", "ToArray"] + + +class Flags(enum.Flag): + """Array classification flags.""" + + None_ = 0 + """No array type.""" + Any = enum.auto() + """Any array type.""" + + Sparse = enum.auto() + """Sparse array.""" + Gpu = enum.auto() + """GPU array.""" + Dask = enum.auto() + """Dask array.""" + Disk = enum.auto() + """On-disk array.""" + + +@dataclass +class ConversionContext: + """Conversion context required for h5py.""" + + hdf5_file: h5py.File + + +@dataclass(frozen=True) +class ArrayType(Generic[Arr, Inner]): + """Supported array type with methods for conversion and random generation. + + Examples + -------- + >>> at = ArrayType("numpy", "ndarray") + >>> arr = at([1, 2, 3]) + >>> arr + array([1, 2, 3]) + >>> assert isinstance(arr, at.cls) + + """ + + mod: str + """Module name.""" + name: str + """Array class name.""" + flags: Flags = Flags.Any + """Classification flags.""" + + _: KW_ONLY + + inner: Inner = None # type: ignore[assignment] + """Inner array type (e.g. for dask).""" + conversion_context: ConversionContext | None = field(default=None, compare=False) + """Conversion context required for converting to h5py.""" + + def __repr__(self) -> str: + rv = f"{self.mod}.{self.name}" + return f"{rv}[{self.inner}]" if self.inner else rv + + @cached_property + def cls(self) -> type[Arr]: # noqa: PLR0911 + """Array class for :func:`isinstance` checks.""" + match self.mod, self.name, self.inner: + case "numpy", "ndarray", None: + return cast(type[Arr], np.ndarray) + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + import scipy.sparse + + return cast(type[Arr], getattr(scipy.sparse, cls_name)) + case "cupy", "ndarray", None: + import cupy as cp + + return cast(type[Arr], cp.ndarray) + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + import cupyx.scipy.sparse as cu_sparse + + return cast(type[Arr], getattr(cu_sparse, cls_name)) + case "dask.array", "Array", _: + if TYPE_CHECKING: + from dask.array.core import Array as DaskArray + else: + from dask.array import Array as DaskArray + + return cast(type[Arr], DaskArray) + case "h5py", "Dataset", _: + import h5py + + return cast(type[Arr], h5py.Dataset) + case "zarr", "Array", _: + import zarr + + return cast(type[Arr], zarr.Array) + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + def random( + self, + shape: tuple[int, int], + *, + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, + gen: np.random.Generator | None = None, + # sparse only + density: SupportsFloat = 0.01, + ) -> Arr: + """Create a random array.""" + gen = np.random.default_rng(gen) + + match self.mod, self.name, self.inner: + case "numpy", "ndarray", None: + return cast(Arr, gen.random(shape, dtype=dtype or np.float64)) + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + fmt, container = cast( + tuple[Literal["csr", "csc"], Literal["array", "matrix"]], cls_name.split("_") + ) + return cast( + Arr, + random_mat( + shape, density=density, format=fmt, container=container, dtype=dtype + ), + ) + case "cupy", "ndarray", None: + raise NotImplementedError + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + raise NotImplementedError + case "dask.array", "Array", _: + if TYPE_CHECKING: + from dask.array.wrap import zeros + else: + from dask.array import zeros + + arr = zeros(shape, dtype=dtype, chunks=_half_chunk_size(shape)) + return cast( + Arr, + arr.map_blocks( + lambda x: self.random(x.shape, dtype=x.dtype, gen=gen, density=density), + dtype=dtype, + ), + ) + case "h5py", "Dataset", _: + raise NotImplementedError + case "zarr", "Array", _: + raise NotImplementedError + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr: + """Convert to this array type.""" + from fast_array_utils import types + + fn: ToArray[Arr] + if self.cls is np.ndarray: + fn = cast(ToArray[Arr], np.asarray) + elif self.cls is types.DaskArray: + if self.inner is None: + msg = "Cannot convert to dask array without inner array type" + raise AssertionError(msg) + fn = cast(ToArray[Arr], self._to_dask_array) + elif self.cls is types.H5Dataset: + fn = cast(ToArray[Arr], self._to_h5py_dataset) + elif self.cls is types.ZarrArray: + fn = cast(ToArray[Arr], self._to_zarr_array) + elif self.cls is types.CupyArray: + import cupy as cu + + fn = cast(ToArray[Arr], cu.asarray) + else: + fn = cast(ToArray[Arr], self.cls) + + return fn(x, dtype=dtype) + + def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: + """Convert to a dask array.""" + from fast_array_utils.types import DaskArray + + if TYPE_CHECKING: + import dask.array.core as da + else: + import dask.array as da + + assert self.inner is not None + + arr = self.inner(x, dtype=dtype) + return cast(DaskArray, da.from_array(arr, _half_chunk_size(arr.shape))) # type: ignore[no-untyped-call] + + def _to_h5py_dataset( + self, x: ArrayLike, /, *, dtype: DTypeLike | None = None + ) -> types.H5Dataset: + """Convert to a h5py dataset.""" + if (ctx := self.conversion_context) is None: + msg = "`conversion_context` must be set for h5py" + raise RuntimeError(msg) + arr = np.asarray(x, dtype=dtype) + return ctx.hdf5_file.create_dataset("data", arr.shape, arr.dtype, data=arr) + + @staticmethod + def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: + """Convert to a zarr array.""" + import zarr + + arr = np.asarray(x, dtype=dtype) + za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) + za[...] = arr + return za + + +def random_mat( + shape: tuple[int, int], + *, + density: SupportsFloat = 0.01, + format: Literal["csr", "csc"] = "csr", # noqa: A002 + dtype: DTypeLike | None = None, + container: Literal["array", "matrix"] = "array", + gen: np.random.Generator | None = None, +) -> CSBase: + """Create a random matrix.""" + from scipy.sparse import random as random_spmat + from scipy.sparse import random_array as random_sparr + + m, n = shape + return ( + random_spmat(m, n, density=density, format=format, dtype=dtype, random_state=gen) + if container == "matrix" + else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) + ) + + +def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: + def half_rounded_up(x: int) -> int: + div, mod = divmod(x, 2) + return div + (mod > 0) + + return tuple(half_rounded_up(x) for x in a) diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 85c8e18..ddccb80 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -1,150 +1,109 @@ # SPDX-License-Identifier: MPL-2.0 -"""Testing utilities.""" +"""Pytest fixtures to get supported array types. + +Can be used as pytest plugin: ``pytest -p testing.fast_array_utils.pytest``. +""" from __future__ import annotations -import os +import dataclasses from importlib.util import find_spec from typing import TYPE_CHECKING, cast -import numpy as np import pytest -from fast_array_utils import types - -from . import get_array_cls +from . import SUPPORTED_TYPES, ArrayType, ConversionContext, Flags if TYPE_CHECKING: from collections.abc import Generator - from numpy.typing import ArrayLike, DTypeLike + from _pytest.nodes import Node +else: + Node = object - from testing.fast_array_utils import ToArray - from . import Array +__all__ = ["array_type", "conversion_context"] -def _skip_if_no(dist: str) -> pytest.MarkDecorator: - return pytest.mark.skipif(not find_spec(dist), reason=f"{dist} not installed") +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line( + "markers", "array_type: filter tests using `testing.fast_array_utils.Flags`" + ) -@pytest.fixture( - scope="session", - params=[ - pytest.param("numpy.ndarray"), - pytest.param("scipy.sparse.csr_array", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csc_array", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csr_matrix", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csc_matrix", marks=_skip_if_no("scipy")), - pytest.param("dask.array.Array[numpy.ndarray]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csr_array]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csc_array]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csr_matrix]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csc_matrix]", marks=_skip_if_no("dask")), - pytest.param("h5py.Dataset", marks=_skip_if_no("h5py")), - pytest.param("zarr.Array", marks=_skip_if_no("zarr")), - pytest.param("cupy.ndarray", marks=_skip_if_no("cupy")), - pytest.param("cupyx.scipy.sparse.csr_matrix", marks=_skip_if_no("cupy")), - pytest.param("cupyx.scipy.sparse.csc_matrix", marks=_skip_if_no("cupy")), - ], -) -def array_cls_name(request: pytest.FixtureRequest) -> str: - """Fixture for a supported array class.""" - return cast(str, request.param) +def _skip_if_unimportable(array_type: ArrayType) -> pytest.MarkDecorator: + dist = None + skip = False + for t in (array_type, array_type.inner): + if t and not find_spec(dist := t.mod.split(".", 1)[0]): + skip = True + return pytest.mark.skipif(skip, reason=f"{dist} not installed") -@pytest.fixture(scope="session") -def array_cls(array_cls_name: str) -> type[Array]: - """Fixture for a supported array class.""" - return get_array_cls(array_cls_name) +def _resolve_sel( + select: Flags = ~Flags(0), skip: Flags = Flags(0), *, reason: str | None = None +) -> tuple[Flags, Flags, str | None]: + return select, skip, reason -@pytest.fixture(scope="session") -def to_array( - request: pytest.FixtureRequest, array_cls: type[Array], array_cls_name: str -) -> ToArray: - """Fixture for conversion into a supported array.""" - return get_to_array(array_cls, array_cls_name, request) - +@pytest.fixture( + params=[pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) for t in SUPPORTED_TYPES], +) +def array_type(request: pytest.FixtureRequest) -> ArrayType: + """Fixture for a supported :class:`~testing.fast_array_utils.ArrayType`. -def get_to_array( - array_cls: type[Array], - array_cls_name: str | None = None, - request: pytest.FixtureRequest | None = None, -) -> ToArray: - """Create a function to convert to a supported array.""" - if array_cls is np.ndarray: - return np.asarray # type: ignore[return-value] - if array_cls is types.DaskArray: - assert array_cls_name is not None - return to_dask_array(array_cls_name) - if array_cls is types.H5Dataset: - assert request is not None - return request.getfixturevalue("to_h5py_dataset") # type: ignore[no-any-return] - if array_cls is types.ZarrArray: - return to_zarr_array - if array_cls is types.CupyArray: - import cupy as cu + Use :class:`testing.fast_array_utils.Flags` to select or skip array types - return cu.asarray # type: ignore[no-any-return] + #. using ``select=``/``args[0]``: - return array_cls # type: ignore[return-value] + .. code:: python + @pytest.mark.array_type(Flags.Sparse, reason="`something` only supports sparse arrays") + def test_something(array_type: ArrayType) -> None: + ... -def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: - def half_rounded_up(x: int) -> int: - div, mod = divmod(x, 2) - return div + (mod > 0) + #. and/or using ``skip=``/``args[1]``: - return tuple(half_rounded_up(x) for x in a) + .. code:: python + @pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu) + def test_something(array_type: ArrayType) -> None: + ... + """ + from fast_array_utils.types import H5Dataset -def to_dask_array(array_cls_name: str) -> ToArray: - """Convert to a dask array.""" - if TYPE_CHECKING: - import dask.array.core as da - else: - import dask.array as da + at = cast(ArrayType, request.param) - inner_cls_name = array_cls_name.removeprefix("dask.array.Array[").removesuffix("]") - inner_cls = get_array_cls(inner_cls_name) - to_array_fn: ToArray = get_to_array(array_cls=inner_cls) + mark = cast(Node, request.node).get_closest_marker("array_type") + if mark: + select, skip, reason = _resolve_sel(*mark.args, **mark.kwargs) + if not (at.flags & select) or (at.flags & skip): + pytest.skip(reason or f"{at} not included in {select=}, {skip=}") - def to_dask_array(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.DaskArray: - x = np.asarray(x, dtype=dtype) - return da.from_array(to_array_fn(x), _half_chunk_size(x.shape)) # type: ignore[no-untyped-call,no-any-return] + if at.cls is H5Dataset: + ctx = request.getfixturevalue("conversion_context") + at = dataclasses.replace(at, conversion_context=ctx) - return to_dask_array + return at -@pytest.fixture(scope="session") +@pytest.fixture # worker_id for xdist since we don't want to override open files -def to_h5py_dataset( +def conversion_context( + request: pytest.FixtureRequest, tmp_path_factory: pytest.TempPathFactory, worker_id: str = "serial", -) -> Generator[ToArray, None, None]: - """Convert to a h5py dataset.""" +) -> Generator[ConversionContext, None, None]: + """Fixture providing a :class:`~testing.fast_array_utils.ConversionContext`. + + Makes sure h5py works even when running tests in parallel. + """ import h5py + node = cast(Node, request.node) tmp_path = tmp_path_factory.mktemp("backed_adata") - tmp_path = tmp_path / f"test_{worker_id}.h5ad" + tmp_path = tmp_path / f"test_{node.name}_{worker_id}.h5ad" with h5py.File(tmp_path, "x") as f: - - def to_h5py_dataset(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.H5Dataset: - arr = np.asarray(x, dtype=dtype) - test_name = os.environ["PYTEST_CURRENT_TEST"].rsplit(":", 1)[-1].split(" ", 1)[0] - return f.create_dataset(test_name, arr.shape, arr.dtype, data=arr) - - yield to_h5py_dataset - - -def to_zarr_array(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.ZarrArray: - """Convert to a zarr array.""" - import zarr - - arr = np.asarray(x, dtype=dtype) - za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) - za[...] = arr - return za + yield ConversionContext(hdf5_file=f) diff --git a/tests/test_asarray.py b/tests/test_asarray.py index c7caa57..2b1395b 100644 --- a/tests/test_asarray.py +++ b/tests/test_asarray.py @@ -9,11 +9,11 @@ if TYPE_CHECKING: - from testing.fast_array_utils import ToArray + from testing.fast_array_utils import ArrayType -def test_asarray(to_array: ToArray) -> None: - x = to_array([[1, 2, 3], [4, 5, 6]]) +def test_asarray(array_type: ArrayType) -> None: + x = array_type([[1, 2, 3], [4, 5, 6]]) arr = asarray(x) assert isinstance(arr, np.ndarray) assert arr.shape == (2, 3) diff --git a/tests/test_sparse.py b/tests/test_sparse.py index 91c8dab..0bf10c6 100644 --- a/tests/test_sparse.py +++ b/tests/test_sparse.py @@ -2,48 +2,49 @@ from __future__ import annotations from importlib.util import find_spec -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pytest from fast_array_utils.conv.scipy import to_dense -from testing.fast_array_utils import random_mat +from testing.fast_array_utils import Flags if TYPE_CHECKING: - from typing import Literal - - from numpy.typing import DTypeLike from pytest_codspeed import BenchmarkFixture + from fast_array_utils.types import CSBase + from testing.fast_array_utils import ArrayType + from testing.fast_array_utils._array_type import _DTypeLikeFloat32, _DTypeLikeFloat64 + pytestmark = [pytest.mark.skipif(not find_spec("scipy"), reason="scipy not installed")] @pytest.fixture(scope="session", params=["csr", "csc"]) def sp_fmt(request: pytest.FixtureRequest) -> Literal["csr", "csc"]: - return request.param # type: ignore[no-any-return] + return cast(Literal["csr", "csc"], request.param) @pytest.fixture(scope="session", params=["array", "matrix"]) def sp_container(request: pytest.FixtureRequest) -> Literal["array", "matrix"]: - return request.param # type: ignore[no-any-return] + return cast(Literal["array", "matrix"], request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64]) def dtype(request: pytest.FixtureRequest) -> type[np.float32 | np.float64]: - return request.param # type: ignore[no-any-return] + return cast(type[np.float32 | np.float64], request.param) +@pytest.mark.array_type(select=Flags.Sparse, skip=Flags.Dask) @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense( + array_type: ArrayType[CSBase, None], order: Literal["C", "F"], - sp_fmt: Literal["csr", "csc"], - dtype: DTypeLike, - sp_container: Literal["array", "matrix"], + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = random_mat((10, 10), density=0.1, format=sp_fmt, dtype=dtype, container=sp_container) + mat = array_type.random((10, 10), density=0.1, dtype=dtype) arr = to_dense(mat, order=order) assert arr.flags[order] assert arr.dtype == mat.dtype @@ -51,13 +52,14 @@ def test_to_dense( @pytest.mark.benchmark +@pytest.mark.array_type(select=Flags.Sparse, skip=Flags.Dask) @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense_benchmark( benchmark: BenchmarkFixture, + array_type: ArrayType[CSBase, None], order: Literal["C", "F"], - sp_fmt: Literal["csr", "csc"], - dtype: DTypeLike, + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = random_mat((1_000, 1_000), format=sp_fmt, dtype=dtype, container="array") + mat = array_type.random((1_000, 1_000), dtype=dtype) to_dense(mat, order=order) # warmup: numba compile benchmark(to_dense, mat, order=order) diff --git a/tests/test_stats.py b/tests/test_stats.py index e86aacb..500b75d 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,52 +1,58 @@ # SPDX-License-Identifier: MPL-2.0 from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pytest from fast_array_utils import stats, types -from testing.fast_array_utils import random_array +from testing.fast_array_utils import Flags if TYPE_CHECKING: - from typing import Any, Literal + from typing import Any + from numpy.typing import NDArray from pytest_codspeed import BenchmarkFixture - from testing.fast_array_utils import Array, ToArray + from testing.fast_array_utils import ArrayType DTypeIn = type[np.float32 | np.float64 | np.int32 | np.bool_] DTypeOut = type[np.float32 | np.float64 | np.int64] +else: + DTypeIn = type + DTypeOut = type @pytest.fixture(scope="session", params=[0, 1, None]) def axis(request: pytest.FixtureRequest) -> Literal[0, 1, None]: - return request.param # type: ignore[no-any-return] + return cast(Literal[0, 1, None], request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64, np.int32, np.bool_]) def dtype_in(request: pytest.FixtureRequest) -> DTypeIn: - return request.param # type: ignore[no-any-return] + return cast(DTypeIn, request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64, None]) def dtype_arg(request: pytest.FixtureRequest) -> DTypeOut | None: - return request.param # type: ignore[no-any-return] + return cast(DTypeOut | None, request.param) def test_sum( - to_array: ToArray, + array_type: ArrayType, dtype_in: DTypeIn, dtype_arg: DTypeOut | None, axis: Literal[0, 1, None], ) -> None: np_arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype_in) - arr = to_array(np_arr.copy()) + arr = array_type(np_arr.copy()) assert arr.dtype == dtype_in - sum_: Array[Any] | np.floating = stats.sum(arr, axis=axis, dtype=dtype_arg) # type: ignore[type-arg,arg-type] + sum_: NDArray[Any] | np.number[Any] | types.DaskArray = stats.sum( + arr, axis=axis, dtype=dtype_arg + ) match axis, arr: case _, types.DaskArray(): @@ -68,22 +74,20 @@ def test_sum( else: assert sum_.dtype == dtype_in - np.testing.assert_array_equal(sum_, np.sum(np_arr, axis=axis, dtype=dtype_arg)) # type: ignore[arg-type] + np.testing.assert_array_equal(sum_, np.sum(np_arr, axis=axis, dtype=dtype_arg)) @pytest.mark.benchmark +@pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float def test_sum_benchmark( benchmark: BenchmarkFixture, - array_cls_name: str, + array_type: ArrayType[NDArray[Any] | types.CSBase], axis: Literal[0, 1, None], dtype: type[np.float32 | np.float64], ) -> None: - try: - shape = (1_000, 1_000) if "sparse" in array_cls_name else (100, 100) - arr = random_array(array_cls_name, shape, dtype=dtype) - except NotImplementedError: - pytest.skip("random_array not implemented for dtype") + shape = (1_000, 1_000) if "sparse" in array_type.mod else (100, 100) + arr = array_type.random(shape, dtype=dtype) - stats.sum(arr, axis=axis) # type: ignore[arg-type] # warmup: numba compile + stats.sum(arr, axis=axis) # warmup: numba compile benchmark(stats.sum, arr, axis=axis) diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index 36d41c3..5ebfd5f 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -7,21 +7,32 @@ import pytest from fast_array_utils import types +from testing.fast_array_utils import Flags if TYPE_CHECKING: from numpy.typing import DTypeLike - from testing.fast_array_utils import Array, ToArray + from testing.fast_array_utils import ArrayType @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_conv(array_cls: type[Array], to_array: ToArray, dtype: DTypeLike) -> None: - arr = to_array(np.arange(12).reshape(3, 4), dtype=dtype) - assert isinstance(arr, array_cls) +def test_conv(array_type: ArrayType, dtype: DTypeLike) -> None: + arr = array_type(np.arange(12).reshape(3, 4), dtype=dtype) + assert isinstance(arr, array_type.cls) if isinstance(arr, types.DaskArray): arr = arr.compute() # type: ignore[no-untyped-call] elif isinstance(arr, types.CupyArray): arr = arr.get() assert arr.shape == (3, 4) assert arr.dtype == dtype + + +def test_array_types(array_type: ArrayType) -> None: + assert array_type.flags & Flags.Any + assert array_type.flags & ~Flags(0) + assert not (array_type.flags & Flags(0)) + assert ("sparse" in str(array_type)) == bool(array_type.flags & Flags.Sparse) + assert ("cupy" in str(array_type)) == bool(array_type.flags & Flags.Gpu) + assert ("dask" in str(array_type)) == bool(array_type.flags & Flags.Dask) + assert (array_type.mod in {"zarr", "h5py"}) == bool(array_type.flags & Flags.Disk)