diff --git a/.github/workflows/regression-tests.yml b/.github/workflows/regression-tests.yml index 72dda521..68c8f436 100644 --- a/.github/workflows/regression-tests.yml +++ b/.github/workflows/regression-tests.yml @@ -15,8 +15,8 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + matrix: # Keep these in ascending order for automagic with coverage + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -32,13 +32,18 @@ jobs: python -c "import pyttb" - name: Install dev dependencies run: | - python -m pip install --upgrade coverage coveralls sphinx_rtd_theme + python -m pip install --upgrade coverage sphinx_rtd_theme pip install ".[dev]" - name: Run tests run: | coverage run --source pyttb -m pytest tests/ coverage report + - name: Add coveralls dependencies + if: strategy.job-index == 0 + run: | + python -m pip install --upgrade coveralls - name: Upload coverage to Coveralls + if: strategy.job-index == 0 uses: coverallsapp/github-action@v2 #env: # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/source/create_problem.rst b/docs/source/create_problem.rst new file mode 100644 index 00000000..211bbb5d --- /dev/null +++ b/docs/source/create_problem.rst @@ -0,0 +1,34 @@ +Create Test Problems (:obj:`pyttb.create_problem`) +--------------------------------------------------- + +.. autofunction:: pyttb.create_problem.create_problem + +.. autoclass:: pyttb.create_problem.BaseProblem + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. autoclass:: pyttb.create_problem.CPProblem + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. autoclass:: pyttb.create_problem.TuckerProblem + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. autoclass:: pyttb.create_problem.ExistingSolution + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. 
autoclass:: pyttb.create_problem.ExistingCPSolution + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. autoclass:: pyttb.create_problem.ExistingTuckerSolution + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: + +.. autoclass:: pyttb.create_problem.MissingData + :members: + :special-members: + :exclude-members: __dict__, __weakref__, __slots__ + :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index 930010c1..3183c0a8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -47,8 +47,13 @@ algorithms for computing low-rank tensor models. decompositions such as Poisson Tensor Factorization via alternating Poisson regression. +- `IO`_ + + Storing and retrieving tensors from disk. + .. _Tensor Classes: tensor_classes.html .. _Algorithms: algorithms.html +.. _IO: io.html Getting Started diff --git a/docs/source/io.rst b/docs/source/io.rst new file mode 100644 index 00000000..10ccc1fc --- /dev/null +++ b/docs/source/io.rst @@ -0,0 +1,6 @@ +Input/Output +------------ +Storing or reading tensors from disk. + +.. autofunction:: pyttb.import_data.import_data +.. autofunction:: pyttb.export_data.export_data \ No newline at end of file diff --git a/docs/source/matlab/additional_support.rst b/docs/source/matlab/additional_support.rst new file mode 100644 index 00000000..f1df8eab --- /dev/null +++ b/docs/source/matlab/additional_support.rst @@ -0,0 +1,4 @@ +Additional Utilities For MATLAB User Transition +----------------------------------------------- + +.. 
autofunction:: pyttb.matlab.matlab_support.matlab_print \ No newline at end of file diff --git a/docs/source/matlab/symktensor.rst b/docs/source/matlab/symktensor.rst deleted file mode 100644 index 19e215a8..00000000 --- a/docs/source/matlab/symktensor.rst +++ /dev/null @@ -1,8 +0,0 @@ -``symktensor`` --------------------- - -Data members -^^^^^^^^^^^^ - -Methods -^^^^^^^ \ No newline at end of file diff --git a/docs/source/matlab/symtensor.rst b/docs/source/matlab/symtensor.rst deleted file mode 100644 index 8d673c32..00000000 --- a/docs/source/matlab/symtensor.rst +++ /dev/null @@ -1,8 +0,0 @@ -``symtensor`` -------------------- - -Data members -^^^^^^^^^^^^ - -Methods -^^^^^^^ \ No newline at end of file diff --git a/docs/source/pyttb_utils.rst b/docs/source/pyttb_utils.rst index 7cc02f4c..0627df89 100644 --- a/docs/source/pyttb_utils.rst +++ b/docs/source/pyttb_utils.rst @@ -1,5 +1,7 @@ -Helper Functions (:mod:`pyttb_utils`) -------------------------------------- +Helper Functions (:mod:`pyttb_utils`, :mod:`khatrirao`) +-------------------------------------------------------- + +.. autofunction:: pyttb.khatrirao.khatrirao .. 
automodule:: pyttb.pyttb_utils :members: diff --git a/docs/source/reference.rst b/docs/source/reference.rst index 46cab023..8d7529fc 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -6,3 +6,4 @@ Reference (:mod:`pyttb`) tensor_classes.rst algorithms.rst + io.rst diff --git a/docs/source/tensor_classes.rst b/docs/source/tensor_classes.rst index 56fb9f21..4a220ed3 100644 --- a/docs/source/tensor_classes.rst +++ b/docs/source/tensor_classes.rst @@ -12,4 +12,5 @@ Tensor Classes tenmat.rst sptenmat.rst pyttb_utils.rst + create_problem.rst diff --git a/docs/source/tutorial/utility_test_problem.ipynb b/docs/source/tutorial/utility_test_problem.ipynb new file mode 100644 index 00000000..583f3b8e --- /dev/null +++ b/docs/source/tutorial/utility_test_problem.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Creating Test Problems\n", + "```\n", + "Copyright 2025 National Technology & Engineering Solutions of Sandia,\n", + "LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the\n", + "U.S. Government retains certain rights in this software.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "We demonstrate how to use the `create_problem` function to create test problems for decomposition algorithms. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import pyttb as ttb\n", + "from pyttb.create_problem import (\n", + " CPProblem,\n", + " ExistingCPSolution,\n", + " TuckerProblem,\n", + " MissingData,\n", + " create_problem,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Set global random seed for reproducibility of this notebook\n", + "import numpy as np\n", + "\n", + "np.random.seed(123)" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Create a CP test problem\n", + "The `create_problem` function generates both the solution (as a `ktensor` for CP) and the test data (as a dense `tensor`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a problem\n", + "cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3, noise=0.1)\n", + "no_missing_data = MissingData()\n", + "solution, data = create_problem(cp_specific_params, no_missing_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "# Display the solution\n", + "print(solution)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# Display the data\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "# The difference between the true solution and measured data\n", + "# should match the specified noise setting\n", + "diff = (solution.full() - data).norm() / solution.full().norm()\n", + "print(diff)" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## Creating a Tucker test problem\n", + "The `create_problem` function can also 
create Tucker problems by providing a `TuckerProblem` data class as the first argument to `create_problem` instead. In this case, the function generates the solution as a `ttensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "tucker_specific_params = TuckerProblem(\n", + " shape=(5, 4, 3), num_factors=[3, 3, 2], noise=0.1\n", + ")\n", + "no_missing_data = MissingData()\n", + "solution, data = create_problem(tucker_specific_params, no_missing_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Display the solution\n", + "print(solution)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "# Display the data\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# The difference between the true solution and measured data\n", + "# should match the specified noise setting\n", + "diff = (solution.full() - data).norm() / solution.full().norm()\n", + "print(diff)" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## Recreating the same test problem\n", + "We are still relying on numpy's deprecated global random state. 
See [#441](https://github.com/sandialabs/pyttb/issues/441)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# Problem details\n", + "shape = [5, 4, 3]\n", + "num_factors = 3\n", + "seed = 123\n", + "missing_params = MissingData()\n", + "cp_specific_params = CPProblem(shape, num_factors=num_factors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the first test problem\n", + "np.random.seed(seed)\n", + "solution_1, data_1 = create_problem(cp_specific_params, missing_params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the second test problem\n", + "np.random.seed(seed)\n", + "solution_2, data_2 = create_problem(cp_specific_params, missing_params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "# Check that the solutions are identical\n", + "print(f\"{solution_1.isequal(solution_2)=}\")\n", + "\n", + "# Check that the data are identical\n", + "print(f\"{(data_1-data_2).norm()=}\")" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "## Options for creating factor matrices, core tensors, and weights\n", + "\n", + "User specified functions may be provided to generate the relevant components of `ktensors` or `ttensors`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "# Example custom weight generator for CP Problems\n", + "cp_specific_params = CPProblem(shape=[5, 4, 3], num_factors=2, weight_generator=np.ones)\n", + "solution, _ = create_problem(cp_specific_params, missing_params)\n", + "print(f\"{solution.weights}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "# Example custom core generator for Tucker\n", + "tucker_specific_params = TuckerProblem(\n", + " shape=[5, 4, 3], num_factors=[2, 2, 2], core_generator=ttb.tenones\n", + ")\n", + "solution, _ = create_problem(tucker_specific_params, missing_params)\n", + "print(f\"{solution.core}\")" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "## Create dense missing data problems\n", + "It's possible to create problems that have a percentage of missing data. The problem generator randomly creates the pattern of missing data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify 25% missing data\n", + "missing_data_params = MissingData(missing_ratio=0.25)\n", + "\n", + "# Show an example of randomly generated pattern\n", + "# 1 is known 0 is unknown\n", + "print(missing_data_params.get_pattern(shape=[5, 4, 3]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate problem using a newly sampled pattern\n", + "solution, data = create_problem(cp_specific_params, missing_data_params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "# Show data (including noise) with missing entries zeroed out\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "## Creating sparse missing data problems\n", + "If `sparse_model` is set to true then the returned data is sparse. This should only be used with `missing_ratio` >= 0.8." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "missing_data_params = MissingData(missing_ratio=0.8, sparse_model=True)\n", + "\n", + "# Here is a candidate pattern of known data\n", + "print(missing_data_params.get_pattern([5, 4, 3]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "# Here is the data (including noise) with zeros not explicitly represented.\n", + "solution, data = create_problem(cp_specific_params, missing_data_params)\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "## Create missing data problems with pre-specified pattern\n", + "A specific pattern (dense or sparse) can be used to represent missing data. 
This is also currently the recommended approach for reproducibility." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "# Grab a pattern from before\n", + "pattern = MissingData(missing_ratio=0.25).get_pattern([5, 4, 3])\n", + "missing_data_params = MissingData(missing_pattern=pattern)\n", + "solution, data = create_problem(cp_specific_params, missing_data_params)\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## Creating Sparse Problems (CP only)\n", + "If we assume each model parameter is the input to a Poisson process, then we can generate a sparse test problems. This requires that all the factor matrices and lambda be nonnegative. The default factor generator ('randn') won't work since it produces both positive and negative values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate factor matrices with a few large entries in each column\n", + "# This will be the basis of our solution\n", + "shape = (20, 15, 10)\n", + "num_factors = 4\n", + "A = []\n", + "for n in range(len(shape)):\n", + " A.append(np.random.rand(shape[n], num_factors))\n", + " for r in range(num_factors):\n", + " p = np.random.permutation(np.arange(shape[n]))\n", + " idx = p[1 : round(0.2 * shape[n])]\n", + " A[n][idx, r] *= 10\n", + "S = ttb.ktensor(A)\n", + "# S.normalize(sort=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "S.normalize(sort=True).weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "# Create sparse test problem based on the solution.\n", + "# `sparse_generation` controls how many insertions to make based on the solution.\n", + "# The weight vector of the solution is 
automatically rescaled to match the number of insertions.\n", + "existing_params = ExistingCPSolution(S, noise=0.0, sparse_generation=500)\n", + "print(f\"{S.weights=}\")\n", + "solution, data = create_problem(existing_params)\n", + "print(\n", + " f\"num_nozeros: {data.nnz}\\n\"\n", + " f\"total_insertions: {np.sum(data.vals)}\\n\"\n", + " f\"original weights vs rescaled: {S.weights / solution.weights}\"\n", + ")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index ed373b56..4b1ffbc8 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -32,6 +32,11 @@ Tucker Decompositions Working with Tensors ==================== +.. toctree:: + :maxdepth: 1 + + Creating Test Problems + Converting Between Tensors and Matrices --------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index 45604a84..ec0e7712 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ documentation = "https://pyttb.readthedocs.io" [project.optional-dependencies] dev = [ - "mypy>=1.10,<1.14.0", + "mypy>=1.15,<1.16.0", # Also in pre-commit "nbstripout>=0.8,<0.9", "pytest>8.0", @@ -84,6 +84,8 @@ ignore = [ "B011", # There is ongoing discussion about logging/warning etc "B028", + # Personal preference on magic method + "D105", ] [tool.ruff.lint.pydocstyle] convention = "numpy" diff --git a/pyttb/__init__.py b/pyttb/__init__.py index dbec0889..87a70514 100644 --- a/pyttb/__init__.py +++ b/pyttb/__init__.py @@ -22,10 +22,7 @@ from pyttb.matlab import matlab_support from pyttb.sptenmat import sptenmat from pyttb.sptensor import sptendiag, sptenrand, sptensor -from pyttb.sptensor3 import sptensor3 from pyttb.sumtensor import sumtensor -from pyttb.symktensor import symktensor -from pyttb.symtensor import symtensor from pyttb.tenmat import tenmat from pyttb.tensor import tendiag, teneye, tenones, tenrand, tensor, tenzeros from pyttb.ttensor import ttensor @@ 
-55,10 +52,7 @@ def ignore_warnings(ignore=True): sptendiag.__name__, sptenrand.__name__, sptensor.__name__, - sptensor3.__name__, sumtensor.__name__, - symktensor.__name__, - symtensor.__name__, teneye.__name__, tenmat.__name__, tendiag.__name__, diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py new file mode 100644 index 00000000..e10c8bf7 --- /dev/null +++ b/pyttb/create_problem.py @@ -0,0 +1,623 @@ +"""Create test problems for tensor factorizations.""" + +import logging +import math +from dataclasses import dataclass, field +from typing import Callable, Optional, Tuple, Union, cast, overload + +import numpy as np +from numpy_groupies import aggregate as accumarray + +import pyttb as ttb +from pyttb.pyttb_utils import Shape, parse_shape + +solution_generator = Callable[[Tuple[int, ...]], np.ndarray] +core_generator_t = Callable[ + [Tuple[int, ...]], Union[ttb.tensor, ttb.sptensor, np.ndarray] +] + + +def randn(shape: Tuple[int, ...]) -> np.ndarray: + """Stub for MATLAB randn. + + TODO move somewhere shareable. + """ + return np.random.normal(0, 1, size=shape) + + +@dataclass +class BaseProblem: + """Parameters general to all solutions. + + Attributes + ---------- + shape: + Tensor shape for generated problem. + factor_generator: + Method to generate factor matrices. + symmetric: + List of modes that should be symmetric. + For instance, `[(1,2), (3,4)]` specifies that + modes 1 and 2 have identical factor matrices, and modes 3 and 4 + also have identical factor matrices. + num_factors: + Number of factors. + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. 
+ """ + + shape: Shape = field(metadata={"doc": "A shape"}) + factor_generator: solution_generator = randn + symmetric: Optional[list[Tuple[int, int]]] = None + num_factors: Union[int, list[int], None] = None + noise: float = 0.10 + + def __post_init__(self): + self.shape = ttb.pyttb_utils.parse_shape(self.shape) + if not 0.0 <= self.noise <= 1.0: + raise ValueError(f"Noise must be in [0,1] but got {self.noise}") + + +@dataclass +class CPProblem(BaseProblem): + """Parameters specifying CP Solutions. + + Attributes + ---------- + shape: + Tensor shape for generated problem. + factor_generator: + Method to generate factor matrices. + symmetric: + List of modes that should be symmetric. + For instance, `[(1,2), (3,4)]` specifies that + modes 1 and 2 have identical factor matrices, and modes 3 and 4 + also have identical factor matrices. + num_factors: + Number of factors. + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. + weight_generator: + Method to generate weights for ktensor solution. + """ + + # NOTE inherited attributes are manually copy pasted, keep aligned between problems + + num_factors: int = 2 + weight_generator: solution_generator = np.random.random + # TODO: This is in DataParams in MATLAB, but only works for CP problems so + # feels more reasonable here + sparse_generation: Optional[float] = None + + +@dataclass +class TuckerProblem(BaseProblem): + """Parameters specifying Tucker Solutions. + + Attributes + ---------- + shape: + Tensor shape for generated problem. + factor_generator: + Method to generate factor matrices. + symmetric: + List of modes that should be symmetric. + For instance, `[(1,2), (3,4)]` specifies that + modes 1 and 2 have identical factor matrices, and modes 3 and 4 + also have identical factor matrices. + num_factors: + Number of factors. + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. 
+ core_generator: + Method to generate the core tensor for ttensor solution. + """ + + # TODO post_init set to [2, 2, 2] + num_factors: Optional[list[int]] = None + core_generator: core_generator_t = randn + + def __post_init__(self): + super().__post_init__() + self.num_factors = self.num_factors or [2, 2, 2] + + +@dataclass +class ExistingSolution: + """Parameters for using an existing tensor solution. + + Attributes + ---------- + solution: + Pre-existing tensor solution (ktensor or ttensor). + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. + """ + + solution: Union[ttb.ktensor, ttb.ttensor] + noise: float = 0.10 + + def __post_init__(self): + if not 0.0 <= self.noise <= 1.0: + raise ValueError(f"Noise must be in [0,1] but got {self.noise}") + + @property + def symmetric(self) -> None: + """Get the symmetric modes from the solution.""" + # ExistingSolution doesn't support symmetry constraints + return None + + +@dataclass +class ExistingTuckerSolution(ExistingSolution): + """Parameters for using an existing Tucker tensor solution. + + Attributes + ---------- + solution: + Pre-existing ttensor solution. + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. + """ + + solution: ttb.ttensor + + +@dataclass +class ExistingCPSolution(ExistingSolution): + """Parameters for using an existing CP tensor solution. + + Attributes + ---------- + solution: + Pre-existing ktensor solution. + noise: + Amount of Gaussian noise to add to solution. + If data is sparse noise is only added to nonzero entries. + sparse_generation: + Generate a sparse tensor that can be scaled so that the + column factors and weights are stochastic. Provide a number + of nonzeros to be inserted. A value in range [0,1) will be + interpreted as a ratio. 
+ """ + + solution: ttb.ktensor + sparse_generation: Optional[float] = None + + +@dataclass +class MissingData: + """Parameters to control missing data. + + Attributes + ---------- + missing_ratio: + Proportion of missing data. + missing_pattern: + An explicit tensor representing missing data locations. + sparse_model: + Whether to generate sparse rather than dense missing data pattern. + Only useful for large tensors that don't easily fit in memory and + when missing ratio > 0.8. + """ + + missing_ratio: float = 0.0 + missing_pattern: Optional[Union[ttb.sptensor, ttb.tensor]] = None + sparse_model: bool = False + + def __post_init__(self): + if not 0.0 <= self.missing_ratio <= 1.0: + raise ValueError( + f"Missing ratio must be in [0,1] but got {self.missing_ratio}" + ) + if self.missing_ratio > 0.0 and self.missing_pattern is not None: + raise ValueError( + "Can't set ratio and explicit pattern to specify missing data. " + "Select one or the other." + ) + + def has_missing(self) -> bool: + """Check if any form of missing data is requested.""" + return self.missing_ratio > 0.0 or self.missing_pattern is not None + + def raise_symmetric(self): + """Raise for unsupported symmetry request.""" + if self.missing_ratio: + raise ValueError("Can't generate a symmetric problem with missing data.") + if self.sparse_model: + raise ValueError("Can't generate sparse symmetric problem.") + + def get_pattern(self, shape: Shape) -> Union[None, ttb.tensor, ttb.sptensor]: + """Generate a tensor pattern of missing data.""" + if self.missing_pattern is not None: + if self.missing_pattern.shape != shape: + raise ValueError( + "Missing pattern and problem shapes are not compatible." + ) + return self.missing_pattern + + if self.missing_ratio == 0.0: + # All usages of this are internal, should we just rule out this situation? 
+ return None + if self.missing_ratio < 0.8 and self.sparse_model: + logging.warning( + "Setting sparse to false because there are" + " fewer than 80% missing elements." + ) + return _create_missing_data_pattern( + shape, self.missing_ratio, self.sparse_model + ) + + +def _create_missing_data_pattern( + shape: Shape, missing_ratio: float, sparse_model: bool = False +) -> Union[ttb.tensor, ttb.sptensor]: + """Create a randomly missing element indicator tensor. + + Creates a binary tensor of specified size with 0's indicating missing data + and 1's indicating valid data. Will only return a tensor that has at least + one entry per N-1 dimensional slice. + + Raises a ValueError if no such tensor is found within the attempt limit. + """ + shape = parse_shape(shape) + ndim = len(shape) + P = math.prod(shape) + Q = math.ceil((1 - missing_ratio) * P) + W: Union[ttb.tensor, ttb.sptensor] + # Attempt limit; also reported in the error message on failure. + max_attempts = 20 + + # Create tensor + ## Keep iterating until tensor is created or we give up. + # TODO: make range configurable? + for _ in range(max_attempts): + if sparse_model: + # Start with 50% more than Q random subs + # Note in original matlab to work out expected value of a*Q to guarantee + # Q unique entries + subs = np.unique( + np.floor( + np.random.random((int(np.ceil(1.5 * Q)), len(shape))).dot( + np.diag(shape) + ) + ), + axis=0, + ).astype(int) + # Check if there are too many unique subs + if len(subs) > Q: + # TODO: check if note from matlab still relevant + # Note in original matlab: unique orders the subs and would bias toward + # first subs with lower values, so we sample to cut back + idx = np.random.permutation(subs.shape[0]) + subs = subs[idx[:Q]] + elif subs.shape[0] < Q: + logging.warning( + f"Only generated {subs.shape[0]} of " f"{Q} desired subscripts" + ) + W = ttb.sptensor( + subs, + np.ones( + (len(subs), 1), + ), + shape=shape, + ) + else: + # Compute the linear indices of the known (non-missing) entries. 
+ idx = np.random.permutation(P) + idx = idx[:Q] + W = ttb.tenzeros(shape) + W[idx] = 1 + # return W + + # Check if W has any empty slices + isokay = True + for n in range(ndim): + all_but_n = np.arange(W.ndims) + all_but_n = np.delete(all_but_n, n) + collapse_W = W.collapse(all_but_n) + if isinstance(collapse_W, np.ndarray): + isokay &= bool(np.all(collapse_W)) + else: + isokay &= bool(np.all(collapse_W.double())) + + # Quit if okay + if isokay: + break + + if not isokay: + raise ValueError( + f"After {max_attempts} iterations, cannot produce a tensor with " + f"{missing_ratio*100}% missing data without an empty slice." + ) + return W + + +@overload +def create_problem( + problem_params: CPProblem, missing_params: Optional[MissingData] = None +) -> Tuple[ + ttb.ktensor, Union[ttb.tensor, ttb.sptensor] +]: ... # pragma: no cover see coveragepy/issues/970 + + +@overload +def create_problem( + problem_params: TuckerProblem, + missing_params: Optional[MissingData] = None, +) -> Tuple[ttb.ttensor, ttb.tensor]: ... # pragma: no cover see coveragepy/issues/970 + + +@overload +def create_problem( + problem_params: ExistingSolution, + missing_params: Optional[MissingData] = None, +) -> Tuple[ + Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor] +]: ... # pragma: no cover see coveragepy/issues/970 + + +def create_problem( + problem_params: Union[CPProblem, TuckerProblem, ExistingSolution], + missing_params: Optional[MissingData] = None, +) -> Tuple[Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor]]: + """Generate a problem and solution. + + Arguments + --------- + problem_params: + Parameters related to the problem to generate, or an existing solution. + missing_params: + Parameters to control missing data in the generated data/solution. 
+ + Examples + -------- + Base example params + + >>> shape = (5, 4, 3) + + Generate a CP problem + + >>> cp_specific_params = CPProblem(shape=shape, num_factors=3, noise=0.1) + >>> no_missing_data = MissingData() + >>> solution, data = create_problem(cp_specific_params, no_missing_data) + >>> diff = (solution.full() - data).norm() / solution.full().norm() + >>> bool(np.isclose(diff, 0.1)) + True + + Generate Tucker Problem + + >>> tucker_specific_params = TuckerProblem(shape, num_factors=[3, 3, 2], noise=0.1) + >>> solution, data = create_problem(tucker_specific_params, no_missing_data) + >>> diff = (solution.full() - data).norm() / solution.full().norm() + >>> bool(np.isclose(diff, 0.1)) + True + + Use existing solution + + >>> factor_matrices = [np.random.random((dim, 3)) for dim in shape] + >>> weights = np.random.random(3) + >>> existing_ktensor = ttb.ktensor(factor_matrices, weights) + >>> existing_params = ExistingSolution(existing_ktensor, noise=0.1) + >>> solution, data = create_problem(existing_params, no_missing_data) + >>> assert solution is existing_ktensor + """ + if missing_params is None: + missing_params = MissingData() + + if problem_params.symmetric is not None: + missing_params.raise_symmetric() + + solution = generate_solution(problem_params) + + data: Union[ttb.tensor, ttb.sptensor] + if ( + isinstance(problem_params, (CPProblem, ExistingCPSolution)) + and problem_params.sparse_generation is not None + ): + if missing_params.has_missing(): + raise ValueError( + f"Can't combine missing data {MissingData.__name__} and " + f" sparse generation {CPProblem.__name__}." 
+ ) + solution = cast(ttb.ktensor, solution) + solution, data = generate_data_sparse(solution, problem_params) + elif missing_params.has_missing(): + pattern = missing_params.get_pattern(solution.shape) + data = generate_data(solution, problem_params, pattern) + else: + data = generate_data(solution, problem_params) + return solution, data + + +def generate_solution_factors(base_params: BaseProblem) -> list[np.ndarray]: + """Generate the factor matrices for either type of solution.""" + # Get shape of final tensor + shape = cast(Tuple[int, ...], base_params.shape) + + # Get shape of factors + if isinstance(base_params.num_factors, int): + nfactors = [base_params.num_factors] * len(shape) + elif base_params.num_factors is not None: + nfactors = base_params.num_factors + else: + raise ValueError("Num_factors shouldn't be none.") + if len(nfactors) != len(shape): + raise ValueError( + "Num_factors should be the same dimensions as shape but got" + f"{nfactors} and {shape}" + ) + factor_matrices = [] + for shape_i, nfactors_i in zip(shape, nfactors): + factor_matrices.append(base_params.factor_generator((shape_i, nfactors_i))) + + if base_params.symmetric is not None: + for grp in base_params.symmetric: + for j in range(1, len(grp)): + factor_matrices[grp[j]] = factor_matrices[grp[0]] + + return factor_matrices + + +@overload +def generate_solution( + problem_params: TuckerProblem, +) -> ttb.ttensor: ... + + +@overload +def generate_solution( + problem_params: CPProblem, +) -> ttb.ktensor: ... + + +@overload +def generate_solution( + problem_params: ExistingSolution, +) -> Union[ttb.ktensor, ttb.ttensor]: ... 
def generate_solution(
    problem_params: Union[CPProblem, TuckerProblem, ExistingSolution],
) -> Union[ttb.ktensor, ttb.ttensor]:
    """Generate problem solution.

    Parameters
    ----------
    problem_params:
        Description of the problem to build a solution for. An
        :class:`ExistingSolution` skips generation entirely; a
        :class:`TuckerProblem` or :class:`CPProblem` gets factor matrices from
        :func:`generate_solution_factors` combined with a generated core or
        generated weights respectively.

    Returns
    -------
    The solution object carried by an :class:`ExistingSolution`, a
    :class:`pyttb.ttensor` for a :class:`TuckerProblem`, or a
    :class:`pyttb.ktensor` for a :class:`CPProblem`.

    Raises
    ------
    ValueError
        If ``problem_params`` is not one of the supported parameter types.
        Errors raised by :func:`generate_solution_factors` propagate unchanged.
    """
    if isinstance(problem_params, ExistingSolution):
        # Handed back as-is: the caller receives the very same object.
        return problem_params.solution

    factor_matrices = generate_solution_factors(problem_params)

    # Create final model
    if isinstance(problem_params, TuckerProblem):
        num_factors = problem_params.num_factors
        if isinstance(num_factors, int):
            # generate_solution_factors broadcasts a single int across every
            # mode; do the same here so the core shape matches the factors
            # instead of failing on tuple(<int>).
            num_factors = [num_factors] * len(factor_matrices)
        core_shape = tuple(cast(list[int], num_factors))
        generated_core = problem_params.core_generator(core_shape)
        if isinstance(generated_core, (ttb.tensor, ttb.sptensor)):
            # Generator already produced a pyttb tensor; use it directly.
            core = generated_core
        else:
            # Otherwise wrap whatever the generator returned in a dense tensor.
            core = ttb.tensor(generated_core)
        return ttb.ttensor(core, factor_matrices)

    if isinstance(problem_params, CPProblem):
        # One weight per component.
        weights = problem_params.weight_generator((problem_params.num_factors,))
        return ttb.ktensor(factor_matrices, weights)

    raise ValueError(f"Unsupported problem parameter type: {type(problem_params)=}")


@overload
def generate_data(
    solution: Union[ttb.ktensor, ttb.ttensor],
    problem_params: Union[BaseProblem, ExistingSolution],
    pattern: Optional[ttb.tensor] = None,
) -> ttb.tensor: ...  # pragma: no cover see coveragepy/issues/970


@overload
def generate_data(
    solution: Union[ttb.ktensor, ttb.ttensor],
    problem_params: Union[BaseProblem, ExistingSolution],
    pattern: ttb.sptensor,
) -> ttb.sptensor: ...
# pragma: no cover see coveragepy/issues/970 + + +def generate_data( + solution: Union[ttb.ktensor, ttb.ttensor], + problem_params: Union[BaseProblem, ExistingSolution], + pattern: Optional[Union[ttb.tensor, ttb.sptensor]] = None, +) -> Union[ttb.tensor, ttb.sptensor]: + """Generate problem data.""" + shape = solution.shape + Rdm: Union[ttb.tensor, ttb.sptensor] + if pattern is not None: + if isinstance(pattern, ttb.sptensor): + Rdm = ttb.sptensor(pattern.subs, randn((pattern.nnz, 1)), pattern.shape) + Z = pattern * solution + elif isinstance(pattern, ttb.tensor): + Rdm = pattern * ttb.tensor(randn(shape)) + Z = pattern * solution.full() + else: + raise ValueError(f"Unsupported sparsity pattern of type {type(pattern)}") + else: + # TODO don't we already have a randn tensor method? + Rdm = ttb.tensor(randn(shape)) + Z = solution.full() + if problem_params.symmetric is not None: + # TODO Note in MATLAB code to follow up + Rdm = Rdm.symmetrize(np.array(problem_params.symmetric)) + + D = Z + problem_params.noise * Z.norm() * Rdm / Rdm.norm() + # Make sure the final result is definitely symmetric + if problem_params.symmetric is not None: + D = D.symmetrize(np.array(problem_params.symmetric)) + return D + + +def prosample(nsamples: int, prob: np.ndarray) -> np.ndarray: + """Proportional Sampling.""" + bins = np.minimum(np.cumsum(np.array([0, *prob])), 1) + bins[-1] = 1 + indices = np.digitize(np.random.random(nsamples), bins=bins) + return indices - 1 + + +def generate_data_sparse( + solution: ttb.ktensor, + problem_params: Union[CPProblem, ExistingCPSolution], +) -> Tuple[ttb.ktensor, ttb.sptensor]: + """Generate sparse CP data from a given solution.""" + # Error check on solution + if np.any(solution.weights < 0): + raise ValueError("All weights must be nonnegative.") + if any(np.any(factor < 0) for factor in solution.factor_matrices): + raise ValueError("All factor matrices must be nonnegative.") + if problem_params.symmetric is not None: + logging.warning("Summetric 
constraints have been ignored.") + if problem_params.sparse_generation is None: + raise ValueError("Cannot generate sparse data without sparse_generation set.") + + # Convert solution to probability tensor + # NOTE: Make copy since normalize modifies in place + P = solution.copy().normalize(mode=0) + eta = np.sum(P.weights) + P.weights /= eta + + # Determine how many samples per component + nedges = problem_params.sparse_generation + if nedges < 1: + nedges = np.round(nedges * math.prod(P.shape)).astype(int) + nedges = int(nedges) + nd = P.ndims + nc = P.ncomponents + csample = prosample(nedges, P.weights) + # TODO check this + csums = accumarray(csample, 1, size=nc) + + # Determine the subscripts for each randomly sampled entry + shape = solution.shape + subs: list[np.ndarray] = [] + for c in range(nc): + nsample = csums[c] + if nsample == 0: + continue + subs.append(np.zeros((nsample, nd), dtype=int)) + for d in range(nd): + subs[-1][:, d] = prosample(nsample, P.factor_matrices[d][:, c]) + # TODO could sum csums and allocate in place with slicing + allsubs = np.vstack(subs) + # Assemble final tensor. Note that duplicates are summed. + # TODO should we have sptenones for purposes like this? 
+ Z = ttb.sptensor.from_aggregator( + allsubs, + np.ones( + (len(allsubs), 1), + ), + shape=shape, + ) + + # Rescale S so that it is proportional to the number of edges inserted + solution = P + # raise ValueError( + # f"{nedges=}" + # f"{solution.weights=}" + # ) + solution.weights *= nedges + + # TODO no noise introduced in this special case in MATLAB + + return solution, Z diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py index 11ed3f19..a2637f8b 100644 --- a/pyttb/ktensor.py +++ b/pyttb/ktensor.py @@ -1606,7 +1606,7 @@ def score( component :class:`pyttb.ktensor` instances that have been normalized so that their weights are `self.weights` and `other.weights`, and their factor matrices are single column vectors containing [a1,a2,...,an] and - [b1,b2,...bn], rescpetively, then the score is defined as + [b1,b2,...bn], respectively, then the score is defined as score = penalty * (a1.T*b1) * (a2.T*b2) * ... * (an.T*bn), @@ -1653,23 +1653,31 @@ def score( Create two :class:`pyttb.ktensor` instances and compute the score between them: - >>> factors = [np.ones((3, 3)), np.ones((4, 3)), np.ones((5, 3))] + >>> factors = [ + ... np.ones((3, 3)) + 0.1, + ... np.ones((4, 3)) + 0.2, + ... np.ones((5, 3)) + 0.3, + ... ] >>> weights = np.array([2.0, 1.0, 3.0]) >>> K = ttb.ktensor(factors, weights) - >>> factors_2 = [np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))] + >>> factors_2 = [ + ... np.ones((3, 2)) + 0.1, + ... np.ones((4, 2)) + 0.2, + ... np.ones((5, 2)) + 0.3, + ... 
] >>> weights_2 = np.array([2.0, 4.0]) >>> K2 = ttb.ktensor(factors_2, weights_2) >>> score, Kperm, flag, perm = K.score(K2) - >>> print(score) - 0.875 + >>> print(np.isclose(score, 0.875)) + True >>> print(perm) [0 2 1] Compute score without using weights: >>> score, Kperm, flag, perm = K.score(K2, weight_penalty=False) - >>> print(score) - 1.0 + >>> print(np.isclose(score, 1.0)) + True >>> print(perm) [0 1 2] """ @@ -1733,8 +1741,9 @@ def score( best_perm = -1 * np.ones((RA), dtype=int) best_score = 0.0 for _ in range(RB): - idx = np.argmax(C.reshape(prod(C.shape), order=self.order)) - ij = tt_ind2sub((RA, RB), np.array(idx)) + flatten_C = C.reshape(prod(C.shape), order=self.order) + idx = np.argmax(flatten_C) + ij = tt_ind2sub((RA, RB), np.array(idx, dtype=int), order=self.order) best_score = best_score + C[ij[0], ij[1]] C[ij[0], :] = -10 C[:, ij[1]] = -10 diff --git a/pyttb/sptenmat.py b/pyttb/sptenmat.py index 8670da5d..9a0a143c 100644 --- a/pyttb/sptenmat.py +++ b/pyttb/sptenmat.py @@ -40,7 +40,7 @@ def __init__( # noqa: PLR0913 and values (vals) along with the mappings of the row (rdims) and column indices (cdims) and the shape of the original tensor (tshape). - If you already have an sparse tensor see :method:`pyttb.sptensor.to_sptenmat`. + If you already have an sparse tensor see :meth:`pyttb.sptensor.to_sptenmat`. 
Parameters ---------- diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index 942a212e..cca500ca 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -163,6 +163,11 @@ def __init__( if vals.size == 0: # In case user provides an empty array in weird format vals = np.array([], dtype=vals.dtype, ndmin=2) + elif len(vals.shape) == 1: + # Enforce column array + vals = vals.reshape((vals.shape[0], 1)) + elif len(vals.shape) > 2: + raise ValueError("Values should be a column vector") if copy: self.subs = subs.copy() @@ -438,6 +443,20 @@ def allsubs(self) -> np.ndarray: return s.astype(int) + @overload + def collapse( + self, + dims: None, + function_handle: Callable[[np.ndarray], Union[float, np.ndarray]], + ) -> float: ... # pragma: no cover see coveragepy/issues/970 + + @overload + def collapse( + self, + dims: OneDArray, + function_handle: Callable[[np.ndarray], Union[float, np.ndarray]] = sum, + ) -> Union[np.ndarray, sptensor]: ... # pragma: no cover see coveragepy/issues/970 + def collapse( self, dims: Optional[OneDArray] = None, @@ -503,6 +522,8 @@ def collapse( size=newsize[0], func=function_handle, ) + # TODO think about if this makes sense + # complicates return typing return np.zeros((newsize[0],)) # Create Result diff --git a/pyttb/sptensor3.py b/pyttb/sptensor3.py deleted file mode 100644 index 2d469b06..00000000 --- a/pyttb/sptensor3.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Sparse Tensor 3 Class Placeholder.""" - -# Copyright 2025 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. 
- - -class sptensor3: - """A sparse tensor variant.""" - - def __init__(self): - assert False, "SPTENSOR3 class not yet implemented" diff --git a/pyttb/symktensor.py b/pyttb/symktensor.py deleted file mode 100644 index 67a05e26..00000000 --- a/pyttb/symktensor.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Symmetric Kruskal Tensor Class Placeholder.""" - -# Copyright 2025 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. - - -class symktensor: - """Class for symmetric Kruskal tensors (decomposed).""" - - def __init__(self): - assert False, "SYMKTENSOR class not yet implemented" diff --git a/pyttb/symtensor.py b/pyttb/symtensor.py deleted file mode 100644 index bd57e5c0..00000000 --- a/pyttb/symtensor.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Symmetric Tensor Class Placeholder.""" - -# Copyright 2025 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. - - -class symtensor: - """Class for storing only unique entries of symmetric tensor.""" - - def __init__(self): - assert False, "SYMTENSOR class not yet implemented" diff --git a/pyttb/tensor.py b/pyttb/tensor.py index 1fe81dd3..4aaf9ca0 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -311,6 +311,20 @@ def __deepcopy__(self, memo): """Return deep copy of this tensor.""" return self.copy() + @overload + def collapse( + self, + dims: None, + fun: Callable[[np.ndarray], Union[float, np.ndarray]], + ) -> float: ... # pragma: no cover see coveragepy/issues/970 + + @overload + def collapse( + self, + dims: OneDArray, + fun: Callable[[np.ndarray], Union[float, np.ndarray]] = np.sum, + ) -> Union[np.ndarray, tensor]: ... 
# pragma: no cover see coveragepy/issues/970 + def collapse( self, dims: Optional[OneDArray] = None, @@ -382,6 +396,8 @@ def collapse( Min value: -0.977277879876411 """ if self.data.size == 0: + # TODO verify this is the only thing that returns np array + # and remove return np.array([], order=self.order) if dims is None: diff --git a/pyttb/ttensor.py b/pyttb/ttensor.py index eb58d4f4..ba22ebec 100644 --- a/pyttb/ttensor.py +++ b/pyttb/ttensor.py @@ -200,7 +200,7 @@ def __repr__(self): # pragma: no cover str Contains the core, and factor matrices as strings on different lines. """ - display_string = f"Tensor of shape: {self.shape}\n" f"\tCore is a\n" + display_string = f"TTensor of shape: {self.shape}\n" f"\tCore is a\n" display_string += textwrap.indent(str(self.core), "\t\t") display_string += "\n" diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py new file mode 100644 index 00000000..3e02cf97 --- /dev/null +++ b/tests/test_create_problem.py @@ -0,0 +1,227 @@ +import numpy as np +import pytest + +import pyttb as ttb +from pyttb.create_problem import ( + BaseProblem, + CPProblem, + ExistingSolution, + MissingData, + TuckerProblem, + create_problem, + generate_data, + generate_data_sparse, + generate_solution, +) + + +class TestDataclasses: + def test_problemparams(self): + arbitrary_shape = (2, 2, 2) + with pytest.raises(ValueError): + number_larger_than_one = 2.0 + BaseProblem(arbitrary_shape, noise=number_larger_than_one) + with pytest.raises(ValueError): + number_less_than_zero = -2.0 + BaseProblem(arbitrary_shape, noise=number_less_than_zero) + + def test_missingdata(self): + arbitrary_shape = (2, 2, 2) + with pytest.raises(ValueError): + number_larger_than_one = 2.0 + MissingData(missing_ratio=number_larger_than_one) + with pytest.raises(ValueError): + number_less_than_zero = -2.0 + MissingData(missing_ratio=number_less_than_zero) + + with pytest.raises(ValueError): + number_larger_than_zero = 1.0 + arbitrary_missing_pattern = 
ttb.tenones(arbitrary_shape) + MissingData( + missing_ratio=number_larger_than_zero, + missing_pattern=arbitrary_missing_pattern, + ) + + missing_params = MissingData(missing_ratio=0.1) + assert missing_params.has_missing() + with pytest.raises(ValueError): + missing_params.raise_symmetric() + + missing_params = MissingData(sparse_model=True) + with pytest.raises(ValueError): + missing_params.raise_symmetric() + + missing_params = MissingData() + assert not missing_params.has_missing() + missing_params.raise_symmetric() + + missing_params = MissingData() + assert missing_params.get_pattern(arbitrary_shape) is None + + def test_missingdata_logging(self, caplog): + arbitrary_shape = (2, 2, 2) + missing_params = MissingData(missing_ratio=0.1, sparse_model=True) + missing_params.get_pattern(arbitrary_shape) + assert "missing elements" in caplog.text + + def test_existing_solution(self, sample_ktensor_2way): + solution = sample_ktensor_2way + existing_solution = ExistingSolution(solution) + assert existing_solution.solution is solution + assert existing_solution.noise == 0.1 + + with pytest.raises(ValueError): + value_less_than_zero = -0.1 + ExistingSolution(solution, noise=value_less_than_zero) + + with pytest.raises(ValueError): + value_greater_than_one = 1.1 + ExistingSolution(solution, noise=value_greater_than_one) + + +def test_generate_solution_cp(): + # Smoke test with defaults + shape = (2, 2, 2) + cp_params = CPProblem(shape) + model = generate_solution(cp_params) + assert isinstance(model, ttb.ktensor) + assert model.shape == shape + + # TODO could test with different generators and enforce that they actually get used + + +def test_generate_data_cp(): + # Smoke test with defaults + shape = (2, 2, 2) + cp_params = CPProblem(shape) + model = generate_solution(cp_params) + data = generate_data(model, cp_params) + assert isinstance(data, ttb.tensor) + assert data.shape == model.shape + + +def test_generate_solution_tucker(): + # Smoke test with defaults + shape = 
(2, 2, 2) + tucker_params = TuckerProblem(shape) + model = generate_solution(tucker_params) + assert isinstance(model, ttb.ttensor) + assert model.shape == shape + + # Smoke test with a tensor core generator + shape = (2, 2, 2) + tucker_params = TuckerProblem(shape, core_generator=ttb.tenrand) + model = generate_solution(tucker_params) + assert isinstance(model, ttb.ttensor) + assert model.shape == shape + # TODO could test with different generators and enforce that they actually get used + + +def test_generate_data_tucker(): + # Smoke test with defaults + shape = (2, 2, 2) + tucker_params = TuckerProblem(shape) + model = generate_solution(tucker_params) + data = generate_data(model, tucker_params) + assert isinstance(data, ttb.tensor) + assert data.shape == model.shape + + +def test_create_problem_smoke(): + shape = (2, 2, 2) + cp_params = CPProblem(shape) + missing_params = MissingData() + soln, data = create_problem(cp_params, missing_params) + assert soln.full().shape == data.shape + + existing_params = ExistingSolution(soln) + missing_params = MissingData() + soln, data = create_problem(existing_params, missing_params) + assert soln.full().shape == data.shape + assert soln is existing_params.solution, "Solution should be the same object" + + cp_params.symmetric = [(0, 1)] + soln, data = create_problem(cp_params, missing_params) + assert soln.full().shape == data.shape + + with pytest.raises(ValueError): + empty_num_factors = BaseProblem(shape) + create_problem(empty_num_factors, missing_params) + with pytest.raises(ValueError): + inconsistent_num_factors = BaseProblem(shape, num_factors=[2, 2]) + create_problem(inconsistent_num_factors, missing_params) + with pytest.raises(ValueError): + bad_problem_type = BaseProblem(shape, num_factors=3) + create_problem(bad_problem_type, missing_params) + + # TODO hit edge cases and symmetric + + +def test_create_problem_smoke_sparse(): + shape = (2, 2, 2) + cp_params = CPProblem( + shape, sparse_generation=0.99, 
factor_generator=np.random.random + ) + missing_params = MissingData() + soln, data = create_problem(cp_params, missing_params) + assert soln.full().shape == data.shape + + with pytest.raises(ValueError): + missing_AND_sparse_generation = MissingData(missing_ratio=0.1) + create_problem(cp_params, missing_AND_sparse_generation) + # TODO hit edge cases and symmetric + + +def test_create_problem_smoke_missing(): + shape = (4, 5, 6) + cp_params = CPProblem(shape, factor_generator=np.random.random) + missing_params = MissingData(missing_ratio=0.8) + soln, data = create_problem(cp_params, missing_params) + assert soln.full().shape == data.shape + + missing_params = MissingData(missing_ratio=0.8, sparse_model=True) + soln, data = create_problem(cp_params, missing_params) + assert soln.full().shape == data.shape + + with pytest.raises(ValueError): + bad_pattern_shape = np.ones([dim + 1 for dim in soln.shape]) + missing_params = MissingData(missing_pattern=bad_pattern_shape) + create_problem(cp_params, missing_params) + + with pytest.raises(ValueError): + bad_pattern_type = np.ones(soln.shape) + missing_params = MissingData(missing_pattern=bad_pattern_type) + create_problem(cp_params, missing_params) + + +def test_generate_data_sparse_value_errors(): + """Test that generate_data_sparse raises expected ValueErrors.""" + shape = (3, 3, 3) + + # Test negative weights + factor_matrices = [np.random.random((3, 2)) for _ in range(3)] + negative_weights = np.array([-1.0, 1.0]) # One negative weight + solution = ttb.ktensor(factor_matrices, negative_weights) + problem_params = CPProblem(shape, sparse_generation=0.5) + + with pytest.raises(ValueError): + generate_data_sparse(solution, problem_params) + + # Test negative factor matrices + factor_matrices = [np.random.random((3, 2)) for _ in range(3)] + factor_matrices[0][0, 0] = -1.0 # Make one element negative + positive_weights = np.array([1.0, 1.0]) + solution = ttb.ktensor(factor_matrices, positive_weights) + problem_params = 
CPProblem(shape, sparse_generation=0.5) + + with pytest.raises(ValueError): + generate_data_sparse(solution, problem_params) + + # Test missing sparse_generation + factor_matrices = [np.random.random((3, 2)) for _ in range(3)] + positive_weights = np.array([1.0, 1.0]) + solution = ttb.ktensor(factor_matrices, positive_weights) + problem_params = CPProblem(shape, sparse_generation=None) + + with pytest.raises(ValueError): + generate_data_sparse(solution, problem_params) diff --git a/tests/test_ktensor.py b/tests/test_ktensor.py index 6c560c26..6abda068 100644 --- a/tests/test_ktensor.py +++ b/tests/test_ktensor.py @@ -779,23 +779,27 @@ def test_ktensor_redistribute(sample_ktensor_2way): def test_ktensor_score(): A = ttb.ktensor( - [np.ones((3, 3)), np.ones((4, 3)), np.ones((5, 3))], np.array([2.0, 1.0, 3.0]) + [np.ones((3, 3)) + 0.1, np.ones((4, 3)) + 0.2, np.ones((5, 3)) + 0.3], + np.array([2.0, 1.0, 3.0]), ) B = ttb.ktensor( - [np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))], np.array([2.0, 4.0]) + [np.ones((3, 2)) + 0.1, np.ones((4, 2)) + 0.2, np.ones((5, 2)) + 0.3], + np.array([2.0, 4.0]), ) + A_norm = A.copy().normalize() + # defaults score, Aperm, flag, best_perm = A.score(B) - assert score == 0.875 - assert np.allclose(Aperm.weights, np.array([15.49193338, 23.23790008, 7.74596669])) + assert np.isclose(score, 0.875) + assert np.allclose(Aperm.weights, A_norm.weights[best_perm]) assert flag assert np.array_equal(best_perm, np.array([0, 2, 1])) # compare just factor matrices (i.e., do not use weights) score, Aperm, flag, best_perm = A.score(B, weight_penalty=False) - assert score == 1.0 - assert np.allclose(Aperm.weights, np.array([15.49193338, 7.74596669, 23.23790008])) + assert np.isclose(score, 1.0) + assert np.allclose(Aperm.weights, A_norm.weights[best_perm]) assert not flag assert np.array_equal(best_perm, np.array([0, 1, 2])) diff --git a/tests/test_sptensor.py b/tests/test_sptensor.py index 0e41e23a..b7d32756 100644 --- a/tests/test_sptensor.py +++ 
b/tests/test_sptensor.py @@ -1357,7 +1357,7 @@ def test_sptensor_squeeze(sample_sptensor): ) assert np.array_equal( ttb.sptensor(np.array([[0, 0, 0]]), np.array([4]), (2, 2, 1)).squeeze().vals, - np.array([4]), + np.array([[4]]), ) # Singleton dimension with empty sptensor diff --git a/tests/test_sptensor3.py b/tests/test_sptensor3.py deleted file mode 100644 index fd7cd94f..00000000 --- a/tests/test_sptensor3.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2024 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. - -import pytest - -import pyttb as ttb - - -def test_sptensor3_initialization_empty(): - with pytest.raises(AssertionError) as excinfo: - ttb.sptensor3() - assert "SPTENSOR3 class not yet implemented" in str(excinfo) diff --git a/tests/test_symktensor.py b/tests/test_symktensor.py deleted file mode 100644 index 0265d6a7..00000000 --- a/tests/test_symktensor.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2024 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. - -import pytest - -import pyttb as ttb - - -def test_symktensor_initialization_empty(): - with pytest.raises(AssertionError) as excinfo: - ttb.symktensor() - assert "SYMKTENSOR class not yet implemented" in str(excinfo) diff --git a/tests/test_symtensor.py b/tests/test_symtensor.py deleted file mode 100644 index 5ee45bcf..00000000 --- a/tests/test_symtensor.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2024 National Technology & Engineering Solutions of Sandia, -# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the -# U.S. Government retains certain rights in this software. 
- -import pytest - -import pyttb as ttb - - -def test_symtensor_initialization_empty(): - with pytest.raises(AssertionError) as excinfo: - ttb.symtensor() - assert "SYMTENSOR class not yet implemented" in str(excinfo)