Merge pull request #80 from IntelPython/fix-clongdouble-input-win32

oleksandr-pavlyk · web-flow · commit 0eb591402c08 · 2023-03-17T16:45:11.000-05:00
Fix unexpected behavior of fft call on clongdouble type on win32
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
@@ -30,14 +30,14 @@ requirements:
 
 test:
     commands:
-      - pytest -v --args mkl_fft
+      - pytest -v --pyargs mkl_fft
     requires:
       - pytest
     imports:
       - mkl_fft
-      - mkl_fft.interface
-      - mkl_fft.interface.numpy_fft
-      - mkl_fft.interface.scipy_fft
+      - mkl_fft.interfaces
+      - mkl_fft.interfaces.numpy_fft
+      - mkl_fft.interfaces.scipy_fft
 
 about:
     home: http://github.com/IntelPython/mkl_fft
diff --git a/mkl_fft/_pydfti.pyx b/mkl_fft/_pydfti.pyx
@@ -321,7 +321,7 @@ def _fft1d_impl(x, n=None, axis=-1, overwrite_arg=False, direction=+1, double fs
         # so we cast to complex double and operate in place
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED)
+                x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
         except:
             raise ValueError("First argument must be a complex or real sequence of single or double precision")
         x_type = cnp.PyArray_TYPE(x_arr)
@@ -545,7 +545,7 @@ def _rr_fft1d_impl2(x, n=None, axis=-1, overwrite_arg=False, double fsc=1.0):
     else:
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED)
+                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
         except:
             raise TypeError("1st argument must be a real sequence")
         x_type = cnp.PyArray_TYPE(x_arr)
@@ -601,7 +601,7 @@ def _rr_ifft1d_impl2(x, n=None, axis=-1, overwrite_arg=False, double fsc=1.0):
         # so we cast to complex double and operate in place
         try:
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
-                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED)
+                x_arr, cnp.NPY_DOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
         except:
             raise ValueError("First argument should be a real or a complex sequence of single or double precision")
         x_type = cnp.PyArray_TYPE(x_arr)
@@ -669,7 +669,7 @@ def _rc_fft1d_impl(x, n=None, axis=-1, overwrite_arg=False, double fsc=1.0):
     else:
         # we must cast the input to doubles and allocate the output,
         try:
-            requirement = cnp.NPY_BEHAVED
+            requirement = cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY
             if x_type is cnp.NPY_LONGDOUBLE:
                 requirement = requirement | cnp.NPY_FORCECAST
             x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
@@ -981,7 +981,14 @@ def _direct_fftnd(x, overwrite_arg=False, direction=+1, double fsc=1.0):
        in_place = 1  # a copy was made, so we can work in place.
 
     x_type = cnp.PyArray_TYPE(x_arr)
-    assert( x_type == cnp.NPY_CDOUBLE or x_type == cnp.NPY_CFLOAT or x_type == cnp.NPY_DOUBLE or x_type == cnp.NPY_FLOAT);
+    if (x_type == cnp.NPY_CDOUBLE or x_type == cnp.NPY_CFLOAT or x_type == cnp.NPY_DOUBLE or x_type == cnp.NPY_FLOAT):
+        pass
+    else:
+        x_arr = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            x_arr, cnp.NPY_CDOUBLE, cnp.NPY_BEHAVED | cnp.NPY_ENSURECOPY)
+        x_type = cnp.PyArray_TYPE(x_arr)
+        assert x_type == cnp.NPY_CDOUBLE
+        in_place = 1
 
     if in_place:
         in_place = 1 if x_type == cnp.NPY_CDOUBLE or x_type == cnp.NPY_CFLOAT else 0
@@ -1076,7 +1083,7 @@ def _fftnd_impl(x, shape=None, axes=None, overwrite_x=False, direction=+1, doubl
     if _direct:
         return _direct_fftnd(x, overwrite_arg=overwrite_x, direction=direction, fsc=fsc)
     else:
-        if (shape is None and x.dtype in [np.complex64, np.complex128, np.float32, np.float64]):
+        if (shape is None and x.dtype in [np.csingle, np.cdouble, np.single, np.double]):
             x = np.asarray(x)
             res = np.empty(x.shape, dtype=_output_dtype(x.dtype))
             return iter_complementary(
diff --git a/mkl_fft/_scipy_fft_backend.py b/mkl_fft/_scipy_fft_backend.py
@@ -30,7 +30,9 @@
 
 from numpy.core import (take, sqrt, prod)
 import contextvars
+import contextlib
 import operator
+import os
 
 
 __doc__ = """
@@ -89,14 +91,20 @@ def get_workers():
     return _workers_global_settings.get().workers
 
 
+@contextlib.contextmanager
 def set_workers(n_workers):
     "Set the value of workers used by default, returns the previous value"
     nw = operator.index(n_workers)
-    wd = _workers_global_settings.get()
-    saved_nw = wd.workers
-    wd.workers = nw
-    _workers_global_settings.set(wd)
-    return saved_nw
+    token = None
+    try:
+        new_wd = _workers_data(nw)
+        token = _workers_global_settings.set(new_wd)
+        yield
+    finally:
+        if token:
+            _workers_global_settings.reset(token)
+        else:
+            raise ValueError
 
 
 __all__ = ['fft', 'ifft', 'fft2', 'ifft2', 'fftn', 'ifftn',
@@ -153,7 +161,7 @@ def _workers_to_num_threads(w):
     if (_w == 0):
         raise ValueError("Number of workers must not be zero")
     if (_w < 0):
-        ub = _cpu_max_threads_count().get_cpu_count()
+        ub = os.cpu_count()
         _w += ub + 1
         if _w <= 0:
             raise ValueError("workers value out of range; got {}, must not be"