From 7bb794a18281e1fa4af0cb003adba144ebd542ff Mon Sep 17 00:00:00 2001 From: Sergey Pokhodenko Date: Wed, 26 Feb 2020 23:55:43 +0300 Subject: [PATCH 1/4] Scale quantile via DAAL --- sdc/_daal.cpp | 91 ++++++++++++++++++++++++++++++++++++++++++ sdc/daal_overloads.py | 69 ++++++++++++++++++++++++++++++++ sdc/tests/__init__.py | 1 + sdc/tests/test_daal.py | 77 +++++++++++++++++++++++++++++++++++ setup.py | 18 +++++++++ 5 files changed, 256 insertions(+) create mode 100644 sdc/_daal.cpp create mode 100644 sdc/daal_overloads.py create mode 100644 sdc/tests/test_daal.py diff --git a/sdc/_daal.cpp b/sdc/_daal.cpp new file mode 100644 index 000000000..5ae369700 --- /dev/null +++ b/sdc/_daal.cpp @@ -0,0 +1,91 @@ +//***************************************************************************** +// Copyright (c) 2020, Intel Corporation All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + + +extern "C" +{ + +int test(int x) +{ + return x + 42; +} + +double sum(double *p, int c) +{ + double result = 0.0; + for (int i = 0; i < c; ++i) + { + result += p[i]; + } + return result; +} + +double quantile(int c, double *p, double q) +{ + using namespace daal; + using namespace daal::algorithms; + using namespace daal::data_management; + + quantiles::Batch<> algorithm; + + auto in_table = HomogenNumericTable::create(p, 1, c); + algorithm.input.set(quantiles::data, in_table); + + algorithm.parameter.quantileOrders->assign(q); + + algorithm.compute(); + + auto out_table = algorithm.getResult()->get(quantiles::quantiles); + return out_table->getValue(0, 0); +} + +PyMODINIT_FUNC PyInit_daal() +{ + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "daal", + "No docs", + -1, + NULL, + }; + PyObject* m = PyModule_Create(&moduledef); + if (m == NULL) + { + return NULL; + } + +#define REGISTER(func) PyObject_SetAttrString(m, #func, PyLong_FromVoidPtr((void*)(&func))); + REGISTER(test) + REGISTER(sum) + REGISTER(quantile) +#undef REGISTER + return m; +} + +} // extern "C" diff --git a/sdc/daal_overloads.py b/sdc/daal_overloads.py new file mode 100644 index 000000000..6f706bbb5 --- /dev/null +++ b/sdc/daal_overloads.py @@ -0,0 +1,69 @@ +# ***************************************************************************** +# Copyright (c) 2020, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numba + +from numba import types +from numba.extending import overload + +# from numba import typing, generated_jit +# from numba.extending import models, register_model +# from numba.extending import lower_builtin, overload_method, intrinsic + +# from llvmlite import ir as lir +import llvmlite.binding as ll + +from . import daal + + +ll.add_symbol('test', daal.test) +ll.add_symbol('sum', daal.sum) + + +_test = types.ExternalFunction("test", types.int_(types.int_)) +_sum = types.ExternalFunction("sum", types.float64(types.voidptr, types.int_)) + + +def test(x): + pass + + +@overload(test) +def test_overload(x): + return lambda x: _test(x) + + +import ctypes + +functype_test = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int) +ctypes_test = functype_test(daal.test) + +# functype_sum = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.POINTER(ctypes.c_double), ctypes.c_int) +functype_sum = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_void_p, ctypes.c_int) +ctypes_sum = functype_sum(daal.sum) + + +quantile = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_int, ctypes.c_void_p, ctypes.c_double)(daal.quantile) diff --git a/sdc/tests/__init__.py b/sdc/tests/__init__.py index 41236156a..ab7d390f7 100644 --- a/sdc/tests/__init__.py +++ b/sdc/tests/__init__.py @@ -44,6 +44,7 @@ from sdc.tests.test_hpat_jit import * +from sdc.tests.test_daal import * from sdc.tests.test_sdc_numpy import * from sdc.tests.test_prange_utils import * diff --git a/sdc/tests/test_daal.py b/sdc/tests/test_daal.py new file mode 100644 index 000000000..99345e4a9 --- /dev/null +++ b/sdc/tests/test_daal.py @@ -0,0 +1,77 @@ +# ***************************************************************************** +# Copyright (c) 2020, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numpy as np +import ctypes + +from sdc.tests.test_base import TestCase + +from sdc.daal_overloads import test, ctypes_test, ctypes_sum, quantile + + +class TestDaal(TestCase): + + def test_test(self): + def pyfunc(): + return test(10) + + def ctypes_pyfunc(): + return ctypes_test(10) + + cfunc = self.jit(pyfunc) + ctypes_cfunc = self.jit(ctypes_pyfunc) + # self.assertEqual(cfunc(), pyfunc()) + self.assertEqual(cfunc(), ctypes_pyfunc()) + self.assertEqual(ctypes_cfunc(), ctypes_pyfunc()) + + def test_sum(self): + def pyfunc(arr): + # return ctypes_sum(arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), len(arr)) + return ctypes_sum(arr.ctypes, len(arr)) + cfunc = self.jit(pyfunc) + + arr = np.arange(10, dtype=np.float64) + expected = np.sum(arr) + + # print(ctypes_sum.argtypes) + + self.assertEqual(pyfunc(arr), expected) + self.assertEqual(cfunc(arr), expected) + + def test_quantile(self): + def pyfunc(arr, q): + return quantile(len(arr), arr.ctypes, q) + + cfunc = self.jit(pyfunc) + + arr = np.arange(10, dtype=np.float64) + q = 0.5 + expected = np.quantile(arr, q) + + # print(ctypes_sum.argtypes) + + self.assertEqual(pyfunc(arr, q), expected) + self.assertEqual(cfunc(arr, q), expected) diff --git a/setup.py b/setup.py index f9b08004e..cba2a4058 100644 --- a/setup.py +++ b/setup.py @@ -197,6 +197,24 @@ def readme(): if _has_opencv: _ext_mods.append(ext_cv_wrapper) +daal_root = "/localdisk/spokhode/miniconda3/envs/sdc-env" + +ext_daal = Extension(name="sdc.daal", + sources=["sdc/_daal.cpp"], + include_dirs=[os.path.join(daal_root, 'include')], + libraries=['daal_core', 'daal_thread'], + library_dirs=[ + # for Linux + os.path.join(daal_root, 'lib', 'intel64', 'gcc4.4'), + # for MacOS + os.path.join(daal_root, 'lib'), + # for Windows + os.path.join(daal_root, 'lib', 'intel64', 'vc_mt'), + ], + language="c++", + ) + +_ext_mods.append(ext_daal) class style(Command): """ Command to check and adjust code style From 5b2cd6f678036c1d7cd46ff37b0d6b8ce9cb69aa Mon Sep 17 00:00:00 2001 From: Sergey Pokhodenko Date: Thu, 27 Feb 2020 00:03:56 +0300 Subject: [PATCH 2/4] Add different q for tests --- sdc/tests/test_daal.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdc/tests/test_daal.py b/sdc/tests/test_daal.py index 99345e4a9..cc91fd11a 100644 --- a/sdc/tests/test_daal.py +++ b/sdc/tests/test_daal.py @@ -64,14 +64,14 @@ def pyfunc(arr): def test_quantile(self): def pyfunc(arr, q): return quantile(len(arr), arr.ctypes, q) - cfunc = self.jit(pyfunc) arr = np.arange(10, dtype=np.float64) - q = 0.5 - expected = np.quantile(arr, q) # print(ctypes_sum.argtypes) - self.assertEqual(pyfunc(arr, q), expected) - self.assertEqual(cfunc(arr, q), expected) + for q in [0., 0.25, 0.5, 0.75, 1.]: + with self.subTest(q=q): + expected = np.quantile(arr, q) + self.assertEqual(pyfunc(arr, q), expected) + self.assertEqual(cfunc(arr, q), expected) From 31a716b571810132cd636028fe522ea55320fbc5 Mon Sep 17 00:00:00 2001 From: Sergey Pokhodenko Date: Thu, 27 Feb 2020 00:53:12 +0300 Subject: [PATCH 3/4] Use daal_quantile for Series.quantile --- sdc/datatypes/hpat_pandas_series_functions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 550750861..25c9f3f18 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -65,6 +65,7 @@ from sdc.functions import numpy_like from sdc.hiframes.api import isna from sdc.datatypes.hpat_pandas_groupby_functions import init_series_groupby +from sdc.daal_overloads import quantile as daal_quantile from .pandas_series_functions import apply from .pandas_series_functions import map as _map @@ -3361,8 +3362,8 @@ def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'): ty_checker.raise_exc(q, 'int, float, list', 'q') def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'): - - return numpy.quantile(self._data, q) + # return numpy.quantile(self._data, q) + return daal_quantile(len(self._data), self._data.ctypes, q) return hpat_pandas_series_quantile_impl From 3bdad33ceabfe35edb414fb7727901039474afc4 Mon Sep 17 00:00:00 2001 From: Sergey Pokhodenko Date: Fri, 28 Feb 2020 18:53:36 +0300 Subject: [PATCH 4/4] wip --- sdc/_daal.cpp | 33 ++++++++++++++++++++++++--------- sdc/daal_overloads.py | 12 ++++++------ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sdc/_daal.cpp b/sdc/_daal.cpp index 5ae369700..ea76a093d 100644 --- a/sdc/_daal.cpp +++ b/sdc/_daal.cpp @@ -28,6 +28,12 @@ #include +using namespace daal; +using namespace daal::algorithms; +using namespace daal::data_management; +using namespace daal::services; + + extern "C" { @@ -46,23 +52,31 @@ double sum(double *p, int c) return result; } -double quantile(int c, double *p, double q) +double median(int nRows, double *ptr) { - using namespace daal; - using namespace daal::algorithms; - using namespace daal::data_management; + quantiles::Batch algorithm; + algorithm.input.set(quantiles::data, HomogenNumericTable::create(ptr, 1, nRows)); + algorithm.compute(); + return algorithm.getResult()->get(quantiles::quantiles)->getValue(0, 0); +} - quantiles::Batch<> algorithm; +void quantile(const double * data, const __int64_t nFeatures, const __int64_t nVectors, + const __int64_t quantOrderN, const double * quantOrder, + double * quants) +{ + Environment::getInstance()->setNumberOfThreads(4); // does not affect - auto in_table = HomogenNumericTable::create(p, 1, c); - algorithm.input.set(quantiles::data, in_table); + // quantiles::Batch<> algorithm; + quantiles::Batch algorithm; // 2 times faster + algorithm.parameter.quantileOrders = HomogenNumericTable::create(quantOrder, 1, quantOrderN); - algorithm.parameter.quantileOrders->assign(q); + algorithm.input.set(quantiles::data, HomogenNumericTable::create(data, nFeatures, nVectors)); algorithm.compute(); auto out_table = algorithm.getResult()->get(quantiles::quantiles); - return out_table->getValue(0, 0); + for (int i = 0; i < quantOrderN; ++i) + quants[i] = out_table->getValue(0, i); } PyMODINIT_FUNC PyInit_daal() @@ -83,6 +97,7 @@ PyMODINIT_FUNC PyInit_daal() #define REGISTER(func) PyObject_SetAttrString(m, #func, PyLong_FromVoidPtr((void*)(&func))); REGISTER(test) REGISTER(sum) + REGISTER(median) REGISTER(quantile) #undef REGISTER return m; diff --git a/sdc/daal_overloads.py b/sdc/daal_overloads.py index 6f706bbb5..b9084c83b 100644 --- a/sdc/daal_overloads.py +++ b/sdc/daal_overloads.py @@ -25,6 +25,7 @@ # ***************************************************************************** import numba +import ctypes as ct from numba import types from numba.extending import overload @@ -56,14 +57,13 @@ def test_overload(x): return lambda x: _test(x) -import ctypes - -functype_test = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int) +functype_test = ct.CFUNCTYPE(ct.c_int, ct.c_int) ctypes_test = functype_test(daal.test) -# functype_sum = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.POINTER(ctypes.c_double), ctypes.c_int) -functype_sum = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_void_p, ctypes.c_int) +# functype_sum = ct.CFUNCTYPE(ct.c_double, ct.POINTER(ct.c_double), ct.c_int) +functype_sum = ct.CFUNCTYPE(ct.c_double, ct.c_void_p, ct.c_int) ctypes_sum = functype_sum(daal.sum) -quantile = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_int, ctypes.c_void_p, ctypes.c_double)(daal.quantile) +median = ct.CFUNCTYPE(ct.c_double, ct.c_int, ct.c_void_p)(daal.median) +quantile = ct.CFUNCTYPE(ct.c_double, ct.c_int, ct.c_void_p, ct.c_double)(daal.quantile)