Skip to content

Commit 5e265ef

Browse files
authored
Merge pull request #78 from dcherian/optimize
Some optimizations for numpy aggregations
2 parents 6b7877b + 16ad9a2 commit 5e265ef

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

numpy_groupies/aggregate_numpy.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def _sum(group_idx, a, size, fill_value, dtype=None):
2424
dtype = minimum_dtype_scalar(fill_value, dtype, a)
2525

2626
if np.ndim(a) == 0:
27-
ret = np.bincount(group_idx, minlength=size).astype(dtype)
27+
ret = np.bincount(group_idx, minlength=size).astype(dtype, copy=False)
2828
if a != 1:
2929
ret *= a
3030
else:
@@ -33,7 +33,9 @@ def _sum(group_idx, a, size, fill_value, dtype=None):
3333
ret.real = np.bincount(group_idx, weights=a.real, minlength=size)
3434
ret.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
3535
else:
36-
ret = np.bincount(group_idx, weights=a, minlength=size).astype(dtype)
36+
ret = np.bincount(group_idx, weights=a, minlength=size).astype(
37+
dtype, copy=False
38+
)
3739

3840
if fill_value != 0:
3941
_fill_untouched(group_idx, ret, fill_value)
@@ -146,19 +148,19 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
146148
sums.real = np.bincount(group_idx, weights=a.real, minlength=size)
147149
sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
148150
else:
149-
sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype)
151+
sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False)
150152

151153
with np.errstate(divide="ignore", invalid="ignore"):
152-
ret = sums.astype(dtype) / counts
154+
ret = sums.astype(dtype, copy=False) / counts
153155
if not np.isnan(fill_value):
154156
ret[counts == 0] = fill_value
155157
return ret
156158

157159

158160
def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
159161
ret = np.bincount(group_idx, weights=a * a, minlength=size)
160-
counts = np.bincount(group_idx, minlength=size)
161162
if fill_value != 0:
163+
counts = np.bincount(group_idx, minlength=size)
162164
ret[counts == 0] = fill_value
163165
return ret
164166

@@ -171,7 +173,7 @@ def _var(
171173
counts = np.bincount(group_idx, minlength=size)
172174
sums = np.bincount(group_idx, weights=a, minlength=size)
173175
with np.errstate(divide="ignore", invalid="ignore"):
174-
means = sums.astype(dtype) / counts
176+
means = sums.astype(dtype, copy=False) / counts
175177
counts = np.where(counts > ddof, counts - ddof, 0)
176178
ret = (
177179
np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
@@ -299,6 +301,7 @@ def _aggregate_base(
299301
dtype=None,
300302
axis=None,
301303
_impl_dict=_impl_dict,
304+
is_pandas=False,
302305
**kwargs
303306
):
304307
iv = input_validation(group_idx, a, size=size, order=order, axis=axis, func=func)
@@ -324,7 +327,9 @@ def _aggregate_base(
324327
kwargs["_nansqueeze"] = True
325328
else:
326329
good = ~np.isnan(a)
327-
a = a[good]
330+
if "len" not in func or is_pandas:
331+
# a is not needed for len, nanlen!
332+
a = a[good]
328333
group_idx = group_idx[good]
329334

330335
dtype = check_dtype(dtype, func, a, flat_size)

numpy_groupies/aggregate_pandas.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ def aggregate(
6969
func=func,
7070
axis=axis,
7171
_impl_dict=_impl_dict,
72+
is_pandas=True,
7273
**kwargs
7374
)
7475

0 commit comments

Comments
 (0)