From f4ede247ed913b7e2e00d23eb12b9a86db72b941 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:19:01 -0600 Subject: [PATCH 1/7] Add nonunique to complement unique. Nonunique returns the already seen elements of a sequence. --- toolz/itertoolz.py | 32 ++++++++++++++++++++++++++------ toolz/tests/test_itertoolz.py | 8 +++++++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index b8165162..82aea2f4 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -274,6 +274,30 @@ def unique(seq, key=None): yield item +def nonunique(seq, key=None): + """Return only nonunique elements of a sequence. + + >>> tuple(nonunique((1, 2, 3, 1))) + (1,) + >>> tuple(nonunique((1, 2, 3))) + () + """ + seen = set() + seen_add = seen.add + if key is None: + for item in seq: + if item in seen: + yield item + seen_add(item) + else: + for item in seq: + val = key(item) + if val in seen: + yield item + seen_add(val) + + + def isiterable(x): """ Is x iterable? 
@@ -305,12 +329,8 @@ def isdistinct(seq): True """ if iter(seq) is seq: - seen = set() - seen_add = seen.add - for item in seq: - if item in seen: - return False - seen_add(item) + for item in nonunique(seq): + return False return True else: return len(seq) == len(set(seq)) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 61618725..8262dc66 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -4,7 +4,7 @@ from functools import partial from random import Random from pickle import dumps, loads -from toolz.itertoolz import (remove, groupby, merge_sorted, +from toolz.itertoolz import (nonunique, remove, groupby, merge_sorted, concat, concatv, interleave, unique, isiterable, getter, mapcat, isdistinct, first, second, @@ -105,6 +105,12 @@ def test_unique(): assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2) +def test_nonunique(): + assert tuple(nonunique((1, 2, 3))) == () + assert tuple(nonunique((1, 2, 1, 3, 1))) == (1, 1) + assert tuple(nonunique((1, 2, 3, 4), key=iseven)) == (3, 4) + + def test_isiterable(): assert isiterable([1, 2, 3]) is True assert isiterable('abc') is True From c1e8138deacfaffdd64108722aab1d3d9d8bcb65 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:31:39 -0600 Subject: [PATCH 2/7] Remove extra line. --- toolz/itertoolz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 82aea2f4..914cf19e 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -297,7 +297,6 @@ def nonunique(seq, key=None): seen_add(val) - def isiterable(x): """ Is x iterable? From fe77b418cc53376da5c648912eb29b0d2c549163 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:51:47 -0600 Subject: [PATCH 3/7] Only add items that aren't present. Guarding the seen_add call can improve performance when there is a high ratio of duplicates. 
--- toolz/itertoolz.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 914cf19e..8f5c5e55 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -10,7 +10,7 @@ __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', - 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', + 'unique', 'nonunique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', @@ -288,13 +288,15 @@ def nonunique(seq, key=None): for item in seq: if item in seen: yield item - seen_add(item) + else: + seen_add(item) else: for item in seq: val = key(item) if val in seen: yield item - seen_add(val) + else: + seen_add(val) def isiterable(x): From 8c00c823747578e882476ac367ee3c8d4150065a Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:52:05 -0600 Subject: [PATCH 4/7] Add nonunique to curried namespace. --- toolz/curried/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index 356eddbd..a488bf76 100644 --- a/toolz/curried/__init__.py +++ b/toolz/curried/__init__.py @@ -23,6 +23,7 @@ See Also: toolz.functoolz.curry """ +from toolz.itertoolz import nonunique import toolz from . 
import operator from toolz import ( @@ -77,6 +78,7 @@ keymap = toolz.curry(toolz.keymap) map = toolz.curry(toolz.map) mapcat = toolz.curry(toolz.mapcat) +nonunique = toolz.curry(toolz.nonunique) nth = toolz.curry(toolz.nth) partial = toolz.curry(toolz.partial) partition = toolz.curry(toolz.partition) From af51d52678bcad8440bdd37c6ee646a9c392fe2a Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:54:09 -0600 Subject: [PATCH 5/7] Reformat __all__ --- toolz/itertoolz.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 8f5c5e55..e8f8501e 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -10,11 +10,12 @@ __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', - 'unique', 'nonunique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', - 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', - 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', - 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', - 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') + 'unique', 'nonunique', 'isiterable', 'isdistinct', 'take', 'drop', + 'take_nth', 'first', 'second', 'nth', 'last', 'get', 'concat', + 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', + 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', + 'pluck', 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', + 'random_sample') def remove(predicate, seq): From e5c03a8cdf3bb23576273cc9992f7877b39c8b8f Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Mon, 11 Jan 2021 17:55:38 -0600 Subject: [PATCH 6/7] Remove line automatically added by IDE. 
--- toolz/curried/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index a488bf76..30709bc9 100644 --- a/toolz/curried/__init__.py +++ b/toolz/curried/__init__.py @@ -23,7 +23,6 @@ See Also: toolz.functoolz.curry """ -from toolz.itertoolz import nonunique import toolz from . import operator from toolz import ( From 7b443a5e487ae4eff33beb3a50111acaad48eaed Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 29 Oct 2021 16:04:35 -0500 Subject: [PATCH 7/7] Refer to related functions in docstring. --- toolz/itertoolz.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index e8f8501e..1a3532f9 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -259,6 +259,9 @@ def unique(seq, key=None): >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) ('cat', 'mouse') + + See also: + nonunique """ seen = set() seen_add = seen.add @@ -276,12 +279,15 @@ def unique(seq, key=None): def nonunique(seq, key=None): - """Return only nonunique elements of a sequence. + """Return only the nonunique/duplicated elements of a sequence. >>> tuple(nonunique((1, 2, 3, 1))) (1,) >>> tuple(nonunique((1, 2, 3))) () + + See also: + unique """ seen = set() seen_add = seen.add