Skip to content

Commit 14fbc1e

Browse files
authored
Fix uniquify to handle multiple successive duplicates (#126889) (#126953)
CollectionUtils.uniquify is based on C++ std::unique. However, C++ iterators are not quite the same as Java iterators. In particular, advancing them only allows grabbing the value once. This commit reworks uniquify to be based on list indices instead of iterators. closes #126883
1 parent a4111ec commit 14fbc1e

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

docs/changelog/126889.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 126889
2+
summary: Rework uniquify to not use iterators
3+
area: Infra/Core
4+
type: bug
5+
issues:
6+
- 126883

server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java

+13-9
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,24 @@ public static <T> void uniquify(List<T> list, Comparator<T> cmp) {
5151
return;
5252
}
5353

54-
ListIterator<T> uniqueItr = list.listIterator();
55-
ListIterator<T> existingItr = list.listIterator();
56-
T uniqueValue = uniqueItr.next(); // get first element to compare with
57-
existingItr.next(); // advance the existing iterator to the second element, where we will begin comparing
54+
ListIterator<T> resultItr = list.listIterator();
55+
ListIterator<T> currentItr = list.listIterator(1); // start at second element to compare
56+
T lastValue = resultItr.next(); // result always includes first element, so advance it and grab first
5857
do {
59-
T existingValue = existingItr.next();
60-
if (cmp.compare(existingValue, uniqueValue) != 0 && (uniqueValue = uniqueItr.next()) != existingValue) {
61-
uniqueItr.set(existingValue);
58+
T currentValue = currentItr.next(); // each iter we check if the next element is different from the last we put in the result
59+
if (cmp.compare(lastValue, currentValue) != 0) {
60+
lastValue = currentValue;
61+
resultItr.next(); // advance result so the current position is where we want to set
62+
if (resultItr.previousIndex() != currentItr.previousIndex()) {
63+
// optimization: only need to set if different
64+
resultItr.set(currentValue);
65+
}
6266
}
63-
} while (existingItr.hasNext());
67+
} while (currentItr.hasNext());
6468

6569
// Lop off the rest of the list. Note with LinkedList this requires advancing back to this index,
6670
// but Java provides no way to efficiently remove from the end of a non random-access list.
67-
list.subList(uniqueItr.nextIndex(), list.size()).clear();
71+
list.subList(resultItr.nextIndex(), list.size()).clear();
6872
}
6973

7074
/**

server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java

+17-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ private <T> void assertUniquify(List<T> list, Comparator<T> cmp, int size) {
6565
for (List<T> listCopy : List.of(new ArrayList<T>(list), new LinkedList<T>(list))) {
6666
CollectionUtils.uniquify(listCopy, cmp);
6767
for (int i = 0; i < listCopy.size() - 1; ++i) {
68-
assertThat(cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0));
68+
assertThat(listCopy.get(i) + " < " + listCopy.get(i + 1), cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0));
6969
}
7070
assertThat(listCopy.size(), equalTo(size));
7171
}
@@ -75,9 +75,25 @@ public void testUniquify() {
7575
assertUniquify(List.<Integer>of(), Comparator.naturalOrder(), 0);
7676
assertUniquify(List.of(1), Comparator.naturalOrder(), 1);
7777
assertUniquify(List.of(1, 2, 3), Comparator.naturalOrder(), 3);
78+
assertUniquify(List.of(1, 1, 3), Comparator.naturalOrder(), 2);
7879
assertUniquify(List.of(1, 1, 1), Comparator.naturalOrder(), 1);
7980
assertUniquify(List.of(1, 2, 2, 3), Comparator.naturalOrder(), 3);
8081
assertUniquify(List.of(1, 2, 2, 2), Comparator.naturalOrder(), 2);
82+
assertUniquify(List.of(1, 2, 2, 3, 3, 5), Comparator.naturalOrder(), 4);
83+
84+
for (int i = 0; i < 10; ++i) {
85+
int uniqueItems = randomIntBetween(1, 10);
86+
var list = new ArrayList<Integer>();
87+
int next = 1;
88+
for (int j = 0; j < uniqueItems; ++j) {
89+
int occurences = randomIntBetween(1, 10);
90+
while (occurences-- > 0) {
91+
list.add(next);
92+
}
93+
next++;
94+
}
95+
assertUniquify(list, Comparator.naturalOrder(), uniqueItems);
96+
}
8197
}
8298

8399
public void testEmptyPartition() {

0 commit comments

Comments
 (0)