Skip to content

Commit

Permalink
[#376] Implement shortcut for 0.0 and 1.0 percentile calculations
Browse files Browse the repository at this point in the history
A few fixes from the suggested PR:

- Removed ad-hoc @param javadoc again. To be fixed, (if at all) separately and more thoroughly via [#388]
- Removed unnecessary comments
- Fixed Comparator contract violation
  • Loading branch information
lukaseder committed May 21, 2021
1 parent ee8b7e1 commit 82b5791
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 64 deletions.
52 changes: 20 additions & 32 deletions jOOL-java-8/src/main/java/org/jooq/lambda/Agg.java
Original file line number Diff line number Diff line change
Expand Up @@ -906,49 +906,37 @@ else if (l1[0] == null)

/**
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering.
*
* @param function map the items in the streams into values
* @param comparator comparator used for sorting the items
* @return a collector that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function
*/
public static <T, U> Collector<T, ?, Optional<T>> percentileBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
if (percentile < 0.0 || percentile > 1.0)
throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0");

// CS304 Issue link: https://github.com/jOOQ/jOOL/issues/376
if (percentile == 0.0)
// If percentile is 0, this is the same as taking the item with the minimum value.
return minBy(function, comparator);
else if (percentile == 1.0)
// If percentile is 1, this is the same as taking the item with the maximum value,
// If there are multiple maxima, take the last one.
return maxBy(function, (o1, o2) -> {
int compareResult = comparator.compare(o1, o2);
return compareResult == 0 ? -1 : compareResult;
});

// At a later stage, we'll optimise this implementation in case that function is the identity function
return Collector.of(
() -> new ArrayList<Tuple2<T, U>>(),
(l, v) -> l.add(tuple(v, function.apply(v))),
(l1, l2) -> {
l1.addAll(l2);
return l1;
},
l -> {
int size = l.size();
return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
else
return Collector.of(
() -> new ArrayList<Tuple2<T, U>>(),
(l, v) -> l.add(tuple(v, function.apply(v))),
(l1, l2) -> {
l1.addAll(l2);
return l1;
},
l -> {
int size = l.size();

if (size == 0)
return Optional.empty();
else if (size == 1)
return Optional.of(l.get(0).v1);
if (size == 0)
return Optional.empty();
else if (size == 1)
return Optional.of(l.get(0).v1);

l.sort(Comparator.comparing(t -> t.v2, comparator));
l.sort(Comparator.comparing(t -> t.v2, comparator));

// x.5 should be rounded down
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
}
);
// x.5 should be rounded down
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
}
);
}

/**
Expand Down
52 changes: 20 additions & 32 deletions jOOL/src/main/java/org/jooq/lambda/Agg.java
Original file line number Diff line number Diff line change
Expand Up @@ -906,49 +906,37 @@ else if (l1[0] == null)

/**
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering.
*
* @param function map the items in the streams into values
* @param comparator comparator used for sorting the items
* @return a collector that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function
*/
public static <T, U> Collector<T, ?, Optional<T>> percentileBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
if (percentile < 0.0 || percentile > 1.0)
throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0");

// CS304 Issue link: https://github.com/jOOQ/jOOL/issues/376
if (percentile == 0.0)
// If percentile is 0, this is the same as taking the item with the minimum value.
return minBy(function, comparator);
else if (percentile == 1.0)
// If percentile is 1, this is the same as taking the item with the maximum value,
// If there are multiple maxima, take the last one.
return maxBy(function, (o1, o2) -> {
int compareResult = comparator.compare(o1, o2);
return compareResult == 0 ? -1 : compareResult;
});

// At a later stage, we'll optimise this implementation in case that function is the identity function
return Collector.of(
() -> new ArrayList<Tuple2<T, U>>(),
(l, v) -> l.add(tuple(v, function.apply(v))),
(l1, l2) -> {
l1.addAll(l2);
return l1;
},
l -> {
int size = l.size();
return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
else
return Collector.of(
() -> new ArrayList<Tuple2<T, U>>(),
(l, v) -> l.add(tuple(v, function.apply(v))),
(l1, l2) -> {
l1.addAll(l2);
return l1;
},
l -> {
int size = l.size();

if (size == 0)
return Optional.empty();
else if (size == 1)
return Optional.of(l.get(0).v1);
if (size == 0)
return Optional.empty();
else if (size == 1)
return Optional.of(l.get(0).v1);

l.sort(Comparator.comparing(t -> t.v2, comparator));
l.sort(Comparator.comparing(t -> t.v2, comparator));

// x.5 should be rounded down
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
}
);
// x.5 should be rounded down
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
}
);
}

/**
Expand Down

0 comments on commit 82b5791

Please sign in to comment.