Skip to content

Commit b2a5de5

Browse files
author
michaelwenk
committed
chore: optimised getOutliers method
1 parent 7f44e3d commit b2a5de5

File tree

2 files changed

+47
-18
lines changed

2 files changed

+47
-18
lines changed

src/casekit/nmr/analysis/HOSECodeShiftStatistics.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ public static Map<String, Map<String, Double[]>> buildHOSECodeShiftStatistics(
193193
for (final Map.Entry<String, List<Double>> solvents : hoseCodes.getValue()
194194
.entrySet()) {
195195
values = new ArrayList<>(solvents.getValue());
196-
Statistics.removeOutliers(values, 1.5);
196+
values = Statistics.removeOutliers(values, 1.5);
197197
hoseCodeShiftStatistics.get(hoseCodes.getKey())
198198
.put(solvents.getKey(),
199199
new Double[]{(double) values.size(), Collections.min(values),

src/casekit/nmr/utils/Statistics.java

+46-17
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,61 @@ public class Statistics {
1717
*
1818
* @return new array list without values outside the generated boundaries
1919
*/
20-
public static void removeOutliers(final List<Double> input, final double multiplierIQR) {
21-
input.removeAll(getOutliers(input, multiplierIQR));
20+
public static List<Double> removeOutliers(final List<Double> input, final double multiplierIQR) {
21+
final List<Double> values = new ArrayList<>();
22+
if (input.size()
23+
<= 1) {
24+
return values;
25+
}
26+
final double[] boundaries = getLowerAndUpperBoundaries(input, multiplierIQR);
27+
final double lowerBound = boundaries[0];
28+
final double upperBound = boundaries[1];
29+
30+
for (final Double value : input) {
31+
if (value
32+
>= lowerBound
33+
&& value
34+
<= upperBound) {
35+
values.add(value);
36+
}
37+
}
38+
39+
return values;
2240
}
2341

2442
/**
25-
* @param input
26-
* @param multiplierIQR
43+
* Detects outliers in given array list of input values and returns them. <br>
44+
* Here, outliers are those values which lie below a calculated lower bound or above a calculated upper bound (whiskers).
45+
* The interquartile range (IQR) of the input values is multiplied by the given multiplier
46+
* to derive these bounds.
2747
*
28-
* @return
48+
* @param input list of values to process
49+
* @param multiplierIQR multiplier for IQR to use for lower and upper bound creation
50+
*
51+
* @return new array list with values outside the generated boundaries
2952
*/
3053
public static List<Double> getOutliers(final List<Double> input, final double multiplierIQR) {
3154
final List<Double> outliers = new ArrayList<>();
3255
if (input.size()
3356
<= 1) {
3457
return outliers;
3558
}
59+
final double[] boundaries = getLowerAndUpperBoundaries(input, multiplierIQR);
60+
final double lowerBound = boundaries[0];
61+
final double upperBound = boundaries[1];
62+
for (final Double value : input) {
63+
if (value
64+
< lowerBound
65+
|| value
66+
> upperBound) {
67+
outliers.add(value);
68+
}
69+
}
70+
71+
return outliers;
72+
}
73+
74+
public static double[] getLowerAndUpperBoundaries(final List<Double> input, final double multiplierIQR) {
3675
Collections.sort(input);
3776
final List<Double> data1 = input.subList(0, input.size()
3877
/ 2);
@@ -57,18 +96,8 @@ public static List<Double> getOutliers(final List<Double> input, final double mu
5796
final double upperBound = q3
5897
+ multiplierIQR
5998
* iqr;
60-
for (int i = 0; i
61-
< input.size(); i++) {
62-
if ((input.get(i)
63-
< lowerBound)
64-
|| (input.get(i)
65-
> upperBound)) {
66-
outliers.add(input.get(i));
67-
}
68-
}
69-
// System.out.println("input size: " + input.size());
70-
// System.out.println("output size: " + outliers.size());
71-
return outliers;
99+
100+
return new double[]{lowerBound, upperBound};
72101
}
73102

74103
/**

0 commit comments

Comments
 (0)