|
26 | 26 | import java.util.List;
|
27 | 27 | import java.util.Map;
|
28 | 28 |
|
29 |
| -import static com.facebook.presto.spi.function.StatsPropagationBehavior.NON_NULL_ROW_COUNT; |
30 |
| -import static com.facebook.presto.spi.function.StatsPropagationBehavior.ROW_COUNT; |
| 29 | +import static com.facebook.presto.spi.function.StatsPropagationBehavior.Constants.NON_NULL_ROW_COUNT_CONST; |
| 30 | +import static com.facebook.presto.spi.function.StatsPropagationBehavior.Constants.ROW_COUNT_CONST; |
31 | 31 | import static com.facebook.presto.spi.function.StatsPropagationBehavior.SUM_ARGUMENTS;
|
32 | 32 | import static com.facebook.presto.spi.function.StatsPropagationBehavior.SUM_ARGUMENTS_UPPER_BOUNDED_TO_ROW_COUNT;
|
33 | 33 | import static com.facebook.presto.spi.function.StatsPropagationBehavior.UNKNOWN;
|
|
41 | 41 | import static java.lang.Double.NaN;
|
42 | 42 | import static java.lang.Double.isFinite;
|
43 | 43 | import static java.lang.Double.isNaN;
|
| 44 | +import static java.util.Objects.requireNonNull; |
44 | 45 |
|
45 | 46 | public final class ScalarStatsAnnotationProcessor
|
46 | 47 | {
|
47 | 48 | private ScalarStatsAnnotationProcessor()
|
48 | 49 | {
|
49 | 50 | }
|
50 | 51 |
|
51 |
| - public static VariableStatsEstimate process( |
52 |
| - double outputRowCount, |
| 52 | + public static VariableStatsEstimate computeConcatStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats, double outputRowCount) |
| 53 | + { // Concat function is specially handled since it is a generated function for all arity. |
| 54 | + double nullFraction = NaN; |
| 55 | + double ndv = NaN; |
| 56 | + double avgRowSize = 0.0; |
| 57 | + for (VariableStatsEstimate stat : sourceStats) { |
| 58 | + if (isFinite(stat.getNullsFraction())) { |
| 59 | + nullFraction = firstFiniteValue(nullFraction, 0.0); |
| 60 | + nullFraction = max(nullFraction, stat.getNullsFraction()); |
| 61 | + } |
| 62 | + if (isFinite(stat.getDistinctValuesCount())) { |
| 63 | + ndv = firstFiniteValue(ndv, 0.0); |
| 64 | + ndv = max(ndv, stat.getDistinctValuesCount()); |
| 65 | + } |
| 66 | + if (isFinite(stat.getAverageRowSize())) { |
| 67 | + avgRowSize += stat.getAverageRowSize(); |
| 68 | + } |
| 69 | + } |
| 70 | + if (avgRowSize == 0.0) { |
| 71 | + avgRowSize = NaN; |
| 72 | + } |
| 73 | + return VariableStatsEstimate.builder() |
| 74 | + .setNullsFraction(nullFraction) |
| 75 | + .setDistinctValuesCount(minExcludingNaNs(ndv, outputRowCount)) |
| 76 | + .setAverageRowSize(minExcludingNaNs(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, SUM_ARGUMENTS)), avgRowSize)) |
| 77 | + .build(); |
| 78 | + } |
| 79 | + |
| 80 | + public static VariableStatsEstimate computeHashCodeOperatorStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats, double outputRowCount) |
| 81 | + { |
| 82 | + requireNonNull(call, "call is null"); |
| 83 | + checkArgument(sourceStats.size() == 1, |
| 84 | + "exactly one argument expected for hash code operator scalar function"); |
| 85 | + VariableStatsEstimate argStats = sourceStats.get(0); |
| 86 | + VariableStatsEstimate.Builder result = |
| 87 | + VariableStatsEstimate.builder() |
| 88 | + .setAverageRowSize(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, UNKNOWN))) |
| 89 | + .setNullsFraction(argStats.getNullsFraction()) |
| 90 | + .setDistinctValuesCount(minExcludingNaNs(argStats.getDistinctValuesCount(), outputRowCount)); |
| 91 | + return result.build(); |
| 92 | + } |
| 93 | + |
| 94 | + public static VariableStatsEstimate computeComparisonOperatorStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats) |
| 95 | + { |
| 96 | + requireNonNull(call, "call is null"); |
| 97 | + if (sourceStats.size() != 2) { |
| 98 | + return VariableStatsEstimate.unknown(); |
| 99 | + } |
| 100 | + VariableStatsEstimate left = sourceStats.get(0); |
| 101 | + VariableStatsEstimate right = sourceStats.get(1); |
| 102 | + VariableStatsEstimate.Builder result = |
| 103 | + VariableStatsEstimate.builder() |
| 104 | + .setAverageRowSize(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, UNKNOWN))) |
| 105 | + .setNullsFraction(left.getNullsFraction() + right.getNullsFraction() - left.getNullsFraction() * right.getNullsFraction()) |
| 106 | + .setDistinctValuesCount(1.0); |
| 107 | + return result.build(); |
| 108 | + } |
| 109 | + |
| 110 | + public static VariableStatsEstimate computeStatsFromAnnotations( |
53 | 111 | CallExpression callExpression,
|
54 | 112 | List<VariableStatsEstimate> sourceStats,
|
55 |
| - ScalarStatsHeader scalarStatsHeader) |
| 113 | + ScalarStatsHeader scalarStatsHeader, |
| 114 | + double outputRowCount) |
56 | 115 | {
|
57 | 116 | double nullFraction = scalarStatsHeader.getNullFraction();
|
58 | 117 | double distinctValuesCount = NaN;
|
@@ -99,10 +158,10 @@ public static VariableStatsEstimate process(
|
99 | 158 | private static double processDistinctValuesCount(double outputRowCount, double nullFraction, double distinctValuesCountFromConstant, double distinctValuesCount)
|
100 | 159 | {
|
101 | 160 | if (isFinite(distinctValuesCountFromConstant)) {
|
102 |
| - if (nearlyEqual(distinctValuesCountFromConstant, NON_NULL_ROW_COUNT.getValue(), 0.1)) { |
| 161 | + if (nearlyEqual(distinctValuesCountFromConstant, NON_NULL_ROW_COUNT_CONST, 0.1)) { |
103 | 162 | distinctValuesCountFromConstant = outputRowCount * (1 - firstFiniteValue(nullFraction, 0.0));
|
104 | 163 | }
|
105 |
| - else if (nearlyEqual(distinctValuesCount, ROW_COUNT.getValue(), 0.1)) { |
| 164 | + else if (nearlyEqual(distinctValuesCount, ROW_COUNT_CONST, 0.1)) { |
106 | 165 | distinctValuesCountFromConstant = outputRowCount;
|
107 | 166 | }
|
108 | 167 | }
|
@@ -164,6 +223,14 @@ private static double processSingleArgumentStatistic(
|
164 | 223 | case USE_TYPE_WIDTH_VARCHAR:
|
165 | 224 | statValue = returnNaNIfTypeWidthUnknown(getTypeWidthVarchar(callExpression.getArguments().get(sourceStatsArgumentIndex).getType()));
|
166 | 225 | break;
|
| 226 | + case LOG10_SOURCE_STATS: |
| 227 | + statValue = Math.log10(sourceStats.get(sourceStatsArgumentIndex)); |
| 228 | + break; |
| 229 | + case LOG2_SOURCE_STATS: |
| 230 | + statValue = Math.log(sourceStats.get(sourceStatsArgumentIndex)) / Math.log(2); |
| 231 | + break; |
| 232 | + case LOG_NATURAL_SOURCE_STATS: |
| 233 | + statValue = Math.log(sourceStats.get(sourceStatsArgumentIndex)); |
167 | 234 | }
|
168 | 235 | }
|
169 | 236 | return statValue;
|
|
0 commit comments