Skip to content

Commit e778c4f

Browse files
committed
Added support for concat functions and operators.
1 parent d2a5953 commit e778c4f

File tree

6 files changed

+275
-98
lines changed

6 files changed

+275
-98
lines changed

presto-common/src/main/java/com/facebook/presto/common/function/OperatorType.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ public boolean isArithmeticOperator()
9191
return this.equals(ADD) || this.equals(SUBTRACT) || this.equals(MULTIPLY) || this.equals(DIVIDE) || this.equals(MODULUS);
9292
}
9393

94+
public boolean isHashOperator()
95+
{
96+
return this.equals(HASH_CODE) ||
97+
this.equals(XX_HASH_64);
98+
}
99+
94100
public static Optional<OperatorType> tryGetOperatorType(QualifiedObjectName operatorName)
95101
{
96102
return Optional.ofNullable(OPERATOR_TYPES.get(operatorName));

presto-main/src/main/java/com/facebook/presto/cost/ScalarStatsAnnotationProcessor.java

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
import java.util.List;
2727
import java.util.Map;
2828

29-
import static com.facebook.presto.spi.function.StatsPropagationBehavior.NON_NULL_ROW_COUNT;
30-
import static com.facebook.presto.spi.function.StatsPropagationBehavior.ROW_COUNT;
29+
import static com.facebook.presto.spi.function.StatsPropagationBehavior.Constants.NON_NULL_ROW_COUNT_CONST;
30+
import static com.facebook.presto.spi.function.StatsPropagationBehavior.Constants.ROW_COUNT_CONST;
3131
import static com.facebook.presto.spi.function.StatsPropagationBehavior.SUM_ARGUMENTS;
3232
import static com.facebook.presto.spi.function.StatsPropagationBehavior.SUM_ARGUMENTS_UPPER_BOUNDED_TO_ROW_COUNT;
3333
import static com.facebook.presto.spi.function.StatsPropagationBehavior.UNKNOWN;
@@ -41,18 +41,77 @@
4141
import static java.lang.Double.NaN;
4242
import static java.lang.Double.isFinite;
4343
import static java.lang.Double.isNaN;
44+
import static java.util.Objects.requireNonNull;
4445

4546
public final class ScalarStatsAnnotationProcessor
4647
{
4748
private ScalarStatsAnnotationProcessor()
4849
{
4950
}
5051

51-
public static VariableStatsEstimate process(
52-
double outputRowCount,
52+
public static VariableStatsEstimate computeConcatStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats, double outputRowCount)
53+
{ // Concat function is specially handled since it is a generated function for all arity.
54+
double nullFraction = NaN;
55+
double ndv = NaN;
56+
double avgRowSize = 0.0;
57+
for (VariableStatsEstimate stat : sourceStats) {
58+
if (isFinite(stat.getNullsFraction())) {
59+
nullFraction = firstFiniteValue(nullFraction, 0.0);
60+
nullFraction = max(nullFraction, stat.getNullsFraction());
61+
}
62+
if (isFinite(stat.getDistinctValuesCount())) {
63+
ndv = firstFiniteValue(ndv, 0.0);
64+
ndv = max(ndv, stat.getDistinctValuesCount());
65+
}
66+
if (isFinite(stat.getAverageRowSize())) {
67+
avgRowSize += stat.getAverageRowSize();
68+
}
69+
}
70+
if (avgRowSize == 0.0) {
71+
avgRowSize = NaN;
72+
}
73+
return VariableStatsEstimate.builder()
74+
.setNullsFraction(nullFraction)
75+
.setDistinctValuesCount(minExcludingNaNs(ndv, outputRowCount))
76+
.setAverageRowSize(minExcludingNaNs(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, SUM_ARGUMENTS)), avgRowSize))
77+
.build();
78+
}
79+
80+
public static VariableStatsEstimate computeHashCodeOperatorStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats, double outputRowCount)
81+
{
82+
requireNonNull(call, "call is null");
83+
checkArgument(sourceStats.size() == 1,
84+
"exactly one argument expected for hash code operator scalar function");
85+
VariableStatsEstimate argStats = sourceStats.get(0);
86+
VariableStatsEstimate.Builder result =
87+
VariableStatsEstimate.builder()
88+
.setAverageRowSize(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, UNKNOWN)))
89+
.setNullsFraction(argStats.getNullsFraction())
90+
.setDistinctValuesCount(minExcludingNaNs(argStats.getDistinctValuesCount(), outputRowCount));
91+
return result.build();
92+
}
93+
94+
public static VariableStatsEstimate computeComparisonOperatorStatistics(CallExpression call, List<VariableStatsEstimate> sourceStats)
95+
{
96+
requireNonNull(call, "call is null");
97+
if (sourceStats.size() != 2) {
98+
return VariableStatsEstimate.unknown();
99+
}
100+
VariableStatsEstimate left = sourceStats.get(0);
101+
VariableStatsEstimate right = sourceStats.get(1);
102+
VariableStatsEstimate.Builder result =
103+
VariableStatsEstimate.builder()
104+
.setAverageRowSize(returnNaNIfTypeWidthUnknown(getReturnTypeWidth(call, UNKNOWN)))
105+
.setNullsFraction(left.getNullsFraction() + right.getNullsFraction() - left.getNullsFraction() * right.getNullsFraction())
106+
.setDistinctValuesCount(1.0);
107+
return result.build();
108+
}
109+
110+
public static VariableStatsEstimate computeStatsFromAnnotations(
53111
CallExpression callExpression,
54112
List<VariableStatsEstimate> sourceStats,
55-
ScalarStatsHeader scalarStatsHeader)
113+
ScalarStatsHeader scalarStatsHeader,
114+
double outputRowCount)
56115
{
57116
double nullFraction = scalarStatsHeader.getNullFraction();
58117
double distinctValuesCount = NaN;
@@ -99,10 +158,10 @@ public static VariableStatsEstimate process(
99158
private static double processDistinctValuesCount(double outputRowCount, double nullFraction, double distinctValuesCountFromConstant, double distinctValuesCount)
100159
{
101160
if (isFinite(distinctValuesCountFromConstant)) {
102-
if (nearlyEqual(distinctValuesCountFromConstant, NON_NULL_ROW_COUNT.getValue(), 0.1)) {
161+
if (nearlyEqual(distinctValuesCountFromConstant, NON_NULL_ROW_COUNT_CONST, 0.1)) {
103162
distinctValuesCountFromConstant = outputRowCount * (1 - firstFiniteValue(nullFraction, 0.0));
104163
}
105-
else if (nearlyEqual(distinctValuesCount, ROW_COUNT.getValue(), 0.1)) {
164+
else if (nearlyEqual(distinctValuesCount, ROW_COUNT_CONST, 0.1)) {
106165
distinctValuesCountFromConstant = outputRowCount;
107166
}
108167
}
@@ -164,6 +223,14 @@ private static double processSingleArgumentStatistic(
164223
case USE_TYPE_WIDTH_VARCHAR:
165224
statValue = returnNaNIfTypeWidthUnknown(getTypeWidthVarchar(callExpression.getArguments().get(sourceStatsArgumentIndex).getType()));
166225
break;
226+
case LOG10_SOURCE_STATS:
227+
statValue = Math.log10(sourceStats.get(sourceStatsArgumentIndex));
228+
break;
229+
case LOG2_SOURCE_STATS:
230+
statValue = Math.log(sourceStats.get(sourceStatsArgumentIndex)) / Math.log(2);
231+
break;
232+
case LOG_NATURAL_SOURCE_STATS:
233+
statValue = Math.log(sourceStats.get(sourceStatsArgumentIndex));
167234
}
168235
}
169236
return statValue;

presto-main/src/main/java/com/facebook/presto/cost/ScalarStatsCalculator.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import com.facebook.presto.FullConnectorSession;
1717
import com.facebook.presto.Session;
1818
import com.facebook.presto.SystemSessionProperties;
19+
import com.facebook.presto.common.QualifiedObjectName;
1920
import com.facebook.presto.common.function.OperatorType;
2021
import com.facebook.presto.common.type.Type;
2122
import com.facebook.presto.common.type.TypeSignature;
@@ -66,6 +67,9 @@
6667

6768
import static com.facebook.presto.common.function.OperatorType.DIVIDE;
6869
import static com.facebook.presto.common.function.OperatorType.MODULUS;
70+
import static com.facebook.presto.cost.ScalarStatsAnnotationProcessor.computeComparisonOperatorStatistics;
71+
import static com.facebook.presto.cost.ScalarStatsAnnotationProcessor.computeConcatStatistics;
72+
import static com.facebook.presto.cost.ScalarStatsAnnotationProcessor.computeHashCodeOperatorStatistics;
6973
import static com.facebook.presto.cost.StatsUtil.toStatsRepresentation;
7074
import static com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED;
7175
import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.COALESCE;
@@ -240,9 +244,19 @@ private VariableStatsEstimate computeCallStatistics(CallExpression call, Void co
240244
requireNonNull(call, "call is null");
241245
List<VariableStatsEstimate> sourceStatsList =
242246
IntStream.range(0, call.getArguments().size()).mapToObj(argumentIndex -> getSourceStats(call, context, argumentIndex)).collect(toImmutableList());
243-
VariableStatsEstimate result =
244-
ScalarStatsAnnotationProcessor.process(input.getOutputRowCount(), call, sourceStatsList, scalarStatsHeader);
245-
return result;
247+
FunctionMetadata functionMetadata = metadata.getFunctionAndTypeManager().getFunctionMetadata(call.getFunctionHandle());
248+
if (functionMetadata.getOperatorType().map(OperatorType::isHashOperator).orElse(false)) {
249+
return computeHashCodeOperatorStatistics(call, sourceStatsList, input.getOutputRowCount());
250+
}
251+
252+
if (functionMetadata.getOperatorType().map(OperatorType::isComparisonOperator).orElse(false)) {
253+
return computeComparisonOperatorStatistics(call, sourceStatsList);
254+
}
255+
256+
if (functionMetadata.getName().equals(QualifiedObjectName.valueOf("presto.default.concat"))) {
257+
return computeConcatStatistics(call, sourceStatsList, input.getOutputRowCount());
258+
}
259+
return ScalarStatsAnnotationProcessor.computeStatsFromAnnotations(call, sourceStatsList, scalarStatsHeader, input.getOutputRowCount());
246260
}
247261

248262
private VariableStatsEstimate computeCastStatistics(CallExpression call, Void context)

0 commit comments

Comments
 (0)