Skip to content

Commit d2c5ec0

Browse files
authored
update all tests to use <string, ui64> tuples; add markdown-table result printer (#16746)
1 parent 5402667 commit d2c5ec0

File tree

14 files changed

+652
-476
lines changed

14 files changed

+652
-476
lines changed

ydb/core/kqp/tools/combiner_perf/bin/main.cpp

Lines changed: 131 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <ydb/core/kqp/tools/combiner_perf/printout.h>
12
#include <ydb/core/kqp/tools/combiner_perf/simple_last.h>
23
#include <ydb/core/kqp/tools/combiner_perf/simple.h>
34
#include <ydb/core/kqp/tools/combiner_perf/tpch_last.h>
@@ -7,61 +8,155 @@
78

89
#include <util/stream/output.h>
910
#include <util/stream/file.h>
11+
#include <util/string/printf.h>
1012
#include <util/system/compiler.h>
1113

12-
void DoFullPass(bool withSpilling)
14+
using NKikimr::NMiniKQL::TRunParams;
15+
16+
class TPrintingResultCollector : public TTestResultCollector
1317
{
14-
using namespace NKikimr::NMiniKQL;
18+
public:
19+
virtual void SubmitTestNameAndParams(const TRunParams& runParams, const char* testName, const std::optional<bool> llvm, const std::optional<bool> spilling) override
20+
{
21+
Cout << "------------------------------" << Endl;
22+
Cout << testName;
23+
if (llvm.has_value()) {
24+
Cout << ", " << (llvm.value() ? "+" : "-") << "llvm";
25+
}
26+
if (spilling.has_value()) {
27+
Cout << ", " << (spilling.value() ? "+" : "-") << "spilling";
28+
}
29+
Cout << Endl;
30+
Cout << "Data rows total: " << runParams.RowsPerRun << " x " << runParams.NumRuns << Endl;
31+
Cout << (runParams.MaxKey + 1) << " distinct numeric keys" << Endl;
32+
Cout << "Block size: " << runParams.BlockSize << Endl;
33+
Cout << "Long strings: " << (runParams.LongStringKeys ? "yes" : "no") << Endl;
34+
Cout << Endl;
35+
}
1536

16-
TRunParams runParams;
37+
virtual void SubmitTimings(const TDuration& graphTime, const TDuration& referenceTime, const std::optional<TDuration> streamTime) override
38+
{
39+
Cout << "Graph runtime is: " << graphTime << " vs. reference C++ implementation: " << referenceTime << Endl;
1740

18-
runParams.NumRuns = 20;
19-
runParams.RowsPerRun = 5'000'000;
20-
runParams.MaxKey = 200'000 - 1;
21-
runParams.BlockSize = 5'000;
22-
runParams.LongStringKeys = true;
41+
if (streamTime.has_value()) {
42+
Cout << "Input stream own iteration time: " << *streamTime << Endl;
43+
Cout << "Graph time - stream own time = " << (*streamTime <= graphTime ? graphTime - *streamTime : TDuration::Zero()) << Endl;
44+
Cout << "C++ implementation time - devnull time = " << (*streamTime <= referenceTime ? referenceTime - *streamTime : TDuration::Zero()) << Endl;
45+
}
46+
}
47+
};
2348

24-
RunTestBlockCombineHashedSimple<false, false>(runParams);
49+
class TWikiResultCollector : public TTestResultCollector
50+
{
51+
public:
52+
TWikiResultCollector()
53+
{
54+
Cout << "#|" << Endl;
55+
Cout << "|| Test name | LLVM | Spilling | RowsTotal | Distinct keys | Block size | Input stream own time (s) | Graph time - stream time (s) | C++ time - stream time (s) | Shame ratio ||" << Endl;
56+
}
2557

26-
auto doSimpleLast = [](const TRunParams& params) {
27-
Cerr << "LastSimple, -llvm, -spilling" << Endl;
28-
NKikimr::NMiniKQL::RunTestCombineLastSimple<false, false>(params);
58+
~TWikiResultCollector()
59+
{
60+
Cout << "|#" << Endl;
61+
}
2962

30-
if (false) {
31-
Cerr << "LastSimple, +llvm, -spilling" << Endl;
32-
NKikimr::NMiniKQL::RunTestCombineLastSimple<true, false>(params);
63+
virtual void SubmitTestNameAndParams(const TRunParams& runParams, const char* testName, const std::optional<bool> llvm, const std::optional<bool> spilling) override
64+
{
65+
Cout << "|| ";
66+
Cout << testName << " | ";
67+
if (llvm.has_value()) {
68+
Cout << (llvm.value() ? "+" : " ");
3369
}
34-
};
70+
Cout << " | ";
71+
if (spilling.has_value()) {
72+
Cout << (spilling.value() ? "+" : " ");
73+
}
74+
Cout << " | ";
3575

36-
doSimpleLast(runParams);
76+
Cout << (runParams.RowsPerRun * runParams.NumRuns) << " | " << (runParams.MaxKey + 1) << " | ";
77+
if (TStringBuf(testName).Contains("Block")) {
78+
Cout << runParams.BlockSize;
79+
}
80+
Cout << " | ";
81+
}
3782

38-
if (false) {
39-
Cerr << "Simple, -llvm, -spilling" << Endl;
40-
NKikimr::NMiniKQL::RunTestSimple<false>();
83+
static TString FancyDuration(const TDuration duration)
84+
{
85+
const auto ms = duration.MilliSeconds();
86+
if (!ms) {
87+
return " ";
88+
}
89+
return Sprintf("%.2f", (ms / 1000.0));
90+
}
4191

42-
Cerr << "Simple, +llvm, -spilling" << Endl;
43-
NKikimr::NMiniKQL::RunTestSimple<true>();
92+
virtual void SubmitTimings(const TDuration& graphTime, const TDuration& referenceTime, const std::optional<TDuration> streamTime) override
93+
{
94+
TDuration streamTimeOrZero = (streamTime.has_value()) ? streamTime.value() : TDuration::Zero();
95+
TDuration corrGraphTime = streamTimeOrZero <= graphTime ? graphTime - streamTimeOrZero : TDuration::Zero();
96+
TDuration corrRefTime = streamTimeOrZero <= referenceTime ? referenceTime - streamTimeOrZero : TDuration::Zero();
4497

45-
Cerr << "LastTpch, -llvm, -spilling" << Endl;
46-
NKikimr::NMiniKQL::RunTestLastTpch<false, false>();
98+
TString diff;
99+
if (corrRefTime.MilliSeconds() > 0) {
100+
diff = Sprintf("%.2f", corrGraphTime.MilliSeconds() * 1.0 / corrRefTime.MilliSeconds());
101+
}
47102

48-
Cerr << "LastTpch, +llvm, -spilling" << Endl;
49-
NKikimr::NMiniKQL::RunTestLastTpch<true, false>();
103+
Cout << FancyDuration(streamTimeOrZero) << " | " << FancyDuration(corrGraphTime) << " | " << FancyDuration(corrRefTime) << " | " << diff << " ||" << Endl;
104+
Cout.Flush();
50105
}
106+
};
51107

52-
if (withSpilling) {
53-
Cerr << "LastSimple, -llvm, +spilling" << Endl;
54-
NKikimr::NMiniKQL::RunTestCombineLastSimple<false, true>(runParams);
108+
void DoFullPass(bool withSpilling)
109+
{
110+
using namespace NKikimr::NMiniKQL;
55111

56-
Cerr << "LastSimple, +llvm, +spilling" << Endl;
57-
NKikimr::NMiniKQL::RunTestCombineLastSimple<true, true>(runParams);
112+
TWikiResultCollector printout;
58113

59-
Cerr << "LastTpch, -llvm, +spilling" << Endl;
60-
NKikimr::NMiniKQL::RunTestLastTpch<false, true>();
114+
TRunParams runParams;
61115

62-
Cerr << "LastTpch, +llvm, +spilling" << Endl;
63-
NKikimr::NMiniKQL::RunTestLastTpch<true, true>();
64-
}
116+
runParams.NumRuns = 20;
117+
runParams.RowsPerRun = 5'000'000;
118+
runParams.MaxKey = 1'00 - 1;
119+
runParams.LongStringKeys = false;
120+
121+
const std::vector<size_t> numKeys = {4u, 1000u, 100'000u, 200'000u};
122+
const std::vector<size_t> blockSizes = {128u, 8192u};
123+
124+
auto doSimple = [&printout, numKeys](const TRunParams& params) {
125+
for (size_t keyCount : numKeys) {
126+
auto runParams = params;
127+
runParams.MaxKey = keyCount - 1;
128+
RunTestSimple<false>(runParams, printout);
129+
RunTestSimple<true>(runParams, printout);
130+
}
131+
};
132+
133+
auto doSimpleLast = [&printout, &numKeys, withSpilling](const TRunParams& params) {
134+
for (size_t keyCount : numKeys) {
135+
auto runParams = params;
136+
runParams.MaxKey = keyCount - 1;
137+
RunTestCombineLastSimple<false, false>(runParams, printout);
138+
RunTestCombineLastSimple<true, false>(runParams, printout);
139+
if (withSpilling) {
140+
RunTestCombineLastSimple<false, true>(runParams, printout);
141+
RunTestCombineLastSimple<true, true>(runParams, printout);
142+
}
143+
}
144+
};
145+
146+
auto doBlockHashed = [&printout, &numKeys, &blockSizes](const TRunParams& params) {
147+
for (size_t keyCount : numKeys) {
148+
for (size_t blockSize : blockSizes) {
149+
auto runParams = params;
150+
runParams.MaxKey = keyCount - 1;
151+
runParams.BlockSize = blockSize;
152+
RunTestBlockCombineHashedSimple<false, false>(runParams, printout);
153+
}
154+
}
155+
};
156+
157+
doSimple(runParams);
158+
doSimpleLast(runParams);
159+
doBlockHashed(runParams);
65160
}
66161

67162
int main(int argc, const char* argv[])

ydb/core/kqp/tools/combiner_perf/bin/ya.make

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,3 @@ SRCS(
2121
)
2222

2323
END()
24-

ydb/core/kqp/tools/combiner_perf/converters.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma once
2+
13
#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h>
24
#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
35
#include <yql/essentials/minikql/comp_nodes/mkql_factories.h>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#pragma once
2+
3+
#include "run_params.h"
4+
5+
#include <util/datetime/base.h>
6+
#include <optional>
7+
8+
class TTestResultCollector {
9+
public:
10+
virtual void SubmitTestNameAndParams(const NKikimr::NMiniKQL::TRunParams& runParams, const char* testName, const std::optional<bool> llvm = {}, const std::optional<bool> spilling = {}) = 0;
11+
12+
virtual void SubmitTimings(const TDuration& graphTime, const TDuration& referenceTime, const std::optional<TDuration> streamTime = {}) = 0;
13+
14+
virtual ~TTestResultCollector() {};
15+
};

ydb/core/kqp/tools/combiner_perf/run_params.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
#include <util/system/defaults.h>
44

5-
65
namespace NKikimr {
76
namespace NMiniKQL {
87

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "simple.h"
2+
23
#include "factories.h"
4+
#include "streams.h"
5+
#include "printout.h"
36

47
#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h>
58
#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
@@ -13,74 +16,71 @@
1316
namespace NKikimr {
1417
namespace NMiniKQL {
1518

16-
std::vector<std::pair<i8, double>> MakeSamples() {
17-
constexpr auto total_samples = 100'000'000ULL;
18-
19-
std::default_random_engine eng;
20-
std::uniform_int_distribution<int> keys(-100, +100);
21-
std::uniform_real_distribution<double> unif(-999.0, +999.0);
22-
23-
std::vector<std::pair<i8, double>> samples(total_samples);
24-
25-
eng.seed(std::time(nullptr));
26-
std::generate(samples.begin(), samples.end(), std::bind(&std::make_pair<i8, double>, std::bind(std::move(keys), std::move(eng)), std::bind(std::move(unif), std::move(eng))));
27-
return samples;
28-
}
29-
30-
const std::vector<std::pair<i8, double>> I8Samples = MakeSamples();
31-
3219
template<bool LLVM>
33-
void RunTestSimple()
20+
void RunTestSimple(const TRunParams& params, TTestResultCollector& printout)
3421
{
3522
TSetup<LLVM> setup(GetPerfTestFactory());
3623

37-
Cerr << "Simple i8 sample has " << I8Samples.size() << " rows" << Endl;
24+
printout.SubmitTestNameAndParams(params, __func__, LLVM);
3825

39-
double positive = 0.0, negative = 0.0;
40-
const auto t = TInstant::Now();
41-
for (const auto& sample : I8Samples) {
42-
(sample.second > 0.0 ? positive : negative) += sample.second;
43-
}
44-
const auto cppTime = TInstant::Now() - t;
26+
TString64DataSampler sampler(params.RowsPerRun, params.MaxKey, params.NumRuns, params.LongStringKeys);
27+
// or T6464DataSampler sampler(numSamples, maxKey, numIters); -- maybe make selectable from params
28+
Cerr << "Sampler type: " << sampler.Describe() << Endl;
4529

4630
TProgramBuilder& pb = *setup.PgmBuilder;
4731

48-
const auto listType = pb.NewListType(pb.NewDataType(NUdf::TDataType<double>::Id));
49-
const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
50-
51-
const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
52-
[&](TRuntimeNode item) -> TRuntimeNode::TList { return {item}; }), 0ULL,
53-
[&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {pb.AggrGreater(items.front(), pb.NewDataLiteral(0.0))}; },
54-
[&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return items; },
55-
[&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.front())}; },
56-
[&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
57-
[&](TRuntimeNode::TList items) { return items.front(); }
32+
const auto streamItemType = pb.NewMultiType({sampler.GetKeyType(pb), pb.NewDataType(NUdf::TDataType<ui64>::Id)});
33+
const auto streamType = pb.NewStreamType(streamItemType);
34+
const auto streamCallable = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", streamType).Build();
35+
36+
const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(
37+
pb.ToFlow(TRuntimeNode(streamCallable, false)),
38+
0ULL,
39+
[&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return { items.front() }; },
40+
[&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return { items.back() } ; },
41+
[&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
42+
return {pb.AggrAdd(state.front(), items.back())};
43+
},
44+
[&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList {
45+
return {keys.front(), state.front()};
46+
}),
47+
[&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
5848
));
5949

60-
const auto graph = setup.BuildGraph(pgmReturn, {list});
61-
NUdf::TUnboxedValue* items = nullptr;
62-
graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
63-
std::transform(I8Samples.cbegin(), I8Samples.cend(), items, [](const std::pair<i8, double> s){ return ToValue<double>(s.second); });
50+
const auto graph = setup.BuildGraph(pgmReturn, {streamCallable});
6451

65-
const auto t1 = TInstant::Now();
66-
const auto& value = graph->GetValue();
67-
const auto first = value.GetElement(0);
68-
const auto second = value.GetElement(1);
69-
const auto t2 = TInstant::Now();
70-
71-
if (first.template Get<double>() > 0.0) {
72-
UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), positive);
73-
UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), negative);
74-
} else {
75-
UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), negative);
76-
UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), positive);
52+
// Measure the input stream run time
53+
const auto devnullStream = sampler.MakeStream(graph->GetHolderFactory());
54+
const auto devnullStart = TInstant::Now();
55+
{
56+
NUdf::TUnboxedValue columns[2];
57+
while (devnullStream->WideFetch(columns, 2) == NUdf::EFetchStatus::Ok) {
58+
}
7759
}
60+
const auto devnullTime = TInstant::Now() - devnullStart;
61+
62+
// Reference implementation (sum via an std::unordered_map)
63+
auto referenceStream = sampler.MakeStream(graph->GetHolderFactory());
64+
const auto t = TInstant::Now();
65+
sampler.ComputeReferenceResult(*referenceStream);
66+
const auto cppTime = TInstant::Now() - t;
67+
68+
// Compute graph implementation
69+
auto myStream = NUdf::TUnboxedValuePod(sampler.MakeStream(graph->GetHolderFactory()).Release());
70+
graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), std::move(myStream));
71+
72+
const auto graphTimeStart = TInstant::Now();
73+
const auto& value = graph->GetValue();
74+
const auto graphTime = TInstant::Now() - graphTimeStart;
75+
76+
// Verification
77+
sampler.VerifyComputedValueVsReference(value);
7878

79-
Cerr << "WideCombiner graph runtime is: " << t2 - t1 << " vs. reference C++ implementation: " << cppTime << Endl << Endl;
79+
printout.SubmitTimings(graphTime, cppTime, devnullTime);
8080
}
8181

82-
template void RunTestSimple<false>();
83-
template void RunTestSimple<true>();
82+
// Explicit instantiations for both values of the LLVM flag.
template void RunTestSimple<false>(const TRunParams&, TTestResultCollector&);
template void RunTestSimple<true>(const TRunParams&, TTestResultCollector&);
8484

8585
}
8686
}
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
#pragma once
22

3+
#include "run_params.h"
4+
#include "printout.h"
5+
36
namespace NKikimr {
47
namespace NMiniKQL {
58

69
template<bool LLVM>
7-
void RunTestSimple();
10+
void RunTestSimple(const TRunParams& params, TTestResultCollector& printout);
811

912
}
1013
}

0 commit comments

Comments
 (0)