@@ -5,52 +5,37 @@ namespace NKikimr::NOlap::NIndexedReader {
5
5
6
6
void TMergePartialStream::PutControlPoint (std::shared_ptr<TSortableBatchPosition> point) {
7
7
Y_ABORT_UNLESS (point);
8
- Y_ABORT_UNLESS (point->IsSameSortingSchema (SortSchema));
8
+ AFL_VERIFY (point->IsSameSortingSchema (SortSchema))( " point " , point-> DebugJson ())( " schema " , SortSchema-> ToString ( ));
9
9
Y_ABORT_UNLESS (point->IsReverseSort () == Reverse);
10
10
Y_ABORT_UNLESS (++ControlPoints == 1 );
11
11
12
- SortHeap.emplace_back (TBatchIterator (*point));
13
- std::push_heap (SortHeap.begin (), SortHeap.end ());
12
+ SortHeap.Push (TBatchIterator (*point));
14
13
}
15
14
16
- void TMergePartialStream::AddPoolSource ( const std::optional<ui32> poolId, std::shared_ptr<arrow::RecordBatch> batch, std::shared_ptr<NArrow::TColumnFilter> filter) {
15
+ void TMergePartialStream::AddSource ( std::shared_ptr<arrow::RecordBatch> batch, std::shared_ptr<NArrow::TColumnFilter> filter) {
17
16
if (!batch || !batch->num_rows ()) {
18
17
return ;
19
18
}
20
19
Y_VERIFY_DEBUG (NArrow::IsSorted (batch, SortSchema));
21
- if (!poolId) {
22
- AddNewToHeap (poolId, batch, filter, true );
23
- } else {
24
- auto it = BatchPools.find (*poolId);
25
- if (it == BatchPools.end ()) {
26
- it = BatchPools.emplace (*poolId, std::deque<TIteratorData>()).first ;
27
- }
28
- it->second .emplace_back (batch, filter);
29
- if (it->second .size () == 1 ) {
30
- AddNewToHeap (poolId, batch, filter, true );
31
- }
32
- }
20
+ AddNewToHeap (batch, filter);
33
21
}
34
22
35
- void TMergePartialStream::AddNewToHeap (const std::optional<ui32> poolId, std:: shared_ptr<arrow::RecordBatch> batch, std::shared_ptr<NArrow::TColumnFilter> filter, const bool restoreHeap ) {
23
+ void TMergePartialStream::AddNewToHeap (std::shared_ptr<arrow::RecordBatch> batch, std::shared_ptr<NArrow::TColumnFilter> filter) {
36
24
if (!filter || filter->IsTotalAllowFilter ()) {
37
- SortHeap.emplace_back (TBatchIterator (batch, nullptr , SortSchema->field_names (), DataSchema ? DataSchema->field_names () : std::vector<std::string>(), Reverse, poolId ));
25
+ SortHeap.Push (TBatchIterator (batch, nullptr , SortSchema->field_names (), DataSchema ? DataSchema->field_names () : std::vector<std::string>(), Reverse));
38
26
} else if (filter->IsTotalDenyFilter ()) {
39
27
return ;
40
28
} else {
41
- SortHeap.emplace_back (TBatchIterator (batch, filter, SortSchema->field_names (), DataSchema ? DataSchema->field_names () : std::vector<std::string>(), Reverse, poolId));
42
- }
43
- if (restoreHeap) {
44
- std::push_heap (SortHeap.begin (), SortHeap.end ());
29
+ SortHeap.Push (TBatchIterator (batch, filter, SortSchema->field_names (), DataSchema ? DataSchema->field_names () : std::vector<std::string>(), Reverse));
45
30
}
46
31
}
47
32
48
33
void TMergePartialStream::RemoveControlPoint () {
49
34
Y_ABORT_UNLESS (ControlPoints == 1 );
50
35
Y_ABORT_UNLESS (ControlPointEnriched ());
51
36
Y_ABORT_UNLESS (-- ControlPoints == 0 );
52
- std::pop_heap (SortHeap.begin (), SortHeap. end ());
53
- SortHeap.pop_back ();
37
+ Y_ABORT_UNLESS (SortHeap.Current (). IsControlPoint ());
38
+ SortHeap.RemoveTop ();
54
39
}
55
40
56
41
void TMergePartialStream::CheckSequenceInDebug (const TSortableBatchPosition& nextKeyColumnsPosition) {
@@ -73,11 +58,11 @@ bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSo
73
58
Y_ABORT_UNLESS ((ui32)DataSchema->num_fields () == builder.GetBuildersCount ());
74
59
PutControlPoint (std::make_shared<TSortableBatchPosition>(readTo));
75
60
bool cpReachedFlag = false ;
76
- while (SortHeap.size () && !cpReachedFlag) {
77
- if (SortHeap.front ().IsControlPoint ()) {
61
+ while (SortHeap.Size () && !cpReachedFlag) {
62
+ if (SortHeap.Current ().IsControlPoint ()) {
78
63
RemoveControlPoint ();
79
64
cpReachedFlag = true ;
80
- if (SortHeap.empty () || !includeFinish || SortHeap.front ().GetKeyColumns ().Compare (readTo) == std::partial_ordering::greater) {
65
+ if (SortHeap.Empty () || !includeFinish || SortHeap.Current ().GetKeyColumns ().Compare (readTo) == std::partial_ordering::greater) {
81
66
return true ;
82
67
}
83
68
}
@@ -92,7 +77,7 @@ bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSo
92
77
93
78
bool TMergePartialStream::DrainAll (TRecordBatchBuilder& builder) {
94
79
Y_ABORT_UNLESS ((ui32)DataSchema->num_fields () == builder.GetBuildersCount ());
95
- while (SortHeap.size ()) {
80
+ while (SortHeap.Size ()) {
96
81
if (auto currentPosition = DrainCurrentPosition ()) {
97
82
CheckSequenceInDebug (*currentPosition);
98
83
builder.AddRecord (*currentPosition);
@@ -102,19 +87,19 @@ bool TMergePartialStream::DrainAll(TRecordBatchBuilder& builder) {
102
87
}
103
88
104
89
std::optional<TSortableBatchPosition> TMergePartialStream::DrainCurrentPosition () {
105
- Y_ABORT_UNLESS (SortHeap.size ());
106
- Y_ABORT_UNLESS (!SortHeap.front ().IsControlPoint ());
107
- TSortableBatchPosition result = SortHeap.front ().GetKeyColumns ();
108
- TSortableBatchPosition resultVersion = SortHeap.front ().GetVersionColumns ();
90
+ Y_ABORT_UNLESS (SortHeap.Size ());
91
+ Y_ABORT_UNLESS (!SortHeap.Current ().IsControlPoint ());
92
+ TSortableBatchPosition result = SortHeap.Current ().GetKeyColumns ();
93
+ TSortableBatchPosition resultVersion = SortHeap.Current ().GetVersionColumns ();
109
94
bool isFirst = true ;
110
- const bool deletedFlag = SortHeap.front ().IsDeleted ();
111
- while (SortHeap.size () && (isFirst || result.Compare (SortHeap.front ().GetKeyColumns ()) == std::partial_ordering::equivalent)) {
112
- auto & anotherIterator = SortHeap.front ();
95
+ const bool deletedFlag = SortHeap.Current ().IsDeleted ();
96
+ while (SortHeap.Size () && (isFirst || result.Compare (SortHeap.Current ().GetKeyColumns ()) == std::partial_ordering::equivalent)) {
97
+ auto & anotherIterator = SortHeap.Current ();
113
98
if (!isFirst) {
114
- AFL_VERIFY (resultVersion.Compare (anotherIterator.GetVersionColumns ()) == std::partial_ordering::greater )(" r" , resultVersion.DebugJson ())(" a" , anotherIterator.GetVersionColumns ().DebugJson ())
99
+ AFL_VERIFY (resultVersion.Compare (anotherIterator.GetVersionColumns ()) != std::partial_ordering::less )(" r" , resultVersion.DebugJson ())(" a" , anotherIterator.GetVersionColumns ().DebugJson ())
115
100
(" key" , result.DebugJson ());
116
101
}
117
- NextInHeap ( true );
102
+ SortHeap. Next ( );
118
103
isFirst = false ;
119
104
}
120
105
if (deletedFlag) {
@@ -123,13 +108,13 @@ std::optional<TSortableBatchPosition> TMergePartialStream::DrainCurrentPosition(
123
108
return result;
124
109
}
125
110
126
- std::vector<std::shared_ptr<arrow::RecordBatch>> TMergePartialStream::DrainAllParts (const std::vector <TSortableBatchPosition>& positions,
127
- const std::vector<std::shared_ptr<arrow::Field>>& resultFields, const bool includePositions )
111
+ std::vector<std::shared_ptr<arrow::RecordBatch>> TMergePartialStream::DrainAllParts (const std::map <TSortableBatchPosition, bool >& positions,
112
+ const std::vector<std::shared_ptr<arrow::Field>>& resultFields)
128
113
{
129
114
std::vector<std::shared_ptr<arrow::RecordBatch>> result;
130
115
for (auto && i : positions) {
131
116
NIndexedReader::TRecordBatchBuilder indexesBuilder (resultFields);
132
- DrainCurrentTo (indexesBuilder, i, includePositions );
117
+ DrainCurrentTo (indexesBuilder, i. first , i. second );
133
118
result.emplace_back (indexesBuilder.Finalize ());
134
119
if (result.back ()->num_rows () == 0 ) {
135
120
result.pop_back ();
@@ -147,11 +132,6 @@ std::vector<std::shared_ptr<arrow::RecordBatch>> TMergePartialStream::DrainAllPa
147
132
/// Builds a JSON description of this iterator for diagnostics.
///
/// @return object with "is_cp" (result of IsControlPoint()) and "key"
///         (debug JSON of the key columns position).
NJson::TJsonValue TMergePartialStream::TBatchIterator::DebugJson() const {
    NJson::TJsonValue result;
    result["is_cp"] = IsControlPoint();
    result["key"] = KeyColumns.DebugJson();
    return result;
}
0 commit comments