@@ -24,26 +24,7 @@ std::shared_ptr<arrow::Array> IChunkedArray::TReader::CopyRecord(const ui64 reco
24
24
}
25
25
26
26
// Returns `count` records starting at `offset` as an arrow::ChunkedArray.
//
// Removed implementation ('-' lines): verified offset + count against GetRecordsCount(),
// then walked the underlying arrays via GetChunkSlow()/GetChunk(), appending a Slice() of
// each array until `count` records were covered, and built the result from those pieces.
//
// Added implementation ('+' line): delegates to GetChunkedArray() with a
// TColumnConstructionContext carrying the start index and the records count.
// NOTE(review): the explicit AFL_VERIFY range check is gone from this function; range
// validation presumably happens further down the call chain now -- confirm.
std::shared_ptr<arrow::ChunkedArray> IChunkedArray::Slice (const ui32 offset, const ui32 count) const {
27
- AFL_VERIFY (offset + count <= (ui64)GetRecordsCount ())(" offset" , offset)(" count" , count)(" length" , GetRecordsCount ());
28
- ui32 currentOffset = offset;
29
- ui32 countLeast = count;
30
- std::vector<std::shared_ptr<arrow::Array>> chunks;
31
- auto address = GetChunkSlow (offset);
32
- while (countLeast) {
33
- address = GetChunk (address.GetAddress (), currentOffset);
34
- const ui64 internalPos = address.GetAddress ().GetLocalIndex (currentOffset);
35
- if (internalPos + countLeast <= (ui64)address.GetArray ()->length ()) {
36
- chunks.emplace_back (address.GetArray ()->Slice (internalPos, countLeast));
37
- break ;
38
- } else {
39
- const ui32 deltaCount = address.GetArray ()->length () - internalPos;
40
- chunks.emplace_back (address.GetArray ()->Slice (internalPos, deltaCount));
41
- AFL_VERIFY (countLeast >= deltaCount);
42
- countLeast -= deltaCount;
43
- currentOffset += deltaCount;
44
- }
45
- }
46
- return std::make_shared<arrow::ChunkedArray>(chunks, DataType);
27
+ return GetChunkedArray (TColumnConstructionContext ().SetStartIndex (offset).SetRecordsCount (count));
47
28
}
48
29
49
30
IChunkedArray::TFullDataAddress IChunkedArray::GetChunk (const std::optional<TAddressChain>& chunkCurrent, const ui64 position) const {
@@ -62,10 +43,10 @@ IChunkedArray::TFullDataAddress IChunkedArray::GetChunk(const std::optional<TAdd
62
43
return TFullDataAddress (localAddress.GetArray (), std::move (addressChain));
63
44
} else {
64
45
auto chunkedArrayAddress = GetArray (chunkCurrent, position, nullptr );
65
- if (chunkCurrent) {
66
- AFL_VERIFY (chunkCurrent->GetSize () == 1 + chunkedArrayAddress.GetAddress ().GetSize ())(" current" , chunkCurrent->GetSize ())(
67
- " chunked" , chunkedArrayAddress.GetAddress ().GetSize ());
68
- }
46
+ // if (chunkCurrent) {
47
+ // AFL_VERIFY(chunkCurrent->GetSize() == chunkedArrayAddress.GetAddress().GetSize())("current", chunkCurrent->GetSize())(
48
+ // "chunked", chunkedArrayAddress.GetAddress().GetSize());
49
+ // }
69
50
auto localAddress = chunkedArrayAddress.GetArray ()->GetLocalData (address, chunkedArrayAddress.GetAddress ().GetLocalIndex (position));
70
51
auto fullAddress = std::move (chunkedArrayAddress.MutableAddress ());
71
52
fullAddress.Add (localAddress.GetAddress ());
@@ -112,7 +93,7 @@ std::shared_ptr<IChunkedArray> IChunkedArray::DoApplyFilter(const TColumnFilter&
112
93
auto schema = std::make_shared<arrow::Schema>(fields);
113
94
auto table = arrow::Table::Make (schema, { arr }, GetRecordsCount ());
114
95
AFL_VERIFY (table->num_columns () == 1 );
115
- AFL_VERIFY ( filter.Apply (table) );
96
+ filter.Apply (table);
116
97
if (table->column (0 )->num_chunks () == 1 ) {
117
98
return std::make_shared<TTrivialArray>(table->column (0 )->chunk (0 ));
118
99
} else {
@@ -121,8 +102,8 @@ std::shared_ptr<IChunkedArray> IChunkedArray::DoApplyFilter(const TColumnFilter&
121
102
}
122
103
123
104
std::shared_ptr<IChunkedArray> IChunkedArray::ApplyFilter (const TColumnFilter& filter, const std::shared_ptr<IChunkedArray>& selfPtr) const {
124
- AFL_VERIFY (selfPtr);
125
105
if (filter.IsTotalAllowFilter ()) {
106
+ AFL_VERIFY (selfPtr);
126
107
return selfPtr;
127
108
}
128
109
if (filter.IsTotalDenyFilter ()) {
@@ -134,7 +115,7 @@ std::shared_ptr<IChunkedArray> IChunkedArray::ApplyFilter(const TColumnFilter& f
134
115
return result;
135
116
}
136
117
137
- std::shared_ptr<arrow::ChunkedArray> IChunkedArray::GetChunkedArray () const {
118
+ std::shared_ptr<arrow::ChunkedArray> IChunkedArray::GetChunkedArrayTrivial () const {
138
119
std::vector<std::shared_ptr<arrow::Array>> chunks;
139
120
std::optional<TFullDataAddress> address;
140
121
for (ui32 position = 0 ; position < GetRecordsCount ();) {
@@ -145,6 +126,23 @@ std::shared_ptr<arrow::ChunkedArray> IChunkedArray::GetChunkedArray() const {
145
126
return std::make_shared<arrow::ChunkedArray>(chunks, GetDataType ());
146
127
}
147
128
129
+ std::shared_ptr<arrow::ChunkedArray> IChunkedArray::GetChunkedArray (const TColumnConstructionContext& context) const {
130
+ if (context.GetStartIndex () || context.GetRecordsCount ()) {
131
+ const ui32 start = context.GetStartIndex ().value_or (0 );
132
+ const ui32 count = context.GetRecordsCount ().value_or (GetRecordsCount () - start);
133
+ auto slice = ISlice (start, count);
134
+ if (context.GetFilter () && !context.GetFilter ()->IsTotalAllowFilter ()) {
135
+ return slice->ApplyFilter (context.GetFilter ()->Slice (start, count), slice)->GetChunkedArrayTrivial ();
136
+ } else {
137
+ return slice->GetChunkedArrayTrivial ();
138
+ }
139
+ } else if (context.GetFilter () && !context.GetFilter ()->IsTotalAllowFilter ()) {
140
+ return ApplyFilter (*context.GetFilter (), nullptr )->GetChunkedArrayTrivial ();
141
+ } else {
142
+ return GetChunkedArrayTrivial ();
143
+ }
144
+ }
145
+
148
146
TString IChunkedArray::TReader::DebugString (const ui32 position) const {
149
147
auto address = GetReadChunk (position);
150
148
return NArrow::DebugString (address.GetArray (), address.GetPosition ());
0 commit comments