@@ -22,9 +22,22 @@ namespace internal {
22
22
// null-ness by determining whether the optional has a value.
23
23
// kTimestamp is its own special case, where nullness is determined by the underlying nanos
24
24
// being equal to Deephaven's NULL_LONG.
25
- // kLocalDate and kLocalTime are like kTimestamp except they resolve to different data types.
25
+ // kLocalDate and kLocalTime are similar to kTimestamp in nullness except they resolve to different
26
+ // data types.
26
27
// Selects the conversion / null-detection strategy of GenericArrowColumnSource; it is that
// class template's first parameter (Style).
enum class ArrowProcessingStyle { kNormal , kBooleanOrString , kTimestamp , kLocalDate , kLocalTime };
27
28
29
+ /* *
30
+ * When 'array' has dynamic type arrow::TimestampArray or arrow::Time64Array, look at the
31
+ * underlying time resolution of the arrow type and calculate a conversion factor from that unit
32
+ * to nanoseconds. For example, if the underlying time unit is arrow::TimeUnit::MILLI, then the
33
+ * conversion factor would be 1_000_000, meaning that one needs to multiply incoming millisecond
34
+ * values by one million to convert them to nanoseconds. If 'array' is not one of those types,
35
+ * return 1.
+ *
+ * NOTE(review): the result is unsigned (size_t), while GenericArrowColumnSource multiplies it
+ * by raw array values (presumably int64_t, since they are compared against
+ * DeephavenTraits<int64_t>::kNullValue). That product is therefore a mixed signed/unsigned
+ * multiplication and is not checked for overflow -- confirm this is acceptable for negative
+ * and very large inputs.
36
+ * @param array The Arrow array
37
+ * @return For supported time types, the conversion factor to nanoseconds. Otherwise, 1.
38
+ */
39
+ size_t CalcTimeNanoScaleFactor (const arrow::Array &array);
40
+
28
41
template <ArrowProcessingStyle Style , typename TColumnSourceBase, typename TArrowArray, typename TChunk>
29
42
class GenericArrowColumnSource final : public TColumnSourceBase {
30
43
using BooleanChunk = deephaven::dhcore::chunk::BooleanChunk;
@@ -37,7 +50,8 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
37
50
using UInt64Chunk = deephaven::dhcore::chunk::UInt64Chunk;
38
51
39
52
public:
40
// Convenience factory for a column source backed by a single Arrow array: 'array' is moved
// into a one-element vector, which is then forwarded to OfArrowArrayVec.
- static std::shared_ptr<GenericArrowColumnSource> OfArrowArray (std::shared_ptr<TArrowArray> array) {
53
+ static std::shared_ptr<GenericArrowColumnSource>
54
+ OfArrowArray (std::shared_ptr<TArrowArray> array) {
41
55
std::vector<std::shared_ptr<TArrowArray>> arrays{std::move (array)};
42
56
return OfArrowArrayVec (std::move (arrays));
43
57
}
@@ -48,7 +62,9 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
48
62
}
49
63
50
64
// Takes ownership of the array segments and, in the new version of this constructor, derives
// the nanosecond scale factor from them. An empty vector yields a factor of 1.
// NOTE(review): only the first array is inspected, so all segments are assumed to share the
// same time unit -- confirm that callers guarantee this.
explicit GenericArrowColumnSource (std::vector<std::shared_ptr<TArrowArray>> arrays) :
51
- arrays_(std::move(arrays)) {}
65
+ arrays_(std::move(arrays)) {
66
+ time_nano_scale_factor_ = arrays_.empty () ? 1 : CalcTimeNanoScaleFactor (*arrays_.front ());
67
+ }
52
68
53
69
~GenericArrowColumnSource () final = default ;
54
70
@@ -67,13 +83,14 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
67
83
68
84
// This algorithm is a little tricky because the source data and RowSequence are both
69
85
// segmented, perhaps in different ways.
70
- auto *typed_dest = VerboseCast<TChunk*>(DEEPHAVEN_LOCATION_EXPR (dest_data));
86
+ auto *typed_dest = VerboseCast<TChunk *>(DEEPHAVEN_LOCATION_EXPR (dest_data));
71
87
auto *destp = typed_dest->data ();
72
88
auto outerp = arrays_.begin ();
73
89
size_t src_segment_begin = 0 ;
74
90
size_t src_segment_end = (*outerp)->length ();
75
91
76
- auto *null_destp = optional_dest_null_flags != nullptr ? optional_dest_null_flags->data () : nullptr ;
92
+ auto *null_destp =
93
+ optional_dest_null_flags != nullptr ? optional_dest_null_flags->data () : nullptr ;
77
94
78
95
rows.ForEachInterval ([&](uint64_t requested_segment_begin, uint64_t requested_segment_end) {
79
96
while (true ) {
@@ -147,11 +164,12 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
147
164
const auto *src_endp = innerp->raw_values () + relative_end;
148
165
149
166
for (const auto *ip = src_beginp; ip != src_endp; ++ip) {
150
- *destp = DateTime::FromNanos (*ip);
167
+ auto is_null = *ip == DeephavenTraits<int64_t >::kNullValue ;
168
+ *destp = DateTime::FromNanos (is_null ? *ip : (*ip * time_nano_scale_factor_));
151
169
++destp;
152
170
153
171
if (null_destp != nullptr ) {
154
- *null_destp = *ip == DeephavenTraits< int64_t >:: kNullValue ;
172
+ *null_destp = is_null ;
155
173
++null_destp;
156
174
}
157
175
}
@@ -175,11 +193,12 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
175
193
const auto *src_endp = innerp->raw_values () + relative_end;
176
194
177
195
for (const auto *ip = src_beginp; ip != src_endp; ++ip) {
178
- *destp = LocalTime::FromNanos (*ip);
196
+ auto is_null = *ip == DeephavenTraits<int64_t >::kNullValue ;
197
+ *destp = LocalTime::FromNanos (is_null ? *ip : (*ip * time_nano_scale_factor_));
179
198
++destp;
180
199
181
200
if (null_destp != nullptr ) {
182
- *null_destp = *ip == DeephavenTraits< int64_t >:: kNullValue ;
201
+ *null_destp = is_null ;
183
202
++null_destp;
184
203
}
185
204
}
@@ -200,6 +219,18 @@ class GenericArrowColumnSource final : public TColumnSourceBase {
200
219
201
220
private:
202
221
// The Arrow array segments backing this column source, moved in by the constructor.
std::vector<std::shared_ptr<TArrowArray>> arrays_;
222
+ /* *
223
+ * This value is valid for Style == ArrowProcessingStyle::kTimestamp and
224
+ * ArrowProcessingStyle::kLocalTime, and ignored for other ArrowProcessingStyle enumeration
225
+ * values.
226
+ *
227
+ * These ArrowProcessingStyles come into play when processing the arrow types
228
+ * arrow::TimestampType and arrow::Time64Type respectively.
229
+ *
230
+ * The value stores a conversion factor from whatever the input scale is to nanoseconds.
231
+ * For example, if the input timescale is milliseconds, this value will be 1_000_000.
+ *
+ * It is assigned once, in the constructor, from CalcTimeNanoScaleFactor applied to the first
+ * element of arrays_; when arrays_ is empty it is set to 1.
232
+ */
233
+ size_t time_nano_scale_factor_ = 1 ;
203
234
};
204
235
} // namespace internal
205
236
0 commit comments