@@ -317,6 +317,14 @@ def convert_to_default_dataframe(self, dataframe):
317
317
)
318
318
)
319
319
320
@staticmethod
def utc_disguised_as_local(dt):
    """Return the UTC wall-clock reading of ``dt`` tagged with the local zone.

    A datetime without ``tzinfo`` is taken to already be expressed in UTC.
    The value is normalised to UTC and its ``tzinfo`` is then swapped for
    the machine's local time zone *without* shifting the clock fields, so
    the result shows UTC's year/month/day/hour/... while claiming to be
    local time.

    :param dt: a ``datetime`` instance, naive or timezone-aware.
    :return: a timezone-aware ``datetime`` whose fields equal the UTC
        reading of ``dt`` and whose ``tzinfo`` is the local zone.
    """
    # Resolve the local zone first, exactly as the callers expect it to be
    # looked up on every call (no caching).
    zone_here = tzlocal.get_localzone()

    # Naive input is interpreted as UTC rather than as local time.
    aware = dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

    # Express the instant in UTC, then relabel it: replace() changes only
    # the tzinfo attribute and leaves the wall-clock fields untouched.
    utc_reading = aware.astimezone(timezone.utc)
    return utc_reading.replace(tzinfo=zone_here)
327
+
320
328
def convert_list_to_spark_dataframe (self , dataframe ):
321
329
if HAS_NUMPY :
322
330
return self .convert_numpy_to_spark_dataframe (np .array (dataframe ))
@@ -342,7 +350,11 @@ def convert_list_to_spark_dataframe(self, dataframe):
342
350
c = "col_" + str (n_col )
343
351
dataframe_dict [c ] = [dataframe [i ][n_col ] for i in range (len (dataframe ))]
344
352
return self .convert_pandas_to_spark_dataframe (pd .DataFrame (dataframe_dict ))
345
- # We have neither numpy nor pandas, so there is no need to transform timestamps
353
+ for i in range (len (dataframe )):
354
+ dataframe [i ] = [
355
+ self .utc_disguised_as_local (d ) if isinstance (d , datetime ) else d
356
+ for d in dataframe [i ]
357
+ ]
346
358
return self ._spark_session .createDataFrame (
347
359
dataframe , ["col_" + str (n ) for n in range (num_cols )]
348
360
)
@@ -361,13 +373,12 @@ def convert_numpy_to_spark_dataframe(self, dataframe):
361
373
dataframe_dict [c ] = dataframe [:, n_col ]
362
374
return self .convert_pandas_to_spark_dataframe (pd .DataFrame (dataframe_dict ))
363
375
# convert timestamps to current timezone
364
- local_tz = tzlocal .get_localzone ()
365
376
for n_col in range (num_cols ):
366
377
if dataframe [:, n_col ].dtype == np .dtype ("datetime64[ns]" ):
367
378
# set the timezone to the client's timezone because that is
368
379
# what spark expects.
369
- dataframe [:, n_col ] = dataframe [:, n_col ]. map (
370
- lambda d : local_tz . fromutc (d .item (). astimezone ( local_tz ))
380
+ dataframe [:, n_col ] = np . array (
381
+ [ self . utc_disguised_as_local (d .item ()) for d in dataframe [:, n_col ]]
371
382
)
372
383
return self ._spark_session .createDataFrame (
373
384
dataframe .tolist (), ["col_" + str (n ) for n in range (num_cols )]
0 commit comments