Skip to content

Commit c267d30

Browse files
authored
Fix and Refine TradeTickDataWrangler.process_bar_data (#1586)
1 parent a8592cd commit c267d30

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

nautilus_trader/persistence/wranglers.pyx

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ def prepare_tick_data_from_bars(
200200

201201
# Merge tick data
202202
df_ticks_final = pd.concat([df_ticks_o, df_ticks_h, df_ticks_l, df_ticks_c])
203-
df_ticks_final.dropna(inplace=True)
204-
df_ticks_final.sort_index(axis=0, kind="mergesort", inplace=True)
203+
df_ticks_final = df_ticks_final.dropna()
204+
df_ticks_final = df_ticks_final.sort_index(axis=0, kind="mergesort")
205205

206206
cdef int i
207207
# Randomly shift high low prices
@@ -572,32 +572,24 @@ cdef class TradeTickDataWrangler:
572572
# Ensure index is tz-aware UTC
573573
data = as_utc_index(data)
574574

575-
# Determine the Aggressor Side based on Close vs Open
576-
if "side" not in data and "buyer_maker" not in data:
577-
data['side'] = ['BUY' if close > open_ else 'SELL' for open_, close in zip(data['open'], data['close'])]
578-
579575
cdef dict data_open = {
580576
"price": data["open"],
581577
"size": data["volume"] / 4,
582-
"side": data["side"],
583578
}
584579

585580
cdef dict data_high = {
586581
"price": data["high"],
587582
"size": data["volume"] / 4,
588-
"side": data["side"],
589583
}
590584

591585
cdef dict data_low = {
592586
"price": data["low"],
593587
"size": data["volume"] / 4,
594-
"side": data["side"],
595588
}
596589

597590
cdef dict data_close = {
598591
"price": data["close"],
599592
"size": data["volume"] / 4,
600-
"side": data["side"],
601593
}
602594

603595
df_ticks_final, ts_events, ts_inits = prepare_tick_data_from_bars(
@@ -612,12 +604,19 @@ cdef class TradeTickDataWrangler:
612604
)
613605
df_ticks_final["trade_id"] = df_ticks_final.index.view(np.uint64).astype(str)
614606

607+
# Adjust size precision
608+
size_precision = self.instrument.size_precision
609+
if is_raw:
610+
df_ticks_final["size"] = df_ticks_final["size"].apply(lambda x: round(x, size_precision - 9))
611+
else:
612+
df_ticks_final["size"] = df_ticks_final["size"].round(size_precision)
613+
615614
if is_raw:
616615
return list(map(
617616
self._build_tick_from_raw,
618617
df_ticks_final["price"],
619618
df_ticks_final["size"],
620-
self._create_side_if_not_exist(data),
619+
self._create_side_if_not_exist(df_ticks_final),
621620
df_ticks_final["trade_id"],
622621
ts_events,
623622
ts_inits,
@@ -627,7 +626,7 @@ cdef class TradeTickDataWrangler:
627626
self._build_tick,
628627
df_ticks_final["price"],
629628
df_ticks_final["size"],
630-
self._create_side_if_not_exist(data),
629+
self._create_side_if_not_exist(df_ticks_final),
631630
df_ticks_final["trade_id"],
632631
ts_events,
633632
ts_inits,
@@ -636,8 +635,10 @@ cdef class TradeTickDataWrangler:
636635
def _create_side_if_not_exist(self, data):
637636
if "side" in data.columns:
638637
return data["side"].apply(lambda x: AggressorSide.BUYER if str(x).upper() == "BUY" else AggressorSide.SELLER)
639-
else:
638+
elif "buyer_maker" in data.columns:
640639
return data["buyer_maker"].apply(lambda x: AggressorSide.SELLER if x is True else AggressorSide.BUYER)
640+
else:
641+
return [AggressorSide.NO_AGGRESSOR] * len(data)
641642

642643
# cpdef method for Python wrap() (called with map)
643644
cpdef TradeTick _build_tick_from_raw(

tests/unit_tests/persistence/test_wranglers.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
# -------------------------------------------------------------------------------------------------
15-
15+
import pandas as pd
1616
import pytest
1717

1818
from nautilus_trader.model.enums import BookAction
@@ -111,6 +111,7 @@ def test_trade_bar_data_wrangler(
111111
provider = TestDataProvider()
112112
data = provider.read_csv_bars("fxcm/usdjpy-m1-bid-2013.csv")
113113
data.loc[:, "volume"] = 100_0000
114+
expected_ticks_count = len(data) * 4
114115

115116
# Act
116117
ticks = wrangler.process_bar_data(
@@ -124,3 +125,42 @@ def test_trade_bar_data_wrangler(
124125
assert ticks[1].ts_event == ts_event2
125126
assert ticks[2].ts_event == ts_event3
126127
assert ticks[3].ts_event == ts_event4
128+
assert len(ticks) == expected_ticks_count
129+
130+
131+
@pytest.mark.parametrize("is_raw", [False, True])
132+
def test_trade_bar_data_wrangler_size_precision(is_raw: bool) -> None:
133+
# Arrange
134+
spy = TestInstrumentProvider.equity("SPY", "ARCA")
135+
wrangler = TradeTickDataWrangler(instrument=spy)
136+
factor = 1e9 if is_raw else 1
137+
ts = pd.Timestamp("2024-01-05 21:00:00+0000", tz="UTC")
138+
data = pd.DataFrame(
139+
{
140+
"open": {ts: 468.01 * factor},
141+
"high": {ts: 468.08 * factor},
142+
"low": {ts: 467.81 * factor},
143+
"close": {ts: 467.96 * factor},
144+
"volume": {ts: 18735.0 * factor},
145+
},
146+
)
147+
148+
# Calculate expected_size
149+
if is_raw:
150+
# For raw data, adjust precision by -9
151+
expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision - 9)
152+
else:
153+
# For non-raw data, apply standard precision and scale back up to compare with raw
154+
expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision) * 1e9
155+
156+
# Act
157+
ticks = wrangler.process_bar_data(
158+
data=data,
159+
offset_interval_ms=0,
160+
timestamp_is_close=True,
161+
is_raw=is_raw,
162+
)
163+
164+
# Assert
165+
for tick in ticks:
166+
assert tick.size.raw == expected_size

0 commit comments

Comments
 (0)