Skip to content

Commit 924df37

Browse files
committed
Fix a slew of bugs.
1 parent d1d19de commit 924df37

File tree

6 files changed

+19
-12
lines changed

6 files changed

+19
-12
lines changed

cli.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def run_optimize(args, logger):
2020
from lib.RLTrader import RLTrader
2121

2222
trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy)
23-
trader.optimize(n_trials=args.trials, n_prune_evals_per_trial=args.prune_evals, n_tests_per_eval=args.eval_tests)
23+
trader.optimize(n_trials=args.trials)
2424

2525

2626
if __name__ == '__main__':

data/params.db

28 KB
Binary file not shown.

lib/RLTrader.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
185185

186186
return -1 * last_reward
187187

188-
def optimize(self, n_trials: int = 20, **optimize_params):
188+
def optimize(self, n_trials: int = 20):
189189
try:
190-
self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, **optimize_params)
190+
self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1)
191191
except KeyboardInterrupt:
192192
pass
193193

@@ -278,7 +278,7 @@ def test(self, model_epoch: int = 0, render_env: bool = True, render_report: boo
278278
if done:
279279
net_worths = pd.DataFrame({
280280
'Date': info[0]['timestamps'],
281-
'Balance': info[0]['networths'],
281+
'Balance': info[0]['net_worths'],
282282
})
283283

284284
net_worths.set_index('Date', drop=True, inplace=True)

lib/env/TradingEnv.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def _get_trade(self, action: int):
9494

9595
def _take_action(self, action: int):
9696
amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action)
97+
9798
asset_bought, asset_sold, purchase_cost, sale_revenue = self.trade_strategy.trade(buy_amount=amount_asset_to_buy,
9899
sell_amount=amount_asset_to_sell,
99100
balance=self.balance,
@@ -104,15 +105,20 @@ def _take_action(self, action: int):
104105
self.asset_held += asset_bought
105106
self.balance -= purchase_cost
106107

107-
self.trades.append({'step': self.current_step, 'amount': asset_bought,
108-
'total': purchase_cost, 'type': 'buy'})
108+
self.trades.append({'step': self.current_step,
109+
'amount': asset_bought,
110+
'total': purchase_cost,
111+
'type': 'buy'})
109112
elif asset_sold:
110113
self.asset_held -= asset_sold
111114
self.balance += sale_revenue
115+
112116
self.reward_strategy.reset_reward()
113117

114-
self.trades.append({'step': self.current_step, 'amount': asset_sold,
115-
'total': sale_revenue, 'type': 'sell'})
118+
self.trades.append({'step': self.current_step,
119+
'amount': asset_sold,
120+
'total': sale_revenue,
121+
'type': 'sell'})
116122

117123
current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision)
118124
self.net_worths.append(current_net_worth)
@@ -132,7 +138,7 @@ def _done(self):
132138

133139
def _reward(self):
134140
reward = self.reward_strategy.get_reward(current_step=self.current_step,
135-
current_price=self._current_price(),
141+
current_price=self._current_price,
136142
observations=self.observations,
137143
account_history=self.account_history,
138144
net_worths=self.net_worths)
@@ -214,7 +220,8 @@ def step(self, action):
214220
obs = self._next_observation()
215221
reward = self._reward()
216222
done = self._done()
217-
return obs, reward, done, {'networths': self.net_worths, 'timestamps': self.timestamps}
223+
224+
return obs, reward, done, {'net_worths': self.net_worths, 'timestamps': self.timestamps}
218225

219226
def render(self, mode='human'):
220227
if mode == 'system':

lib/env/reward/WeightedUnrealizedProfit.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ def get_reward(self,
3636
if account_history['asset_sold'].values[-1] > 0:
3737
reward = self.calc_reward(account_history['sale_revenue'].values[-1])
3838
else:
39-
reward = self.calc_reward(account_history['asset_held'].values[-1] * current_price)
39+
reward = self.calc_reward(account_history['asset_held'].values[-1] * current_price())
4040

4141
return reward

lib/env/trade/SimulatedTradeStrategy.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def trade(self,
3030
commission = self.commissionPercent / 100
3131
slippage = np.random.uniform(0, self.maxSlippagePercent) / 100
3232

33-
asset_bought, asset_sold, purchase_cost, sale_revenue = 0, 0, 0, 0
33+
asset_bought, asset_sold, purchase_cost, sale_revenue = buy_amount, sell_amount, 0, 0
3434

3535
if buy_amount > 0 and balance >= self.min_cost_limit:
3636
price_adjustment = (1 + commission) * (1 + slippage)

0 commit comments

Comments (0)