trainEpsilonGreedy.py
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 18 01:45:26 2023
@author: alial
"""
from collections import deque
import matplotlib.pyplot as plt
from trader import Trader, Market
from agentsEpsGreed import MultiTask
import numpy as np
import tensorflow as tf
from multiprocessing import Pool
import gc


def train(task):
    # Train (or continue training) the agent for the given task.
    # Batch-size levels, expressed as fractions of the dataset
    levels = {0: 0.005, 1: 0.01, 2: 0.02, 3: 0.03, 4: 0.04, 5: 0.05, 6: 0.1, 7: 0.15, 8: 0.2, 9: 0.25}

    # Initialize the Trader object and connect to the IB gateway
    trader = Trader()

    # Settings for the agent and episode bookkeeping
    action_size = 5
    previous_row = None
    rewards = [0]
    steps = [0]

    # Initialize the rolling window to store the last n rewards
    window_size = 100
    rolling_window = deque(maxlen=window_size)

    # Initialize the list for the rolling average of the rewards, plus iteration and batch-size trackers
    rolling_average = []
    current_batch_size_level = 0
    current_iteration = 0
    batch_size = 10
    previous_action = 2
    current_iteration += 1

    # Initialize the market and get the dataframe
    market = Market(trader)
    market.update_data()
    df = market.get_df()

    # Get the initial state and derive the state size
    state = market.get_state(i=0)
    state_size = state.shape[1]
    agent = MultiTask(task=task, action_size=action_size, state_size=state_size, job='train')
    replay_functions = {
        "dqn": agent.replay_dqn,
        "ddqn": agent.replay_ddqn,
        "actor_critic": agent.replay_actor_critic,
        "policy_gradient": agent.replay_policy_gradient
    }

    # Load saved training state and memories from previous sessions
    if getattr(agent, f'{task}_memory')._size() == 0:
        agent.load(name='trial1', task=task)
    for level, percentage in levels.items():
        if int(len(df) * percentage) < getattr(agent, f'{task}_memory')._size():
            batch_size = int(len(df) * percentage)

    for i, row in df.iterrows():
        if previous_row is None:
            previous_row = row
        done = i + 2 >= len(df)
        if done:
            break
        if i == int(len(df) * levels[current_batch_size_level]) and batch_size <= len(df) * levels[
                current_batch_size_level]:
            batch_size = int(len(df) * levels[current_batch_size_level])
            print(
                f'Level {list(levels.keys())[current_batch_size_level]} is done. Batch size now is {batch_size} '
                f'({levels[current_batch_size_level] * 100}% of the data)')
            if current_batch_size_level <= next(reversed(levels.items()))[0] - 1:
                current_batch_size_level += 1

        # Get the next state
        next_state = market.get_state(i + 1, numContracts=trader.num_contracts)

        # Predict the action using the model
        action = agent.act(state=state, task=task, job='train')

        # Execute the trade and get the reward
        reward = trader.trade(action, row, previous_row, i, previous_action)
        previous_action = action

        # Append the reward and step index for this episode to the lists
        rewards.append(reward)
        steps.append(i)
        agent.add_to_memory(task, state, action, reward, next_state, done)
        previous_row = row
        state = next_state
        rolling_window.append(trader.realized_profit_loss)

        # Calculate the rolling average of the rewards and append it to the list
        rolling_average.append(np.mean(rolling_window))
        replay_functions[task](batch_size)
        if i % 50 == 0:
            # Save the updated agent
            agent.save('trial1', task)
            # Garbage collection
            gc.collect()

    # Print progress
    print(
        f"***************** Episode {current_iteration} of {task} final account was {trader.total_value}"
        f" which is a total profit/loss of {trader.total_value - trader.capital}")
    return trader.total_value - trader.capital


if __name__ == '__main__':
    results = train(task="ddqn")
    # with Pool(4) as p:
    #     results = [p.map(train, ['dqn', 'ddqn', 'actor_critic', 'policy_gradient'])]
    # print(results)