#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 17 16:20:05 2017
@author: farismismar
"""
import random

import numpy as np
from collections import deque

from environment import SON_environment
#from QLearningAgent import QLearningAgent as QLearner
from DQNLearningAgent import DQNLearningAgent as QLearner

#import matplotlib.pyplot as plt
#from matplotlib import rc
#import matplotlib.ticker as tick

seed = 0  # change in Top_File.m also
random.seed(seed)
np.random.seed(seed)

# Experience replay buffer and hyperparameters.
memory = deque(maxlen=2000)
batch_size = 12
state_count = 3
action_count_b = 6

# The environment and agent are created by set_environment().
env = None
agent = None
# The wrapper functions below are the entry points to the simulation;
# they are called externally (see Top_File.m).
def env_reset_wrapper():
    # Re-seed so every episode starts from a reproducible RNG state.
    random.seed(seed)
    np.random.seed(seed)
    global env
    global agent
    state = env.reset()
    return state
def agent_get_exploration_rate_wrapper():
    global env
    global agent
    return agent.exploration_rate
def set_environment(state_size, action_size):
    # Declare the module-level names; without these the assignments below
    # would only create unused locals.
    global env
    global agent
    global state_count
    global action_count_b
    state_count = int(state_size)
    action_count_b = int(action_size)
    env = SON_environment(seed=seed)
    agent = QLearner(seed=seed)
def env_step_wrapper(action):
    global env
    global agent
    return env.step(action)
def agent_act_wrapper(state):
    global env
    global agent
    state = np.asarray(state)
    return agent.act(state)
def agent_begin_episode_wrapper(state):
    global env
    global agent
    state = np.asarray(state)
    return agent.begin_episode(state)
def agent_replay_wrapper():
    global env
    global agent
    global batch_size
    global memory
    # Train on a minibatch sampled from the replay memory.
    [q, loss] = agent.replay(memory, batch_size)
    return [q, loss]
def agent_memory_length_diff_wrapper():
    global env
    global agent
    global batch_size
    global memory
    # Non-negative once enough experiences have accumulated for a minibatch.
    return len(memory) - batch_size
def agent_remember_wrapper(state, action, reward, next_state, done):
    global env
    global agent
    global memory
    # Inputs arrive as arrays from the caller; coerce to plain Python scalars.
    state = int(np.asarray(state)[0])
    next_state = int(np.asarray(next_state)[0])
    action = int(np.asarray(action)[0])
    reward = int(reward)
    done = int(done)
    # Debug dump of a single transition (kept commented out):
    # m = np.array([state, action, reward, next_state, done])
    # f = open('/Users/farismismar/Desktop/memory.csv', 'ab')
    # np.savetxt(f, m.T, fmt="%s", delimiter=",")
    # f.close()
    memory = agent.remember(memory, state, action, reward, next_state, done)
    # Checking that the memory populated properly... yes.
    # m = np.array(memory)
    # f = open('/Users/farismismar/Desktop/memory.csv', 'ab')
    # np.savetxt(f, m, fmt="%s", delimiter=",")
    # f.close()
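

if __name__ == "__main__":
    # Minimal smoke-test sketch showing how the wrappers above chain together
    # when driven directly from Python rather than from Top_File.m. It assumes
    # env.step(action) returns a (next_state, reward, done, info)-style tuple
    # and that agent.begin_episode()/act() return integer actions; adjust to
    # the actual SON_environment / DQNLearningAgent interfaces as needed.
    set_environment(state_count, action_count_b)
    state = env_reset_wrapper()
    action = agent_begin_episode_wrapper([state])
    for _ in range(10):
        next_state, reward, done, _ = env_step_wrapper(action)
        agent_remember_wrapper([state], [action], reward, [next_state], done)
        if agent_memory_length_diff_wrapper() >= 0:
            q, loss = agent_replay_wrapper()
            print('exploration rate: {}, loss: {}'.format(
                agent_get_exploration_rate_wrapper(), loss))
        if done:
            break
        state = next_state
        action = agent_act_wrapper([state])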