mirror of
https://github.com/aqeelanwar/DRLwithTL.git
synced 2023-04-23 23:24:24 +03:00
Initial Commit
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
*.npy
*.pyc
*.xml
.idea/
images/
models/
107
Activation.py
Normal file
@@ -0,0 +1,107 @@
import numpy as np
import tensorflow as tf


class Activation(object):
    def forward(self, x):
        pass

    def gradient(self, x):
        pass


class Sigmoid(Activation):
    def __init__(self):
        pass

    def forward(self, x):
        return tf.sigmoid(x)

    def sigmoid_gradient(self, x):
        sig = tf.sigmoid(x)
        return tf.multiply(sig, tf.subtract(1.0, sig))

    def gradient(self, x):
        return tf.multiply(x, tf.subtract(1.0, x))


class Relu(Activation):

    def __init__(self):
        pass

    def forward(self, x):
        return tf.nn.relu(x)

    def gradient(self, x):
        # pretty sure this gradient works for A and Z
        return tf.cast(x > 0.0, dtype=tf.float32)


# https://theclevermachine.wordpress.com/tag/tanh-function/
class Tanh(Activation):

    def __init__(self):
        pass

    def forward(self, x):
        return tf.tanh(x)

    def gradient(self, x):
        # this is the gradient wrt A, not Z
        return 1 - tf.pow(x, 2)


# https://medium.com/@aerinykim/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d
# /home/brian/tensorflow/tensorflow/python/ops/nn_grad ... grep "_SoftmaxGrad"

class Softmax(Activation):
    def __init__(self):
        pass

    def forward(self, x):
        return tf.nn.softmax(x)

    # this is gradient for A
    def gradient(self, x):
        # this is impossible and not bio plausible
        assert(False)

        # unreachable sketch of the softmax Jacobian diag(s) - s s^T;
        # the outer product needs a column vector, not a flat one
        flat = tf.reshape(x, [-1, 1])
        diagflat = tf.diag(tf.reshape(x, [-1]))
        dot = tf.matmul(flat, tf.transpose(flat))
        return diagflat - dot


class LeakyRelu(Activation):
    def __init__(self, leak=0.2):
        self.leak = leak

    def forward(self, x):
        return tf.nn.leaky_relu(x, alpha=self.leak)

    def gradient(self, x):
        # pretty sure this gradient works for A and Z
        return tf.add(tf.cast(x > 0.0, dtype=tf.float32), tf.cast(x < 0.0, dtype=tf.float32) * self.leak)


class SqrtRelu(Activation):
    def __init__(self):
        pass

    def forward(self, x):
        return tf.sqrt(tf.nn.relu(x))

    def gradient(self, x):
        # pretty sure this gradient works for A and Z
        return tf.cast(x > 0.0, dtype=tf.float32)


class Linear(Activation):

    def __init__(self):
        pass

    def forward(self, x):
        return x

    def gradient(self, x):
        return tf.ones(shape=tf.shape(x))
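For reference, a standalone NumPy check (not part of this commit) of the softmax Jacobian J = diag(s) - s s^T that Softmax.gradient above sketches in unreachable TF code:

# Hypothetical standalone check, not in the repo:
# verify J = diag(s) - s s^T against finite differences.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

z = np.array([1.0, 2.0, 0.5])
s = softmax(z)
J = np.diag(s) - np.outer(s, s)

eps = 1e-6
J_fd = np.stack([(softmax(z + eps * np.eye(3)[i]) - softmax(z - eps * np.eye(3)[i])) / (2 * eps)
                 for i in range(3)], axis=1)
assert np.allclose(J, J_fd, atol=1e-6)   # column i of J is d softmax / d z_i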
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Anwar, Malik Aqeel

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
54
Layer.py
Normal file
@@ -0,0 +1,54 @@
import tensorflow as tf
import numpy as np


class Layer:

    def __init__(self):
        super().__init__()

    ###################################################################

    def get_weights(self):
        pass

    def num_params(self):
        pass

    def forward(self, X):
        pass

    ###################################################################

    def backward(self, AI, AO, DO):
        pass

    def gv(self, AI, AO, DO):
        pass

    def train(self, AI, AO, DO):
        pass

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        pass

    def dfa_gv(self, AI, AO, E, DO):
        pass

    def dfa(self, AI, AO, E, DO):
        pass

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        assert(False)

    def lel_gv(self, AI, AO, E, DO, Y):
        assert(False)

    def lel(self, AI, AO, E, DO, Y):
        assert(False)

    ###################################################################
72
MaxPool.py
Normal file
@@ -0,0 +1,72 @@
import tensorflow as tf
import numpy as np
import math
from tensorflow.python.ops import gen_nn_ops
# return gen_nn_ops.max_pool_v2(value=X, ksize=self.size, strides=self.strides, padding="SAME")

from Layer import Layer
from Activation import Activation
from Activation import Sigmoid


class MaxPool(Layer):
    def __init__(self, size, ksize, strides, padding):
        self.size = size
        self.ksize = ksize
        self.strides = strides
        self.padding = padding

    ###################################################################

    def get_weights(self):
        return []

    def num_params(self):
        return 0

    def forward(self, X):
        Z = tf.nn.max_pool(X, ksize=self.ksize, strides=self.strides, padding=self.padding)
        # Z = tf.Print(Z, [Z], message="", summarize=1000)
        return Z

    ###################################################################

    def backward(self, AI, AO, DO):
        grad = gen_nn_ops.max_pool_grad(grad=DO, orig_input=AI, orig_output=AO, ksize=self.ksize, strides=self.strides, padding=self.padding)
        return grad

    def gv(self, AI, AO, DO):
        return []

    def train(self, AI, AO, DO):
        return []

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        grad = gen_nn_ops.max_pool_grad(grad=DO, orig_input=AI, orig_output=AO, ksize=self.ksize, strides=self.strides, padding=self.padding)
        # grad = tf.Print(grad, [tf.shape(grad), tf.count_nonzero(tf.equal(grad, 1)), tf.count_nonzero(tf.equal(grad, 2)), tf.count_nonzero(tf.equal(grad, 3)), tf.count_nonzero(tf.equal(grad, 4)), tf.count_nonzero(tf.equal(grad, 5))], message="", summarize=1000)
        return grad

    def dfa_gv(self, AI, AO, E, DO):
        return []

    def dfa(self, AI, AO, E, DO):
        return []

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        grad = gen_nn_ops.max_pool_grad(grad=DO, orig_input=AI, orig_output=AO, ksize=self.ksize, strides=self.strides, padding=self.padding)
        # grad = tf.Print(grad, [tf.shape(grad), tf.count_nonzero(tf.equal(grad, 1)), tf.count_nonzero(tf.equal(grad, 2)), tf.count_nonzero(tf.equal(grad, 3)), tf.count_nonzero(tf.equal(grad, 4)), tf.count_nonzero(tf.equal(grad, 5))], message="", summarize=1000)
        return grad

    def lel_gv(self, AI, AO, E, DO, Y):
        return []

    def lel(self, AI, AO, E, DO, Y):
        return []

    ###################################################################
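A minimal usage sketch (not part of this commit; assumes TF 1.x graph mode, NHWC tensors, and that the unused size argument can be passed as None):

# Hypothetical usage: pool a 4x4 map to 2x2 and route a gradient back.
import tensorflow as tf
import numpy as np
from MaxPool import MaxPool

pool = MaxPool(size=None, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

AI = tf.constant(np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1))
AO = pool.forward(AI)            # shape (1, 2, 2, 1)
DO = tf.ones_like(AO)            # incoming gradient
DI = pool.backward(AI, AO, DO)   # routed to the argmax positions

with tf.Session() as sess:
    print(sess.run(AO)[0, :, :, 0])   # maxima of each 2x2 window
    print(sess.run(DI)[0, :, :, 0])   # 1s only where the maxima were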
249
aux_functions.py
Normal file
@@ -0,0 +1,249 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 10/14/2019, 12:50 PM
# Email: aqeel.anwar@gatech.edu
import numpy as np
import math
import random
import time
import airsim
import pygame
from configs.read_cfg import read_cfg


def translate_action(action, num_actions):
    action_word = ['Forward', 'Right', 'Left', 'Sharp Right', 'Sharp Left']
    sqrt_num_actions = np.sqrt(num_actions)
    ind = np.arange(sqrt_num_actions)
    if sqrt_num_actions % 2 == 0:
        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))
    else:
        v_string = list('U' * int((sqrt_num_actions - 1) / 2) + 'D' * int((sqrt_num_actions - 1) / 2))
        h_string = list('L' * int((sqrt_num_actions - 1) / 2) + 'R' * int((sqrt_num_actions - 1) / 2))

    v_ind = int(action[0] / sqrt_num_actions)
    h_ind = int(action[0] % sqrt_num_actions)
    action_word = v_string[v_ind] + str(int(np.ceil(abs((sqrt_num_actions - 1) / 2 - v_ind)))) + '-' + h_string[h_ind] + str(int(np.ceil(abs((sqrt_num_actions - 1) / 2 - h_ind))))

    return action_word


def get_errors(data_tuple, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):

    _, Q_target, _, err, _ = minibatch_double(data_tuple, len(data_tuple), choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip)

    return err


def minibatch_double(data_tuple, batch_size, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):
    # Needs NOT to be in DeepAgent
    # NO TD error term, and using huber loss instead
    # Bellman Optimality equation update, with less computation, updated

    if batch_size == 1:
        train_batch = data_tuple
        idx = None
    else:
        batch = ReplayMemory.sample(batch_size)
        train_batch = np.array([b[1][0] for b in batch])
        idx = [b[0] for b in batch]

    actions = np.zeros(shape=(batch_size), dtype=int)
    crashes = np.zeros(shape=(batch_size))
    rewards = np.zeros(shape=batch_size)
    curr_states = np.zeros(shape=(batch_size, input_size, input_size, 3))
    new_states = np.zeros(shape=(batch_size, input_size, input_size, 3))
    for ii, m in enumerate(train_batch):
        curr_state_m, action_m, new_state_m, reward_m, crash_m = m
        curr_states[ii, :, :, :] = curr_state_m[...]
        actions[ii] = action_m
        new_states[ii, :, :, :] = new_state_m
        rewards[ii] = reward_m
        crashes[ii] = crash_m

    #
    # oldQval = np.zeros(shape = [batch_size, num_actions])
    if choose:
        oldQval_A = target_agent.Q_val(curr_states)
        newQval_A = target_agent.Q_val(new_states)
        newQval_B = agent.Q_val(new_states)
    else:
        oldQval_A = agent.Q_val(curr_states)
        newQval_A = agent.Q_val(new_states)
        newQval_B = target_agent.Q_val(new_states)

    TD = np.zeros(shape=[batch_size])
    err = np.zeros(shape=[batch_size])
    Q_target = np.zeros(shape=[batch_size])

    term_ind = np.where(rewards == -1)[0]
    nonterm_ind = np.where(rewards != -1)[0]

    TD[nonterm_ind] = rewards[nonterm_ind] + gamma * newQval_B[nonterm_ind, np.argmax(newQval_A[nonterm_ind], axis=1)] - oldQval_A[nonterm_ind, actions[nonterm_ind].astype(int)]
    TD[term_ind] = rewards[term_ind]

    if Q_clip:
        TD_clip = np.clip(TD, -1, 1)
    else:
        TD_clip = TD

    Q_target[nonterm_ind] = oldQval_A[nonterm_ind, actions[nonterm_ind].astype(int)] + TD_clip[nonterm_ind]
    Q_target[term_ind] = TD_clip[term_ind]

    err = abs(TD)  # or abs(TD_clip)
    return curr_states, Q_target, actions, err, idx


def policy(epsilon, curr_state, iter, b, epsilon_model, wait_before_train, num_actions, agent):
    qvals = []
    epsilon_ceil = 0.95
    if epsilon_model == 'linear':
        epsilon = epsilon_ceil * (iter - wait_before_train) / (b - wait_before_train)
        if epsilon > epsilon_ceil:
            epsilon = epsilon_ceil

    elif epsilon_model == 'exponential':
        epsilon = 1 - math.exp(-2 / (b - wait_before_train) * (iter - wait_before_train))
        if epsilon > epsilon_ceil:
            epsilon = epsilon_ceil

    if random.random() > epsilon:
        sss = curr_state.shape
        action = np.random.randint(0, num_actions, size=sss[0], dtype=np.int32)
        action_type = 'Rand'
    else:
        # Use NN to predict action
        action = agent.action_selection(curr_state)
        action_type = 'Pred'
        # print(action_array/(np.mean(action_array)))
    return action, action_type, epsilon, qvals


def reset_to_initial(level, reset_array, client):
    reset_pos = reset_array[level]
    client.simSetVehiclePose(reset_pos, ignore_collison=True)
    time.sleep(0.1)


def connect_drone():
    print('------------------------------ Drone ------------------------------')
    client = airsim.MultirotorClient(timeout_value=10)
    client.confirmConnection()
    old_posit = client.simGetVehiclePose()
    client.simSetVehiclePose(
        airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation),
        ignore_collison=True)

    return client, old_posit


def blit_text(surface, text, pos, font, color=pygame.Color('black')):
    words = [word.split(' ') for word in text.splitlines()]  # 2D array where each row is a list of words.
    space = font.size(' ')[0]  # The width of a space.
    max_width, max_height = surface.get_size()
    x, y = pos
    for line in words:
        for word in line:
            word_surface = font.render(word, 0, color)
            word_width, word_height = word_surface.get_size()
            if x + word_width >= max_width:
                x = pos[0]  # Reset the x.
                y += word_height  # Start on new row.
            surface.blit(word_surface, (x, y))
            x += word_width + space
        x = pos[0]  # Reset the x.
        y += word_height  # Start on new row.


def pygame_connect(H, W):
    pygame.init()
    screen_width = H
    screen_height = W
    screen = pygame.display.set_mode([screen_width, screen_height])
    carImg = pygame.image.load('images/keys.png')
    screen.blit(carImg, (0, 0))
    pygame.display.set_caption('DLwithTL')
    # screen.fill((21, 116, 163))
    # text = 'Supported Keys:\n'
    # font = pygame.font.SysFont('arial', 32)
    # blit_text(screen, text, (20, 20), font, color = (214, 169, 19))
    # pygame.display.update()
    #
    # font = pygame.font.SysFont('arial', 24)
    # text = 'R - Reconnect unreal\nbackspace - Pause/play\nL - Update configurations\nEnter - Save Network'
    # blit_text(screen, text, (20, 70), font, color=(214, 169, 19))
    pygame.display.update()

    return screen


def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ):
    for event in pygame.event.get():

        if event.type == pygame.QUIT:
            active = False
            pygame.quit()

        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_l:
                # Load the parameters - epsilon
                cfg = read_cfg(config_filename='configs/config.cfg', verbose=False)
                lr = cfg.lr
                print('Updated Parameters')
                print('Learning Rate: ', cfg.lr)

            if event.key == pygame.K_RETURN:
                # take_action(-1)
                automate = False
                print('Saving Model')
                # agent.save_network(iter, save_path, ' ')
                agent.save_network(network_path)
                # agent.save_data(iter, data_tuple, tuple_path)
                print('Model Saved: ', network_path)

            if event.key == pygame.K_BACKSPACE:
                automate = automate ^ True

            if event.key == pygame.K_r:
                # reconnect
                client = []
                client = airsim.MultirotorClient()
                client.confirmConnection()
                # posit1_old = client.simGetVehiclePose()
                client.simSetVehiclePose(old_posit,
                                         ignore_collison=True)
                agent.client = client

            if event.key == pygame.K_m:
                agent.get_state()
                print('got_state')
                # automate = automate ^ True

            # Set the routine for manual control if not automate

            if not automate:
                # print('manual')
                # action=[-1]
                if event.key == pygame.K_UP:
                    action = 0
                elif event.key == pygame.K_RIGHT:
                    action = 1
                elif event.key == pygame.K_LEFT:
                    action = 2
                elif event.key == pygame.K_d:
                    action = 3
                elif event.key == pygame.K_a:
                    action = 4
                elif event.key == pygame.K_DOWN:
                    action = -2
                elif event.key == pygame.K_y:
                    pos = client.getPosition()

                    client.moveToPosition(pos.x_val, pos.y_val, 3 * initZ, 1)
                    time.sleep(0.5)
                elif event.key == pygame.K_h:
                    client.reset()
                # agent.take_action(action)

    return active, automate, lr, client
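A small numeric walk-through (not part of this commit) of the double-DQN target built in minibatch_double above: one network's newQval_A picks the argmax action at the next state, the other network's newQval_B evaluates it:

# Hypothetical walk-through of the double-DQN update (values made up):
import numpy as np

gamma = 0.99
reward = 0.8                          # non-terminal; terminal steps use reward = -1
oldQ_A = np.array([0.5, 0.2, 0.1])    # Q(s, .) from the net being updated
newQ_A = np.array([0.4, 0.9, 0.3])    # Q(s', .) from the same net: picks argmax
newQ_B = np.array([0.6, 0.7, 0.2])    # Q(s', .) from the other net: evaluates it
action = 0

TD = reward + gamma * newQ_B[np.argmax(newQ_A)] - oldQ_A[action]  # 0.8 + 0.99*0.7 - 0.5 = 0.993
TD_clip = np.clip(TD, -1, 1)          # Q_clip is True in configs/config.cfg
Q_target = oldQ_A[action] + TD_clip   # 0.5 + 0.993 = 1.493
print(TD, Q_target)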
28
configs/config.cfg
Normal file
@@ -0,0 +1,28 @@
[general_params]
run_name: Tello_indoor
custom_load: False
custom_load_path: DeepNet/models/e2e/e2e
env_type: Indoor
env_name: indoor_long

[simulation_params]
load_data: False
load_data_path: DeepNet/models/Tello_indoor/VanLeer/

[RL_params]
num_actions: 400
train_type: e2e
wait_before_train: 100
max_iters: 300000
buffer_len: 60000
batch_size: 32
epsilon_saturation: 200000
crash_thresh: 1.3
Q_clip: True
train_interval: 3
update_target_interval: 8000
gamma: 0.99
dropout_rate: 0.1
learning_rate: 1e-3
switch_env_steps: 3000
epsilon_model: exponential
59
configs/read_cfg.py
Normal file
@@ -0,0 +1,59 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 9/20/2019, 12:43 PM
# Email: aqeel.anwar@gatech.edu

import configparser as cp
from dotmap import DotMap


def read_cfg(config_filename='configs/main.cfg', verbose=False):
    # Load from config file
    cfg = DotMap()

    config = cp.ConfigParser()
    config.read(config_filename)

    cfg.run_name = config.get('general_params', 'run_name')
    if str(config.get('general_params', 'custom_load')) == 'True':
        cfg.custom_load = True
    else:
        cfg.custom_load = False
    cfg.custom_load_path = str(config.get('general_params', 'custom_load_path'))
    cfg.env_type = config.get('general_params', 'env_type')
    cfg.env_name = config.get('general_params', 'env_name')

    # [Simulation Parameters]
    if str(config.get('simulation_params', 'load_data')) == 'True':
        cfg.load_data = True
    else:
        cfg.load_data = False
    cfg.load_data_path = str(config.get('simulation_params', 'load_data_path'))

    # [RL Parameters]
    cfg.num_actions = int(config.get('RL_params', 'num_actions').split(',')[0])
    cfg.train_type = config.get('RL_params', 'train_type')
    cfg.wait_before_train = int(config.get('RL_params', 'wait_before_train').split(',')[0])
    cfg.max_iters = int(config.get('RL_params', 'max_iters').split(',')[0])
    cfg.buffer_len = int(config.get('RL_params', 'buffer_len').split(',')[0])
    cfg.batch_size = int(config.get('RL_params', 'batch_size').split(',')[0])
    cfg.epsilon_saturation = int(config.get('RL_params', 'epsilon_saturation').split(',')[0])
    cfg.crash_thresh = float(config.get('RL_params', 'crash_thresh').split(',')[0])
    cfg.gamma = float(config.get('RL_params', 'gamma').split(',')[0])
    cfg.dropout_rate = float(config.get('RL_params', 'dropout_rate').split(',')[0])
    cfg.lr = float(config.get('RL_params', 'learning_rate').split(',')[0])
    cfg.switch_env_steps = int(config.get('RL_params', 'switch_env_steps').split(',')[0])
    cfg.epsilon_model = config.get('RL_params', 'epsilon_model')
    # bool() of any non-empty string is True, so parse the flag explicitly
    cfg.Q_clip = str(config.get('RL_params', 'Q_clip')) == 'True'
    cfg.train_interval = int(config.get('RL_params', 'train_interval').split(',')[0])
    cfg.update_target_interval = int(config.get('RL_params', 'update_target_interval').split(',')[0])

    if verbose:
        print('------------------------------ Config File ------------------------------')
        for param in cfg:
            spaces = ' ' * (30 - len(param))
            print(param + ':' + spaces + str(cfg[param]))

        # print('-------------------------------------------------------------------------')
        print()
    return cfg
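A minimal usage sketch (not part of this commit), assuming the repo layout and the configs/config.cfg shown above:

# Hypothetical usage of read_cfg from the repo root:
from configs.read_cfg import read_cfg

cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)
print(cfg.num_actions, cfg.lr, cfg.Q_clip)   # e.g. 400 0.001 True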
149
environments/initial_positions.py
Normal file
@@ -0,0 +1,149 @@
import numpy as np
import airsim


def indoor_meta():

    orig_ip = [  # x, y, theta in DEGREES

        # One - Pyramid
        [-21593, -1563, -45],  # Player Start
        [-22059, -2617, -45],
        [-22800, -3489, 90],

        # Two - FrogEyes
        [-15744, -1679, 0],
        [-15539, -3043, 180],
        [-13792, -3371, 90],

        # Three - UpDown
        [-11221, -3171, 180],
        [-9962, -3193, 0],
        [-7464, -4558, 90],

        # Four - Long
        [-649, -4287, 180],  # Player Start
        [-4224, -2601, 180],
        [1180, -2153, -90],

        # Five - VanLeer
        [6400, -4731, 90],  # Player Start
        [5992, -2736, 180],
        [8143, -2835, -90],

        # Six - Complex_Indoor
        [11320, -2948, 0],
        [12546, -3415, -180],
        [10809, -2106, 0],

        # Seven - Techno
        [19081, -8867, 0],
        [17348, -3864, -120],
        [20895, -4757, 30],

        # Eight - GT
        [26042, -4336, 180],
        [26668, -3070, 0],
        [27873, -2792, -135]

    ]  # x, y, theta
    level_name = [
        'Pyramid1', 'Pyramid2', 'Pyramid3',
        'FrogEyes1', 'FrogEyes2', 'FrogEyes3',
        'UpDown1', 'UpDown2', 'UpDown3',
        'Long1', 'Long2', 'Long3',
        'VanLeer1', 'VanLeer2', 'VanLeer3',
        'ComplexIndoor1', 'ComplexIndoor2', 'ComplexIndoor3',
        'Techno1', 'Techno2', 'Techno3',
        'GT1', 'GT2', 'GT3',
    ]
    crash_threshold = 0.07
    initZ = -1
    return orig_ip, level_name, crash_threshold, initZ


# Test condo indoor initial positions

def indoor_condo():
    orig_ip = [
        [-290, -1700, 0],  # Player start
        [580, 1200, 180],
        [-240, -500, 90]
    ]
    level_name = ['Condo1', 'Condo2', 'Condo3']
    crash_threshold = 0.07
    initZ = -2
    return orig_ip, level_name, crash_threshold, initZ


def indoor_techno():
    orig_ip = [
        [19081, -8867, 0],
        [17348, -3864, -120],
        [20895, -4757, 30]
    ]
    level_name = ['Techno1', 'Techno2', 'Techno3']
    crash_threshold = 0.07
    initZ = -2
    return orig_ip, level_name, crash_threshold, initZ


def indoor_long():
    orig_ip = [
        [-649, -4287, 180],  # Player Start
        [-4224, -2601, 180],
        [1180, -2153, -90],
        [2058, -3184, 50],
        [1644, -1464, 15],
        [-3754, -4302, 0]
    ]
    level_name = ['Long1', 'Long2', 'Long3', 'Long4', 'Long5', 'Long6']
    crash_threshold = 0.07
    initZ = -1
    return orig_ip, level_name, crash_threshold, initZ


# Four - Long

# Seven - Techno


def indoor_twisty():
    orig_ip = [
        [127, -770, 90],  # Player start
        [2209, -115, 180],
        [1680, 1590, -145]
    ]
    level_name = ['Twisty1', 'Twisty2', 'Twisty3']
    crash_threshold = 0.07
    initZ = -2
    return orig_ip, level_name, crash_threshold, initZ


def indoor_pretzel():
    orig_ip = [
        [3308, 650, 180],  # Player start
        [3330, -200, -160],
        [1480, -1040, 25]
    ]
    level_name = ['Cloud1', 'Cloud2', 'Cloud3']
    crash_threshold = 0.07
    initZ = -1.5
    return orig_ip, level_name, crash_threshold, initZ


def initial_positions(name):
    name = name + '()'
    orig_ip, level_name, crash_threshold, initZ = eval(name)
    player_start_unreal = orig_ip[0]
    reset_array = []

    for i in range(0, len(orig_ip)):
        x1 = (orig_ip[i][0] - player_start_unreal[0]) / 100
        y1 = (orig_ip[i][1] - player_start_unreal[1]) / 100
        z1 = initZ
        pitch = 0
        roll = 0
        yaw = orig_ip[i][2] * np.pi / 180
        pp = airsim.Pose(airsim.Vector3r(x1, y1, z1), airsim.to_quaternion(pitch, roll, yaw))
        reset_array.append(pp)

    return reset_array, level_name, crash_threshold, initZ
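A quick walk-through (not part of this commit) of the conversion initial_positions performs: Unreal map coordinates in centimeters become AirSim poses in meters relative to Player Start, with yaw in radians:

# Hypothetical walk-through using the first two entries of indoor_long():
import numpy as np

orig_ip = [[-649, -4287, 180], [-4224, -2601, 180]]
start = orig_ip[0]                                # Player Start in Unreal cm
x1 = (orig_ip[1][0] - start[0]) / 100             # -35.75 m
y1 = (orig_ip[1][1] - start[1]) / 100             # 16.86 m
yaw = orig_ip[1][2] * np.pi / 180                 # pi rad
print(x1, y1, yaw)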
282
main.py
Normal file
@@ -0,0 +1,282 @@
# Branch - DFA Implementation
import sys
from network.agent import DeepAgent
from environments.initial_positions import *
import os
import psutil
from os import getpid
from network.Memory import Memory
from aux_functions import *
from configs.read_cfg import read_cfg

# Debug message suppressed
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

process = psutil.Process(getpid())

cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)


input_size = 227
num_actions = cfg.num_actions

screen = pygame_connect(H=925, W=380)

dropout_rate = cfg.dropout_rate  # read_cfg() sets dropout_rate, not dropout
env_type = cfg.env_type
env_name = cfg.env_name
train_type = cfg.train_type  # 'e2e', 'last4', 'last3', 'last2'
weights_type = 'Imagenet'

reset_array, level_name, crash_threshold, initZ = initial_positions(env_name)

epsilon_annealing = cfg.epsilon_saturation
wait_before_train = cfg.wait_before_train
train_interval = cfg.train_interval
max_iters = cfg.max_iters
gamma = cfg.gamma
update_target = cfg.update_target_interval
buffer_length = cfg.buffer_len
ReplayMemory = Memory(cfg.buffer_len)
switch_env_steps = cfg.switch_env_steps
batch_size = cfg.batch_size
Q_clip = cfg.Q_clip
custom_load = cfg.custom_load
custom_load_path = cfg.custom_load_path
lr = cfg.lr
epsilon = cfg.epsilon  # note: config.cfg defines no 'epsilon'; policy() overwrites it every iteration
epsilon_model = cfg.epsilon_model

# Save the network to the directory network_path
if custom_load == True:
    network_path = 'models/trained/' + env_type + '/' + env_name + '/' + 'CustomLoad/' + train_type + '/' + train_type
else:
    network_path = 'models/trained/' + env_type + '/' + env_name + '/' + weights_type + '/' + train_type + '/' + train_type

if not os.path.exists(network_path):
    os.makedirs(network_path)

# Connect to Unreal Engine and get the drone handle: client
client, old_posit = connect_drone()

# Define DQN agents
agent = DeepAgent(input_size, num_actions, client, env_type, train_type, network_path, name='DQN')
target_agent = DeepAgent(input_size, num_actions, client, env_type, train_type, network_path, name='Target')

# Load custom weights from custom_load_path if required
if custom_load == True:
    print('Loading weights from: ', custom_load_path)
    agent.load_network(custom_load_path)
    target_agent.load_network(custom_load_path)


iter = 0
num_col1 = 0
epi1 = 0

active = True
action_type = 'Wait_for_expert'

automate = True
epsilon_greedy = True
choose = False
print_qval = False
last_crash1 = 0
environ = True
e1 = 0
e2 = 0
ret = 0
dist = 0
switch_env = False

save_posit = old_posit

level_state = [None] * len(level_name)
level_posit = [None] * len(level_name)
last_crash_array = np.zeros(shape=len(level_name), dtype=np.int32)
ret_array = np.zeros(shape=len(level_name))
dist_array = np.zeros(shape=len(level_name))
epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32)

level = 0
times_switch = 0
curr_state1 = agent.get_state()

i = 0
log_path = network_path + 'log.txt'
f = open(log_path, 'w')


while active:
    try:

        active, automate, lr, client = check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ)

        if automate:
            start_time = time.time()
            if switch_env:
                posit1_old = client.simGetVehiclePose()
                times_switch = times_switch + 1
                level_state[level] = curr_state1
                level_posit[level] = posit1_old
                last_crash_array[level] = last_crash1
                ret_array[level] = ret
                dist_array[level] = dist
                epi_env_array[int(level / 3)] = epi1

                level = (level + 1) % len(reset_array)

                print('Transferring to level: ', level, ' - ', level_name[level])

                if times_switch < len(reset_array):
                    reset_to_initial(level, reset_array, client)
                else:
                    curr_state1 = level_state[level]
                    posit1_old = level_posit[level]

                    reset_to_initial(level, reset_array, client)
                    client.simSetVehiclePose(posit1_old, ignore_collison=True)
                    time.sleep(0.1)

                last_crash1 = last_crash_array[level]
                ret = ret_array[level]
                dist = dist_array[level]
                epi1 = epi_env_array[int(level / 3)]
                xxx = client.simGetVehiclePose()
                environ = environ ^ True


            action1, action_type1, epsilon, qvals = policy(epsilon, curr_state1, iter, epsilon_annealing, epsilon_model, wait_before_train, num_actions, agent)

            action_word1 = translate_action(action1, num_actions)
            # Take the action
            agent.take_action(action1, num_actions)
            time.sleep(0.05)

            posit = client.simGetVehiclePose()

            new_state1 = agent.get_state()
            new_depth1, thresh = agent.get_depth()

            # Get GPS information
            posit = client.simGetVehiclePose()
            orientation = posit.orientation
            position = posit.position
            old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
            new_p = np.array([position.x_val, position.y_val])
            # calculate distance
            dist = dist + np.linalg.norm(new_p - old_p)
            old_posit = posit

            reward1, crash1 = agent.reward_gen(new_depth1, action1, crash_threshold, thresh)

            ret = ret + reward1
            agent_state1 = agent.GetAgentState()

            if agent_state1.has_collided:
                # if car_state.collision.object_id==77:

                num_col1 = num_col1 + 1
                print('crash')
                crash1 = True
                reward1 = -1
            data_tuple = []
            data_tuple.append([curr_state1, action1, new_state1, reward1, crash1])
            err = get_errors(data_tuple, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip)
            ReplayMemory.add(err, data_tuple)

            # Train if sufficient frames have been stored
            if iter > wait_before_train:
                if iter % train_interval == 0:
                    # Train the RL network
                    old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, batch_size, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip)

                    for i in range(batch_size):
                        ReplayMemory.update(idx[i], err[i])

                    if print_qval:
                        print(Qvals)

                    if choose:
                        # Double-DQN
                        target_agent.train_n(old_states, Qvals, actions, batch_size, dropout_rate, lr, epsilon, iter)
                    else:
                        agent.train_n(old_states, Qvals, actions, batch_size, dropout_rate, lr, epsilon, iter)

                if iter % update_target == 0:
                    agent.take_action([-1], num_actions)
                    print('Switching Target Network')
                    choose = not choose
                    agent.save_network(network_path)

            iter += 1

            time_exec = time.time() - start_time
            VC = ''
            if environ:
                e1 = e1 + 1
                e_print = e1
            else:
                e2 = e2 + 1
                e_print = e2
            # init_p = epi1%len(init_pose_array)
            mem_percent = process.memory_info()[0] / 2.**30

            s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
                int(level), iter, epi1,
                action_word1,
                action_type1,
                epsilon,
                lr,
                ret,
                last_crash1,
                time_exec,
                mem_percent,
                reward1)

            print(s_log)
            f.write(s_log + '\n')

            last_crash1 = last_crash1 + 1

            if crash1:
                agent.return_plot(ret, epi1, int(level / 3), mem_percent, iter, dist)
                ret = 0
                dist = 0
                epi1 = epi1 + 1
                last_crash1 = 0

                reset_to_initial(level, reset_array, client)
                time.sleep(0.2)
                curr_state1 = agent.get_state()
            else:
                curr_state1 = new_state1


            if iter % switch_env_steps == 0:
                switch_env = True
            else:
                switch_env = False

            if iter % max_iters == 0:
                automate = False

            # if iter > 140:
            #     active = False


    except Exception as e:
        print('------------- Error -------------')
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        print(exc_obj)
        automate = False
        print('Hit r and then backspace to start from this point')
36
network/Memory.py
Normal file
@@ -0,0 +1,36 @@
# Code used from https://github.com/rlcode/per

from network.SumTree import SumTree
import random

# -------------------- MEMORY --------------------------
class Memory:  # stored as ( s, a, r, s_ ) in SumTree
    e = 0.01
    a = 0.6

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def _getPriority(self, error):
        return (error + self.e) ** self.a

    def add(self, error, sample):
        p = self._getPriority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append((idx, data))

        return batch

    def update(self, idx, error):
        p = self._getPriority(error)
        self.tree.update(idx, p)
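A minimal usage sketch (not part of this commit): priorities follow p = (|error| + e)^a, so transitions with larger TD error are drawn more often, and update() re-weights a sampled transition after training:

# Hypothetical usage of the prioritized replay memory above:
from network.Memory import Memory

mem = Memory(capacity=8)
mem.add(error=0.5, sample=('s', 'a', 'r', 's_'))      # priority (0.5 + 0.01)**0.6
mem.add(error=2.0, sample=('s2', 'a2', 'r2', 's2_'))  # sampled ~ proportionally more

batch = mem.sample(2)                 # [(tree_idx, sample), ...]
for idx, data in batch:
    new_error = 0.1                   # recomputed TD error after a train step
    mem.update(idx, new_error)        # keeps future sampling proportional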
54
network/SumTree.py
Normal file
@@ -0,0 +1,54 @@
import numpy


class SumTree:
    write = 0

    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = numpy.zeros(2 * capacity - 1)
        self.data = numpy.zeros(capacity, dtype=object)

    def _propagate(self, idx, change):
        parent = (idx - 1) // 2

        self.tree[parent] += change

        if parent != 0:
            self._propagate(parent, change)

    def _retrieve(self, idx, s):
        left = 2 * idx + 1
        right = left + 1

        if left >= len(self.tree):
            return idx

        if s <= self.tree[left]:
            return self._retrieve(left, s)
        else:
            return self._retrieve(right, s - self.tree[left])

    def total(self):
        return self.tree[0]

    def add(self, p, data):
        idx = self.write + self.capacity - 1

        self.data[self.write] = data
        self.update(idx, p)

        self.write += 1
        if self.write >= self.capacity:
            self.write = 0

    def update(self, idx, p):
        change = p - self.tree[idx]

        self.tree[idx] = p
        self._propagate(idx, change)

    def get(self, s):
        idx = self._retrieve(0, s)
        dataIdx = idx - self.capacity + 1

        return (idx, self.tree[idx], self.data[dataIdx])
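A small numeric walk-through (not part of this commit) of the proportional lookup: with leaf priorities [1, 3] the root holds 4, so any s above 1 lands on the second leaf:

# Hypothetical walk-through of proportional sampling in the SumTree above:
from network.SumTree import SumTree

tree = SumTree(capacity=2)
tree.add(1.0, 'low')     # leaf priorities become [1.0, 3.0]
tree.add(3.0, 'high')    # root holds the sum

print(tree.total())      # 4.0
print(tree.get(0.5))     # s <= 1.0 -> (1, 1.0, 'low')
print(tree.get(2.5))     # s >  1.0 -> (2, 3.0, 'high')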
375
network/agent.py
Normal file
@@ -0,0 +1,375 @@
import numpy as np
import os
import tensorflow as tf
import cv2
from network.network import *
import airsim
import random
import matplotlib.pyplot as plt
from util.transformations import euler_from_quaternion
from PIL import Image
from network.loss_functions import *
from numpy import linalg as LA


class DeepAgent():
    def __init__(self, input_size, num_actions, client, env_type, train_fc, network_path, name):
        print('------------------------------ ' + str(name) + ' ------------------------------')
        self.g = tf.Graph()
        self.iter = 0
        with self.g.as_default():

            self.stat_writer = tf.summary.FileWriter(network_path + 'return_plot')
            # name_array = 'D:/train/loss'+'/'+name
            self.loss_writer = tf.summary.FileWriter(network_path + 'loss/' + name)
            self.env_type = env_type
            self.client = client
            self.input_size = input_size
            self.num_actions = num_actions

            # Placeholders
            self.batch_size = tf.placeholder(tf.int32, shape=())
            self.learning_rate = tf.placeholder(tf.float32, shape=())
            self.X1 = tf.placeholder(tf.float32, [None, input_size, input_size, 3], name='States')

            # self.X = tf.image.resize_images(self.X1, (227, 227))

            self.X = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), self.X1)
            self.target = tf.placeholder(tf.float32, shape=[None], name='Qvals')
            self.actions = tf.placeholder(tf.int32, shape=[None], name='Actions')

            initial_weights = 'imagenet'
            initial_weights = 'models/weights/weights.npy'
            self.model = AlexNetDuel(self.X, num_actions, train_fc)

            self.predict = self.model.output
            ind = tf.one_hot(self.actions, num_actions)
            pred_Q = tf.reduce_sum(tf.multiply(self.model.output, ind), axis=1)
            self.loss = huber_loss(pred_Q, self.target)
            self.train = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.99).minimize(self.loss, name="train")

            self.sess = tf.InteractiveSession()
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            self.saver = tf.train.Saver()

            self.sess.graph.finalize()

        print()

    def Q_val(self, xs):
        target = np.zeros(shape=[xs.shape[0]], dtype=np.float32)
        actions = np.zeros(dtype=int, shape=[xs.shape[0]])
        return self.sess.run(self.predict,
                             feed_dict={self.batch_size: xs.shape[0], self.learning_rate: 0, self.X1: xs,
                                        self.target: target, self.actions: actions})

    def train_n(self, xs, ys, actions, batch_size, dropout_rate, lr, epsilon, iter):
        # loss = self.sess.run(self.loss,
        #                      feed_dict={self.batch_size: batch_size, self.dropout_rate: dropout_rate, self.learning_rate: lr, self.X: xs,
        #                                 self.Y: ys, self.actions: actions})
        _, loss, Q = self.sess.run([self.train, self.loss, self.predict],
                                   feed_dict={self.batch_size: batch_size, self.learning_rate: lr, self.X1: xs,
                                              self.target: ys, self.actions: actions})
        meanQ = np.mean(Q)
        maxQ = np.max(Q)

        summary = tf.Summary()
        # summary.value.add(tag='Loss', simple_value=LA.norm(loss[ind, actions.astype(int)]))
        summary.value.add(tag='Loss', simple_value=LA.norm(loss) / batch_size)
        self.loss_writer.add_summary(summary, iter)

        summary = tf.Summary()
        summary.value.add(tag='Epsilon', simple_value=epsilon)
        self.loss_writer.add_summary(summary, iter)

        summary = tf.Summary()
        summary.value.add(tag='Learning Rate', simple_value=lr)
        self.loss_writer.add_summary(summary, iter)

        summary = tf.Summary()
        summary.value.add(tag='MeanQ', simple_value=meanQ)
        self.loss_writer.add_summary(summary, iter)

        summary = tf.Summary()
        summary.value.add(tag='MaxQ', simple_value=maxQ)
        self.loss_writer.add_summary(summary, iter)

        # return _correct

    def action_selection(self, state):
        target = np.zeros(shape=[state.shape[0]], dtype=np.float32)
        actions = np.zeros(dtype=int, shape=[state.shape[0]])
        qvals = self.sess.run(self.predict,
                              feed_dict={self.batch_size: state.shape[0], self.learning_rate: 0.0001,
                                         self.X1: state,
                                         self.target: target, self.actions: actions})

        if qvals.shape[0] > 1:
            # Evaluating batch
            action = np.argmax(qvals, axis=1)
        else:
            # Evaluating one sample
            action = np.zeros(1)
            action[0] = np.argmax(qvals)

        # self.action_array[action[0].astype(int)] += 1
        return action.astype(int)

    def take_action(self, action, num_actions):
        # Set Paramaters
        fov_v = 45 * np.pi / 180
        fov_h = 80 * np.pi / 180
        r = 0.4

        ignore_collision = False
        sqrt_num_actions = np.sqrt(num_actions)

        posit = self.client.simGetVehiclePose()
        pos = posit.position
        orientation = posit.orientation

        quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
        eulers = euler_from_quaternion(quat)
        alpha = eulers[2]

        theta_ind = int(action[0] / sqrt_num_actions)
        psi_ind = action[0] % sqrt_num_actions

        theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
        psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)

        noise_theta = (fov_v / sqrt_num_actions) / 6
        noise_psi = (fov_h / sqrt_num_actions) / 6

        psi = psi + random.uniform(-1, 1) * noise_psi
        theta = theta + random.uniform(-1, 1) * noise_theta

        # print('Theta: ', theta * 180 / np.pi, end='')
        # print(' Psi: ', psi * 180 / np.pi)

        x = pos.x_val + r * np.cos(alpha + psi)
        y = pos.y_val + r * np.sin(alpha + psi)
        z = pos.z_val + r * np.sin(theta)  # -ve because Unreal has -ve z direction going upwards

        self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
                                      ignore_collison=ignore_collision)

    def get_depth(self):
        responses = self.client.simGetImages([airsim.ImageRequest(2, airsim.ImageType.DepthVis, False, False)])
        depth = []
        img1d = np.fromstring(responses[0].image_data_uint8, dtype=np.uint8)
        depth = img1d.reshape(responses[0].height, responses[0].width, 4)[:, :, 0]

        # To make sure the wall leaks in the unreal environment don't mess up with the reward function
        thresh = 50
        super_threshold_indices = depth > thresh
        depth[super_threshold_indices] = thresh
        depth = depth / thresh
        # plt.imshow(depth)
        # # plt.gray()
        # plt.show()
        return depth, thresh

    def get_state(self):
        responses1 = self.client.simGetImages([  # depth visualization image
            airsim.ImageRequest("1", airsim.ImageType.Scene, False,
                                False)])  # scene vision image in uncompressed RGBA array

        response = responses1[0]
        img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8)  # get numpy array
        img_rgba = img1d.reshape(response.height, response.width, 4)
        img = Image.fromarray(img_rgba)
        img_rgb = img.convert('RGB')
        self.iter = self.iter + 1
        state = np.asarray(img_rgb)

        state = cv2.resize(state, (self.input_size, self.input_size), interpolation=cv2.INTER_LINEAR)
        state = cv2.normalize(state, state, 0, 1, cv2.NORM_MINMAX, cv2.CV_32F)
        state_rgb = []
        state_rgb.append(state[:, :, 0:3])
        state_rgb = np.array(state_rgb)
        state_rgb = state_rgb.astype('float32')

        return state_rgb

    def avg_depth(self, depth_map1, thresh):
        # Version 0.2
        # Thresholded depth map to ignore objects too far and give them a constant value
        # Globally (not locally as in the version 0.1) Normalise the thresholded map between 0 and 1
        # Threshold depends on the environment nature (indoor/ outdoor)
        depth_map = depth_map1
        # L1=0
        # R1=0
        # C1=0
        # print(global_depth)
        # dynamic_window = False
        plot_depth = False
        global_depth = np.mean(depth_map)
        n = global_depth * thresh / 3
        # print("n=", n)
        # n = 3
        H = np.size(depth_map, 0)
        W = np.size(depth_map, 1)
        grid_size = (np.array([H, W]) / n)

        # scale by 0.9 to select the window towards top from the mid line
        h = int(0.9 * H * (n - 1) / (2 * n))
        w = int(W * (n - 1) / (2 * n))
        grid_location = [h, w]

        x1 = int(round(grid_location[0]))
        y = int(round(grid_location[1]))

        a4 = int(round(grid_location[0] + grid_size[0]))

        a5 = int(round(grid_location[0] + grid_size[0]))
        b5 = int(round(grid_location[1] + grid_size[1]))

        a2 = int(round(grid_location[0] - grid_size[0]))
        b2 = int(round(grid_location[1] + grid_size[1]))

        a8 = int(round(grid_location[0] + 2 * grid_size[0]))
        b8 = int(round(grid_location[1] + grid_size[1]))

        b4 = int(round(grid_location[1] - grid_size[1]))
        if b4 < 0:
            b4 = 0

        a6 = int(round(grid_location[0] + grid_size[0]))
        b6 = int(round(grid_location[1] + 2 * grid_size[1]))
        if b6 > 640:
            b6 = 640

        # L = 1 / np.min(depth_map[x1:a4, b4:y])
        # C = 1 / np.min(depth_map[x1:a5, y:b5])
        # R = 1 / np.min(depth_map[x1:a6, b5:b6])

        fract_min = 0.05

        L_map = depth_map[x1:a4, b4:y]
        C_map = depth_map[x1:a5, y:b5]
        R_map = depth_map[x1:a6, b5:b6]

        L_sort = np.sort(L_map.flatten())
        end_ind = int(np.round(fract_min * len(L_sort)))
        L1 = np.mean(L_sort[0:end_ind])

        R_sort = np.sort(R_map.flatten())
        end_ind = int(np.round(fract_min * len(R_sort)))
        R1 = np.mean(R_sort[0:end_ind])

        C_sort = np.sort(C_map.flatten())
        end_ind = int(np.round(fract_min * len(C_sort)))
        C1 = np.mean(C_sort[0:end_ind])
        if plot_depth:
            cv2.rectangle(depth_map1, (y, x1), (b5, a5), (0, 0, 0), 3)
            cv2.rectangle(depth_map1, (y, x1), (b4, a4), (0, 0, 0), 3)
            cv2.rectangle(depth_map1, (b5, x1), (b6, a6), (0, 0, 0), 3)

            dispL = str(np.round(L1, 3))
            dispC = str(np.round(C1, 3))
            dispR = str(np.round(R1, 3))
            cv2.putText(depth_map1, dispL, (20, 75), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
            cv2.putText(depth_map1, dispC, (110, 75), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
            cv2.putText(depth_map1, dispR, (200, 75), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0))

            plt.imshow(depth_map1)
            plt.show()
            xxx = 1
            # time.sleep(0.7)
        #
        xxxxx = 1
        # print(L1, C1, R1)
        return L1, C1, R1


    def reward_gen(self, d_new, action, crash_threshold, thresh):
        L_new, C_new, R_new = self.avg_depth(d_new, thresh)
        # print('Rew_C', C_new)
        # print(L_new, C_new, R_new)
        # For now, lets keep the reward a simple one
        if C_new < crash_threshold:
            done = True
            reward = -1
        else:
            done = False
            if action == 0:
                reward = C_new
            else:
                # reward = C_new/3
                reward = C_new

        # if action != 0:
        #     reward = 0

        return reward, done

    def GetAgentState(self):
        return self.client.simGetCollisionInfo()

    def return_plot(self, ret, epi, env_type, mem_percent, iter, dist):
        # ret, epi1, int(level/4), mem_percent, iter
        summary = tf.Summary()
        tag = 'Return'
        summary.value.add(tag=tag, simple_value=ret)
        self.stat_writer.add_summary(summary, epi)

        summary = tf.Summary()
        summary.value.add(tag='Memory-GB', simple_value=mem_percent)
        self.stat_writer.add_summary(summary, iter)

        summary = tf.Summary()
        summary.value.add(tag='Safe Flight', simple_value=dist)
        self.stat_writer.add_summary(summary, epi)

    def save_network(self, save_path):
        self.saver.save(self.sess, save_path)

    def save_weights(self, save_path):
        name = ['conv1W', 'conv1b', 'conv2W', 'conv2b', 'conv3W', 'conv3b', 'conv4W', 'conv4b', 'conv5W', 'conv5b',
                'fc6aW', 'fc6ab', 'fc7aW', 'fc7ab', 'fc8aW', 'fc8ab', 'fc9aW', 'fc9ab', 'fc10aW', 'fc10ab',
                'fc6vW', 'fc6vb', 'fc7vW', 'fc7vb', 'fc8vW', 'fc8vb', 'fc9vW', 'fc9vb', 'fc10vW', 'fc10vb'
                ]
        weights = {}
        print('Saving weights in .npy format')
        for i in range(0, 30):
            # weights[name[i]] = self.sess.run(self.sess.graph._collections['variables'][i])
            if i == 0:
                str1 = 'Variable:0'
            else:
                str1 = 'Variable_' + str(i) + ':0'
            weights[name[i]] = self.sess.run(str1)
        save_path = save_path + 'weights.npy'
        np.save(save_path, weights)

    def load_network(self, load_path):
        self.saver.restore(self.sess, load_path)

    def get_weights(self):
        xs = np.zeros(shape=(32, 227, 227, 3))
        actions = np.zeros(dtype=int, shape=[xs.shape[0]])
        ys = np.zeros(shape=[xs.shape[0]], dtype=np.float32)
        return self.sess.run(self.weights,
                             feed_dict={self.batch_size: xs.shape[0], self.learning_rate: 0,
                                        self.X1: xs,
                                        self.target: ys, self.actions: actions})
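A small sketch (not part of this commit) of the action decoding in take_action above: with num_actions = 400, a 20 x 20 grid over the camera field of view maps an action index to a (theta, psi) direction before the noise is added:

# Hypothetical walk-through of take_action's grid decoding (num_actions = 400):
import numpy as np

num_actions = 400
sqrt_num_actions = np.sqrt(num_actions)            # 20 x 20 grid over the FOV
fov_v = 45 * np.pi / 180
fov_h = 80 * np.pi / 180

action = 210                                        # some grid cell
theta_ind = int(action / sqrt_num_actions)          # row 10
psi_ind = action % sqrt_num_actions                 # column 10
theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
print(np.degrees(theta), np.degrees(psi))           # ~1.125 and ~2.0 degrees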
16
network/loss_functions.py
Normal file
@@ -0,0 +1,16 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 2/22/2019, 4:57 PM
# Email: aqeel.anwar@gatech.edu
import numpy as np
import tensorflow as tf


def huber_loss(X, Y):
    err = X - Y
    loss = tf.where(tf.abs(err) < 1.0, 0.5 * tf.square(err), tf.abs(err) - 0.5)
    loss = tf.reduce_sum(loss)

    return loss


def mse_loss(X, Y):
    err = X - Y
    return tf.reduce_sum(tf.square(err))
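A quick numeric check (not part of this commit): huber_loss is quadratic inside |err| < 1 and linear outside, so errors of 0.5 and 3.0 contribute 0.125 and 2.5:

# Hypothetical numeric check of huber_loss above, in plain Python:
# 0.5 * err**2 for |err| < 1, |err| - 0.5 otherwise.
errs = [0.5, 3.0]
terms = [0.5 * e**2 if abs(e) < 1.0 else abs(e) - 0.5 for e in errs]
print(terms)        # [0.125, 2.5]
print(sum(terms))   # 2.625, matching the tf.reduce_sum in huber_loss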
343
network/network.py
Normal file
@@ -0,0 +1,343 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from network.loss_functions import huber_loss
|
||||
|
||||
|
||||
class AlexNetDuel(object):
|
||||
|
||||
def __init__(self, x, num_actions, train_type):
|
||||
self.x = x
|
||||
weights_path = 'models/imagenet.npy'
|
||||
weights = np.load(open(weights_path, "rb"), encoding="latin1").item()
|
||||
print('Loading imagenet weights for the conv layers and random for fc layers')
|
||||
train_conv = True
|
||||
train_fc6 = True
|
||||
train_fc7 = True
|
||||
train_fc8 = True
|
||||
train_fc9 = True
|
||||
|
||||
if train_type == 'last4':
|
||||
train_conv = False
|
||||
train_fc6 = False
|
||||
elif train_type == 'last3':
|
||||
train_conv = False
|
||||
train_fc6 = False
|
||||
train_fc7 = False
|
||||
elif train_type == 'last2':
|
||||
train_conv = False
|
||||
train_fc6 = False
|
||||
train_fc7 = False
|
||||
train_fc8 = False
|
||||
|
||||
self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID",trainable=train_conv)
|
||||
self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
|
||||
|
||||
self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME",trainable=train_conv)
|
||||
self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
|
||||
|
||||
self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME",trainable=train_conv)
|
||||
self.conv4 = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME",trainable=train_conv)
|
||||
self.conv5 = self.conv(self.conv4, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME",trainable=train_conv)
|
||||
self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
|
||||
|
||||
self.flat = tf.contrib.layers.flatten(self.maxpool5)
|
||||
|
||||
# Advantage Network
|
||||
self.fc6_a = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
|
||||
self.fc7_a = self.FullyConnected(self.fc6_a, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
|
||||
self.fc8_a = self.FullyConnected(self.fc7_a, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
|
||||
self.fc9_a = self.FullyConnected(self.fc8_a, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
|
||||
self.fc10_a = self.FullyConnected(self.fc9_a, units_in=512, units_out=num_actions, act='linear', trainable=True)
|
||||
|
||||
# Value Network
|
||||
self.fc6_v = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
|
||||
self.fc7_v = self.FullyConnected(self.fc6_v, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
|
||||
self.fc8_v = self.FullyConnected(self.fc7_v, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
|
||||
self.fc9_v = self.FullyConnected(self.fc8_v, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
|
||||
self.fc10_v = self.FullyConnected(self.fc9_v, units_in=512, units_out=1, act='linear', trainable=True)
|
||||
|
||||
self.output = self.fc10_v + tf.subtract(self.fc10_a, tf.reduce_mean(self.fc10_a, axis=1, keep_dims=True))
|
||||
|
||||
|
||||
def conv(self, input, W, b, k, out, s, p, trainable=True):
|
||||
assert (W.shape[0] == k)
|
||||
assert (W.shape[1] == k)
|
||||
assert (W.shape[3] == out)
|
||||
|
||||
conv_kernel_1 = tf.nn.conv2d(input, tf.Variable(W, trainable), [1, s, s, 1], padding=p)
|
||||
bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable))
|
||||
|
||||
return tf.nn.relu(bias_layer_1)
|
||||
|
||||
def FullyConnected(self, input, units_in, units_out, act, trainable=True):
|
||||
W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
|
||||
b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)
|
||||
|
||||
if act == 'relu':
|
||||
return tf.nn.relu_layer(input, W,b)
|
||||
elif act == 'linear':
|
||||
return tf.nn.xw_plus_b(input, W, b)
|
||||
else:
|
||||
assert (1 == 0)
|
||||
|
||||
|
||||
|
||||
class AlexNetConditional(object):
|
||||
|
||||
def __init__(self, x, num_actions, train_type):
|
||||
self.x = x
|
||||
weights_path = 'models/imagenet.npy'
|
||||
weights = np.load(open(weights_path, "rb"), encoding="latin1").item()
|
||||
print('Loading imagenet weights for the conv layers and random for fc layers')
|
||||
        train_conv = True
        train_fc6 = True
        train_fc7 = True
        train_fc8 = True
        train_fc9 = True

        if train_type == 'last4':
            train_conv = False
            train_fc6 = False
        elif train_type == 'last3':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
        elif train_type == 'last2':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
            train_fc8 = False
        self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)

        # Split the network into two streams from this point onwards

        # One - Main Network
        self.conv4_main = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv5_main = self.conv(self.conv4_main, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5_main = tf.nn.max_pool(self.conv5_main, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.flat_main = tf.contrib.layers.flatten(self.maxpool5_main)

        # Advantage Network
        self.fc6_a_main = self.FullyConnected(self.flat_main, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7_a_main = self.FullyConnected(self.fc6_a_main, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        self.fc8_a_main = self.FullyConnected(self.fc7_a_main, units_in=2048, units_out=num_actions, act='linear', trainable=train_fc8)

        # Value Network
        self.fc6_v_main = self.FullyConnected(self.flat_main, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7_v_main = self.FullyConnected(self.fc6_v_main, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        self.fc8_v_main = self.FullyConnected(self.fc7_v_main, units_in=2048, units_out=1, act='linear', trainable=True)

        # Dueling aggregation for the main head
        self.output_main = self.fc8_v_main + tf.subtract(self.fc8_a_main, tf.reduce_mean(self.fc8_a_main, axis=1, keep_dims=True))

        # Two - Conditional Network (randomly initialized conv4 branch)
        conv4_cdl_k = np.random.rand(3, 3, 384, 256).astype(np.float32)
        conv4_cdl_b = np.random.rand(256).astype(np.float32)
        self.conv4_cdl = self.conv(self.conv3, conv4_cdl_k, conv4_cdl_b, k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool4_cdl = tf.nn.max_pool(self.conv4_cdl, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.flat_cdl = tf.contrib.layers.flatten(self.maxpool4_cdl)

        # Advantage Network
        self.fc6_a_cdl = self.FullyConnected(self.flat_cdl, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_a_cdl = self.FullyConnected(self.fc6_a_cdl, units_in=2048, units_out=num_actions, act='linear', trainable=train_fc7)

        # Value Network
        self.fc6_v_cdl = self.FullyConnected(self.flat_cdl, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_v_cdl = self.FullyConnected(self.fc6_v_cdl, units_in=2048, units_out=1, act='linear', trainable=train_fc7)

        # Dueling aggregation for the conditional head
        self.output_cdl = self.fc7_v_cdl + tf.subtract(self.fc7_a_cdl, tf.reduce_mean(self.fc7_a_cdl, axis=1, keep_dims=True))
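    # Hedged usage sketch (the session and batch names are assumptions, not
    # defined in this file): both heads share conv1-conv3, so they can be
    # evaluated in a single forward pass, e.g.
    #
    #   q_main, q_cdl = sess.run([net.output_main, net.output_cdl],
    #                            feed_dict={net.x: batch})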
    def conv(self, input, W, b, k, out, s, p, trainable=True):
        # Check that the supplied kernel matches the declared size and depth.
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)

        conv_kernel_1 = tf.nn.conv2d(input, tf.Variable(W, trainable=trainable), [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))

        return tf.nn.relu(bias_layer_1)
    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)

        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        elif act == 'linear':
            return tf.nn.xw_plus_b(input, W, b)
        else:
            raise ValueError('Unsupported activation: ' + act)


class AlexNetDuelPrune(object):

    def __init__(self, x, num_actions, train_type):
        self.x = x
        # weights_path = 'models/imagenet.npy'
        weights_path = 'models/prune_weights.npy'
        weights = np.load(open(weights_path, "rb"), encoding="latin1").item()
        print('Loading pruned weights for the conv layers and random weights for the fc layers')
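        # Hedged sketch of the expected 'prune_weights.npy' layout (inferred
        # from the flat "conv1W"/"conv1b" keys used below; the real file may
        # differ): a dict with one entry per kernel and one per bias, e.g.
        #
        #   weights = {
        #       "conv1W": np.zeros((11, 11, 3, 64), np.float32),
        #       "conv1b": np.zeros(64, np.float32),
        #       ...
        #   }
        #   np.save('models/prune_weights.npy', weights)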
        train_conv = True
        train_fc6 = True
        train_fc7 = True
        train_fc8 = True
        train_fc9 = True

        if train_type == 'last4':
            train_conv = False
            train_fc6 = False
        elif train_type == 'last3':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
        elif train_type == 'last2':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
            train_fc8 = False
        self.conv1 = self.conv(self.x, weights["conv1W"], weights["conv1b"], k=11, out=64, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv2 = self.conv(self.maxpool1, weights["conv2W"], weights["conv2b"], k=5, out=192, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv3 = self.conv(self.maxpool2, weights["conv3W"], weights["conv3b"], k=3, out=288, s=1, p="SAME", trainable=train_conv)
        self.conv4 = self.conv(self.conv3, weights["conv4W"], weights["conv4b"], k=3, out=288, s=1, p="SAME", trainable=train_conv)
        self.conv5 = self.conv(self.conv4, weights["conv5W"], weights["conv5b"], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.flat = tf.contrib.layers.flatten(self.maxpool5)

        # Advantage Network
        self.fc6_a = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_a = self.FullyConnected(self.fc6_a, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_a = self.FullyConnected(self.fc7_a, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_a = self.FullyConnected(self.fc8_a, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_a = self.FullyConnected(self.fc9_a, units_in=512, units_out=num_actions, act='linear', trainable=True)

        # Value Network
        self.fc6_v = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_v = self.FullyConnected(self.fc6_v, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_v = self.FullyConnected(self.fc7_v, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_v = self.FullyConnected(self.fc8_v, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_v = self.FullyConnected(self.fc9_v, units_in=512, units_out=1, act='linear', trainable=True)

        # Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
        self.output = self.fc10_v + tf.subtract(self.fc10_a, tf.reduce_mean(self.fc10_a, axis=1, keep_dims=True))
    def conv(self, input, W, b, k, out, s, p, trainable=True):
        # Check that the supplied kernel matches the declared size and depth.
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)

        conv_kernel_1 = tf.nn.conv2d(input, tf.Variable(W, trainable=trainable), [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))

        return tf.nn.relu(bias_layer_1)
    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)

        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        elif act == 'linear':
            return tf.nn.xw_plus_b(input, W, b)
        else:
            raise ValueError('Unsupported activation: ' + act)


class AlexNet(object):

    def __init__(self, x, num_actions, train_type):
        self.x = x
        weights_path = 'models/imagenet.npy'
        weights = np.load(open(weights_path, "rb"), encoding="latin1").item()
        print('Loading imagenet weights for the conv layers and random weights for the fc layers')
        train_conv = True
        train_fc6 = True
        train_fc7 = True
        train_fc8 = True
        train_fc9 = True

        if train_type == 'last4':
            train_conv = False
            train_fc6 = False
        elif train_type == 'last3':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
        elif train_type == 'last2':
            train_conv = False
            train_fc6 = False
            train_fc7 = False
            train_fc8 = False
        self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv4 = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv5 = self.conv(self.conv4, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

        self.flat = tf.contrib.layers.flatten(self.maxpool5)

        # Single (non-dueling) fully connected head
        self.fc6 = self.FullyConnected(self.flat, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7 = self.FullyConnected(self.fc6, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        self.fc8 = self.FullyConnected(self.fc7, units_in=2048, units_out=2048, act='relu', trainable=train_fc8)
        self.fc9 = self.FullyConnected(self.fc8, units_in=2048, units_out=1024, act='relu', trainable=train_fc9)
        self.fc10 = self.FullyConnected(self.fc9, units_in=1024, units_out=num_actions, act='linear', trainable=True)

        self.output = self.fc10

        # Log the layer tensors for a quick shape check
        print(self.conv1)
        print(self.conv2)
        print(self.conv3)
        print(self.conv4)
        print(self.conv5)
        print(self.fc6)
        print(self.fc7)
        print(self.fc8)
        print(self.fc9)
        print(self.fc10)
    def conv(self, input, W, b, k, out, s, p, trainable=True):
        # Check that the supplied kernel matches the declared size and depth.
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)

        conv_kernel_1 = tf.nn.conv2d(input, tf.Variable(W, trainable=trainable), [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))

        return tf.nn.relu(bias_layer_1)
    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)

        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        elif act == 'linear':
            return tf.nn.xw_plus_b(input, W, b)
        else:
            raise ValueError('Unsupported activation: ' + act)
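# Hedged end-to-end usage sketch (the 227x227x3 input resolution and the
# num_actions value are assumptions taken from the standard AlexNet input,
# not from this file; adjust to the actual project):
#
#   x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
#   net = AlexNet(x, num_actions=25, train_type='last4')
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       q = sess.run(net.output, feed_dict={x: batch})  # batch: NumPy array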
1938
util/transformations.py
Normal file
File diff suppressed because it is too large