Initial Commit

This commit is contained in:
Anwar, Malik Aqeel
2019-10-14 18:15:33 -04:00
parent 047c2876d3
commit d4e1f1136c
16 changed files with 3789 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
*.npy
*.pyc
*.xml
.idea/
images/
models/

107
Activation.py Normal file
View File

@@ -0,0 +1,107 @@
import numpy as np
import tensorflow as tf
class Activation(object):
    """Common interface for the activation functions defined in this module.

    Both methods are placeholders here: they do nothing and return ``None``.
    Subclasses override them.
    """

    def forward(self, x):
        """Apply the activation to ``x`` (no-op in the base class)."""
        return None

    def gradient(self, x):
        """Return the activation derivative for ``x`` (no-op in the base class)."""
        return None
class Sigmoid(Activation):
    """Logistic sigmoid activation."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return ``tf.sigmoid(x)``."""
        return tf.sigmoid(x)

    def sigmoid_gradient(self, x):
        """Return ``s * (1 - s)`` with ``s = sigmoid(x)``.

        This is the sigmoid derivative evaluated from the raw input ``x``.
        """
        activated = tf.sigmoid(x)
        complement = tf.subtract(1.0, activated)
        return tf.multiply(activated, complement)

    def gradient(self, x):
        """Return ``x * (1 - x)``.

        The sigmoid is not re-applied here, so the result equals the sigmoid
        derivative only when ``x`` is already the sigmoid output
        (presumably the layer activation A -- confirm against callers).
        """
        complement = tf.subtract(1.0, x)
        return tf.multiply(x, complement)
class Relu(Activation):
    """Rectified linear unit activation."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return ``tf.nn.relu(x)``."""
        return tf.nn.relu(x)

    def gradient(self, x):
        """Return a float32 mask that is 1 where ``x > 0`` and 0 elsewhere."""
        # The sign of x is the same before and after relu wherever x > 0, so
        # the mask is valid for both the pre-activation Z and the output A.
        is_positive = x > 0.0
        return tf.cast(is_positive, dtype=tf.float32)
# https://theclevermachine.wordpress.com/tag/tanh-function/
class Tanh(Activation):
    """Hyperbolic tangent activation."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return ``tf.tanh(x)``."""
        return tf.tanh(x)

    def gradient(self, x):
        """Return ``1 - x**2``.

        This is the gradient w.r.t. the activation output A, not the
        pre-activation Z: ``x`` must already be ``tanh(z)``.
        """
        squared = tf.pow(x, 2)
        return 1 - squared
# https://medium.com/@aerinykim/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d
# /home/brian/tensorflow/tensorflow/python/ops/nn_grad ... grep "_SoftmaxGrad"
class Softmax(Activation):
    """Softmax activation (forward pass only)."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return the softmax of ``x``."""
        # TensorFlow exposes softmax as tf.nn.softmax; there is no tf.softmax.
        return tf.nn.softmax(x)

    # this is gradient for A
    def gradient(self, x):
        """Always raise ``AssertionError``: the softmax gradient is unsupported.

        The full Jacobian for a flattened output ``a`` would be
        ``diag(a) - a a^T``; it is deliberately not provided here (the
        original author marked it as impossible and not bio plausible).

        Raises:
            AssertionError: on every call.
        """
        # Raised explicitly so the guard still fires when Python runs with -O,
        # which strips plain `assert` statements.
        raise AssertionError('Softmax.gradient is not supported')
class LeakyRelu(Activation):
    """Leaky ReLU activation with a configurable negative-side slope."""

    def __init__(self, leak=0.2):
        # Slope applied to negative inputs.
        self.leak = leak

    def forward(self, x):
        """Return ``tf.nn.leaky_relu(x, alpha=self.leak)``."""
        return tf.nn.leaky_relu(x, alpha=self.leak)

    def gradient(self, x):
        """Return 1 where ``x > 0``, ``self.leak`` where ``x < 0``, 0 at ``x == 0``."""
        # The sign of x is preserved by leaky relu, so this holds for both
        # the pre-activation Z and the output A.
        positive_part = tf.cast(x > 0.0, dtype=tf.float32)
        negative_part = tf.cast(x < 0.0, dtype=tf.float32) * self.leak
        return tf.add(positive_part, negative_part)
class SqrtRelu(Activation):
    """Activation computing ``sqrt(relu(x))``."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return the square root of the rectified input."""
        rectified = tf.nn.relu(x)
        return tf.sqrt(rectified)

    def gradient(self, x):
        """Return a float32 mask that is 1 where ``x > 0`` and 0 elsewhere."""
        # NOTE(review): this is the relu mask, not the analytic derivative of
        # sqrt(relu(x)) -- confirm that the approximation is intended.
        is_positive = x > 0.0
        return tf.cast(is_positive, dtype=tf.float32)
class Linear(Activation):
    """Identity activation."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return ``x`` unchanged."""
        return x

    def gradient(self, x):
        """Return a tensor of ones with the same (dynamic) shape as ``x``."""
        dynamic_shape = tf.shape(x)
        return tf.ones(shape=dynamic_shape)

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 Anwar, Malik Aqeel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

54
Layer.py Normal file
View File

@@ -0,0 +1,54 @@
import tensorflow as tf
import numpy as np
class Layer:
    """Base class declaring the hooks a network layer may implement.

    Three families of hooks exist: plain back-propagation (``backward``,
    ``gv``, ``train``), the ``dfa_*`` variants that additionally receive an
    error term ``E``, and the ``lel_*`` variants that additionally receive
    ``Y``. The first two families default to returning ``None``; the
    ``lel_*`` hooks fail an assertion unless a subclass overrides them.
    """

    def __init__(self):
        super().__init__()

    # ------------------------------------------------------------ basics
    def get_weights(self):
        """Return the layer weights (placeholder: ``None``)."""
        return None

    def num_params(self):
        """Return the parameter count (placeholder: ``None``)."""
        return None

    def forward(self, X):
        """Compute the layer output for ``X`` (placeholder: ``None``)."""
        return None

    # ------------------------------------------------------ backpropagation
    def backward(self, AI, AO, DO):
        """Placeholder hook; returns ``None``."""
        return None

    def gv(self, AI, AO, DO):
        """Placeholder hook; returns ``None``."""
        return None

    def train(self, AI, AO, DO):
        """Placeholder hook; returns ``None``."""
        return None

    # ------------------------------------------------------------------ dfa
    def dfa_backward(self, AI, AO, E, DO):
        """Placeholder hook; returns ``None``."""
        return None

    def dfa_gv(self, AI, AO, E, DO):
        """Placeholder hook; returns ``None``."""
        return None

    def dfa(self, AI, AO, E, DO):
        """Placeholder hook; returns ``None``."""
        return None

    # ------------------------------------------------------------------ lel
    def lel_backward(self, AI, AO, E, DO, Y):
        """Unsupported in the base class; fails an assertion."""
        assert False

    def lel_gv(self, AI, AO, E, DO, Y):
        """Unsupported in the base class; fails an assertion."""
        assert False

    def lel(self, AI, AO, E, DO, Y):
        """Unsupported in the base class; fails an assertion."""
        assert False

72
MaxPool.py Normal file
View File

@@ -0,0 +1,72 @@
import tensorflow as tf
import numpy as np
import math
from tensorflow.python.ops import gen_nn_ops
# return gen_nn_ops.max_pool_v2(value=X, ksize=self.size, strides=self.strides, padding="SAME")
from Layer import Layer
from Activation import Activation
from Activation import Sigmoid
class MaxPool(Layer):
    """Parameter-free max-pooling layer.

    The forward pass calls ``tf.nn.max_pool``. All three backward variants
    return the same ``max_pool_grad`` of the incoming gradient; the
    weight-update hooks return empty lists because there is nothing to train.
    """

    def __init__(self, size, ksize, strides, padding):
        # `size` is stored but not read by any method of this class.
        self.size = size
        self.ksize = ksize
        self.strides = strides
        self.padding = padding

    def _pool_grad(self, AI, AO, DO):
        """Route gradient ``DO`` back through the pooling of ``AI`` -> ``AO``."""
        return gen_nn_ops.max_pool_grad(
            grad=DO,
            orig_input=AI,
            orig_output=AO,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
        )

    # ------------------------------------------------------------ basics
    def get_weights(self):
        """Return an empty list: pooling has no weights."""
        return []

    def num_params(self):
        """Return 0: pooling has no parameters."""
        return 0

    def forward(self, X):
        """Return ``X`` max-pooled with the configured window, strides and padding."""
        pooled = tf.nn.max_pool(X, ksize=self.ksize, strides=self.strides, padding=self.padding)
        return pooled

    # ------------------------------------------------------ backpropagation
    def backward(self, AI, AO, DO):
        """Return the gradient of the pooling with respect to its input."""
        return self._pool_grad(AI, AO, DO)

    def gv(self, AI, AO, DO):
        """Return an empty list (nothing to update)."""
        return []

    def train(self, AI, AO, DO):
        """Return an empty list (nothing to update)."""
        return []

    # ------------------------------------------------------------------ dfa
    def dfa_backward(self, AI, AO, E, DO):
        """Same as ``backward``; ``E`` is ignored."""
        return self._pool_grad(AI, AO, DO)

    def dfa_gv(self, AI, AO, E, DO):
        """Return an empty list (nothing to update)."""
        return []

    def dfa(self, AI, AO, E, DO):
        """Return an empty list (nothing to update)."""
        return []

    # ------------------------------------------------------------------ lel
    def lel_backward(self, AI, AO, E, DO, Y):
        """Same as ``backward``; ``E`` and ``Y`` are ignored."""
        return self._pool_grad(AI, AO, DO)

    def lel_gv(self, AI, AO, E, DO, Y):
        """Return an empty list (nothing to update)."""
        return []

    def lel(self, AI, AO, E, DO, Y):
        """Return an empty list (nothing to update)."""
        return []

249
aux_functions.py Normal file
View File

@@ -0,0 +1,249 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 10/14/2019, 12:50 PM
# Email: aqeel.anwar@gatech.edu
import numpy as np
import math
import random
import time
import airsim
import pygame
from configs.read_cfg import read_cfg
def translate_action(action, num_actions):
    """Return a short label describing a discrete action on the action grid.

    The ``num_actions`` actions are laid out on a square grid whose side is
    ``sqrt(num_actions)``. ``action[0]`` is split into a row index (integer
    division by the side) and a column index (remainder). Each index becomes
    a direction letter -- U/D for rows, L/R for columns, F for the centre
    cell of an odd-sized grid -- followed by its rounded-up distance from
    the grid centre.

    Args:
        action: indexable whose first element is the action index.
        num_actions: total number of actions; expected to be a perfect
            square.

    Returns:
        A label such as ``'U2-L1'`` or ``'F0-F0'``.
    """
    side = int(round(np.sqrt(num_actions)))
    half = side // 2
    if side % 2:
        # Odd side: a true centre row/column exists and is labelled 'F'.
        v_string = 'U' * half + 'F' + 'D' * half
        h_string = 'L' * half + 'F' + 'R' * half
    else:
        # Even side: no centre cell, so each half maps to one direction.
        v_string = 'U' * half + 'D' * half
        h_string = 'L' * half + 'R' * half

    v_ind = int(action[0] / side)
    h_ind = int(action[0] % side)

    centre = (side - 1) / 2
    v_offset = int(np.ceil(abs(centre - v_ind)))
    h_offset = int(np.ceil(abs(centre - h_ind)))
    return v_string[v_ind] + str(v_offset) + '-' + h_string[h_ind] + str(h_offset)
def get_errors(data_tuple, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):
    """Return the absolute TD errors for the transitions in ``data_tuple``.

    Delegates to ``minibatch_double`` with a batch size of
    ``len(data_tuple)`` and keeps only the error term of its result.
    """
    _states, _targets, _actions, td_error, _indices = minibatch_double(
        data_tuple, len(data_tuple), choose, ReplayMemory, input_size,
        agent, target_agent, gamma, Q_clip)
    return td_error
def minibatch_double(data_tuple, batch_size, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):
    """Build double-DQN targets and TD errors for a batch of transitions.

    Args:
        data_tuple: list of ``[state, action, new_state, reward, crash]``
            transitions; used directly when ``batch_size == 1``.
        batch_size: number of transitions; when it is not 1 the batch is
            drawn with ``ReplayMemory.sample(batch_size)``.
        choose: selects which network plays which role. If true,
            ``target_agent`` picks the greedy next action and ``agent``
            evaluates it; otherwise the roles are swapped.
        ReplayMemory: object providing ``sample(n)``.
        input_size: side length of the square 3-channel state images.
        agent, target_agent: objects providing ``Q_val(states)``.
        gamma: discount factor.
        Q_clip: if true, the TD term is clipped to ``[-1, 1]`` before it is
            added to the old Q value.

    Returns:
        ``(curr_states, Q_target, actions, err, idx)`` where ``err`` is the
        unclipped absolute TD error and ``idx`` holds the sampled tree
        indices (``None`` when ``batch_size == 1``).
    """
    if batch_size == 1:
        idx = None
        train_batch = data_tuple
    else:
        sampled = ReplayMemory.sample(batch_size)
        train_batch = np.array([entry[1][0] for entry in sampled])
        idx = [entry[0] for entry in sampled]

    image_shape = (batch_size, input_size, input_size, 3)
    curr_states = np.zeros(shape=image_shape)
    new_states = np.zeros(shape=image_shape)
    actions = np.zeros(shape=(batch_size), dtype=int)
    rewards = np.zeros(shape=batch_size)
    crashes = np.zeros(shape=(batch_size))

    for row, transition in enumerate(train_batch):
        state, action, next_state, reward, crash = transition
        curr_states[row, :, :, :] = state[...]
        actions[row] = action
        new_states[row, :, :, :] = next_state
        rewards[row] = reward
        crashes[row] = crash

    # Network "A" selects the greedy next action, network "B" evaluates it.
    if choose:
        selector, evaluator = target_agent, agent
    else:
        selector, evaluator = agent, target_agent
    oldQval_A = selector.Q_val(curr_states)
    newQval_A = selector.Q_val(new_states)
    newQval_B = evaluator.Q_val(new_states)

    # A reward of exactly -1 marks a terminal transition.
    terminal = np.flatnonzero(rewards == -1)
    running = np.flatnonzero(rewards != -1)

    taken_q = oldQval_A[running, actions[running].astype(int)]
    greedy_next = np.argmax(newQval_A[running], axis=1)

    TD = np.zeros(shape=[batch_size])
    TD[running] = rewards[running] + gamma * newQval_B[running, greedy_next] - taken_q
    TD[terminal] = rewards[terminal]

    TD_clip = np.clip(TD, -1, 1) if Q_clip else TD

    Q_target = np.zeros(shape=[batch_size])
    Q_target[running] = taken_q + TD_clip[running]
    Q_target[terminal] = TD_clip[terminal]

    err = abs(TD)  # or abs(TD_clip)
    return curr_states, Q_target, actions, err, idx
def policy(epsilon,curr_state, iter, b, epsilon_model, wait_before_train, num_actions, agent):
    """Choose the next action, either at random or from the network.

    Here ``epsilon`` is the probability of using the network: a uniform
    draw greater than ``epsilon`` yields a random action. For the
    ``'linear'`` and ``'exponential'`` models ``epsilon`` is recomputed from
    ``iter``, ``b`` and ``wait_before_train`` and capped at 0.95; for any
    other model the passed-in value is used unchanged.

    Returns:
        ``(action, action_type, epsilon, qvals)`` where ``action_type`` is
        ``'Rand'`` or ``'Pred'`` and ``qvals`` is always an empty list.
    """
    qvals = []
    epsilon_ceil = 0.95
    elapsed = iter - wait_before_train
    span = b - wait_before_train

    if epsilon_model == 'linear':
        epsilon = min(epsilon_ceil * elapsed / span, epsilon_ceil)
    elif epsilon_model == 'exponential':
        epsilon = min(1 - math.exp(-2 / span * elapsed), epsilon_ceil)

    explore = random.random() > epsilon
    if explore:
        # One random action per state in the batch.
        batch = curr_state.shape[0]
        action = np.random.randint(0, num_actions, size=batch, dtype=np.int32)
        action_type = 'Rand'
    else:
        # Use NN to predict action
        action = agent.action_selection(curr_state)
        action_type = 'Pred'

    return action, action_type, epsilon, qvals
def reset_to_initial(level, reset_array, client):
    """Move the vehicle to the stored pose of ``level`` and pause for 0.1 s.

    ``reset_array[level]`` is passed to ``client.simSetVehiclePose`` with
    collisions ignored.
    """
    target_pose = reset_array[level]
    # `ignore_collison` is spelled as in the call this block has always made.
    client.simSetVehiclePose(target_pose, ignore_collison=True)
    time.sleep(0.1)
def connect_drone():
    """Connect to AirSim and place the vehicle at the position (0, 0, -2.2).

    Returns:
        ``(client, old_posit)``: the connected ``MultirotorClient`` and the
        pose the vehicle had before it was moved. The new pose keeps that
        original orientation.
    """
    print('------------------------------ Drone ------------------------------')
    client = airsim.MultirotorClient(timeout_value=10)
    client.confirmConnection()

    old_posit = client.simGetVehiclePose()
    start_pose = airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation)
    client.simSetVehiclePose(start_pose, ignore_collison=True)
    return client, old_posit
def blit_text(surface, text, pos, font, color=pygame.Color('black')):
    """Draw multi-line ``text`` on ``surface`` with simple word wrapping.

    Drawing starts at ``pos``. Words are separated by the width of one
    space; a word that would reach the surface's right edge is moved to the
    start of the next row, and every line of ``text`` starts a new row.
    """
    rows = [row.split(' ') for row in text.splitlines()]
    gap = font.size(' ')[0]  # The width of a space.
    max_width, max_height = surface.get_size()
    x, y = pos
    for row in rows:
        for token in row:
            rendered = font.render(token, 0, color)
            token_width, token_height = rendered.get_size()
            if x + token_width >= max_width:
                # Wrap: back to the left margin, one row down.
                x = pos[0]
                y += token_height
            surface.blit(rendered, (x, y))
            x += token_width + gap
        # End of an input line: start a fresh row.
        x = pos[0]
        y += token_height
def pygame_connect(H, W):
    """Open the pygame window showing the key-help image and return it.

    Note that ``H`` is used as the window width and ``W`` as its height.
    The image ``images/keys.png`` is drawn at the top-left corner and the
    caption is set to ``DLwithTL``.
    """
    pygame.init()
    screen = pygame.display.set_mode([H, W])
    key_help = pygame.image.load('images/keys.png')
    screen.blit(key_help, (0, 0))
    pygame.display.set_caption('DLwithTL')
    pygame.display.update()
    return screen
def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ):
    """Process pending pygame events and apply the operator's key commands.

    Keys handled on KEYDOWN:
        L          reload ``configs/config.cfg`` and take its learning rate
        Enter      stop automation and save the network to ``network_path``
        Backspace  toggle ``automate``
        R          reconnect to AirSim and restore ``old_posit``
        M          call ``agent.get_state()``
    While ``automate`` is false, the arrow keys, A and D set a local
    ``action`` value (currently unused because the ``take_action`` call is
    commented out), Y moves the vehicle and H resets the simulator.

    Returns:
        ``(active, automate, lr, client)``. ``epsilon`` is accepted but
        neither modified nor returned.
    """
    # NOTE(review): block nesting below was reconstructed; `event.key` is only
    # read inside the KEYDOWN branch -- confirm against the original file.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            active = False
            pygame.quit()
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_l:
                # Load the parameters - epsilon
                cfg = read_cfg(config_filename='configs/config.cfg', verbose=False)
                lr = cfg.lr
                print('Updated Parameters')
                print('Learning Rate: ', cfg.lr)
            if event.key == pygame.K_RETURN:
                # take_action(-1)
                automate = False
                print('Saving Model')
                # agent.save_network(iter, save_path, ' ')
                agent.save_network(network_path)
                # agent.save_data(iter, data_tuple, tuple_path)
                print('Model Saved: ', network_path)
            if event.key == pygame.K_BACKSPACE:
                # XOR with True flips the flag (pause/play).
                automate = automate ^ True
            if event.key == pygame.K_r:
                # reconnect
                client = []
                client = airsim.MultirotorClient()
                client.confirmConnection()
                # posit1_old = client.simGetVehiclePose()
                client.simSetVehiclePose(old_posit,
                                         ignore_collison=True)
                # The agent keeps its own client handle, so refresh it too.
                agent.client = client
            if event.key == pygame.K_m:
                agent.get_state()
                print('got_state')
                # automate = automate ^ True
            # Set the routine for manual control if not automate
            if not automate:
                # print('manual')
                # action=[-1]
                if event.key == pygame.K_UP:
                    action = 0
                elif event.key == pygame.K_RIGHT:
                    action = 1
                elif event.key == pygame.K_LEFT:
                    action = 2
                elif event.key == pygame.K_d:
                    action = 3
                elif event.key == pygame.K_a:
                    action = 4
                elif event.key == pygame.K_DOWN:
                    action = -2
                elif event.key == pygame.K_y:
                    # Move to 3 * initZ at the current x/y, then wait 0.5 s.
                    pos = client.getPosition()
                    client.moveToPosition(pos.x_val, pos.y_val, 3 * initZ, 1)
                    time.sleep(0.5)
                elif event.key == pygame.K_h:
                    client.reset()
                # agent.take_action(action)
    return active, automate, lr, client

28
configs/config.cfg Normal file
View File

@@ -0,0 +1,28 @@
[general_params]
run_name: Tello_indoor
custom_load: False
custom_load_path: DeepNet/models/e2e/e2e
env_type: Indoor
env_name: indoor_long
[simulation_params]
load_data: False
load_data_path: DeepNet/models/Tello_indoor/VanLeer/
[RL_params]
num_actions: 400
train_type: e2e
wait_before_train: 100
max_iters: 300000
buffer_len: 60000
batch_size: 32
epsilon_saturation: 200000
crash_thresh: 1.3
Q_clip: True
train_interval: 3
update_target_interval: 8000
gamma: 0.99
dropout_rate: 0.1
learning_rate: 1e-3
switch_env_steps: 3000
epsilon_model: exponential

59
configs/read_cfg.py Normal file
View File

@@ -0,0 +1,59 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 9/20/2019, 12:43 PM
# Email: aqeel.anwar@gatech.edu
import configparser as cp
from dotmap import DotMap
def read_cfg(config_filename = 'configs/main.cfg', verbose = False):
    """Load the run configuration from an INI file into a ``DotMap``.

    Values come from the ``general_params``, ``simulation_params`` and
    ``RL_params`` sections. Numeric entries take the first comma-separated
    token of their value. Boolean entries are true only when the value is
    exactly the string ``True``.

    Args:
        config_filename: path of the INI file to read.
        verbose: when true, print every loaded parameter.

    Returns:
        The populated ``DotMap``.
    """
    # Load from config file
    cfg = DotMap()
    config = cp.ConfigParser()
    config.read(config_filename)

    def flag(section, option):
        # Plain bool() on a config string is True for any non-empty text,
        # including 'False', so compare with the literal instead.
        return str(config.get(section, option)) == 'True'

    def rl_number(option, cast):
        # Only the first comma-separated token of the value is used.
        return cast(config.get('RL_params', option).split(',')[0])

    # [General Parameters]
    cfg.run_name = config.get('general_params', 'run_name')
    cfg.custom_load = flag('general_params', 'custom_load')
    cfg.custom_load_path = str(config.get('general_params', 'custom_load_path'))
    cfg.env_type = config.get('general_params', 'env_type')
    cfg.env_name = config.get('general_params', 'env_name')

    # [Simulation Parameters]
    cfg.load_data = flag('simulation_params', 'load_data')
    cfg.load_data_path = str(config.get('simulation_params', 'load_data_path'))

    # [RL Parameters]
    cfg.num_actions = rl_number('num_actions', int)
    cfg.train_type = config.get('RL_params', 'train_type')
    cfg.wait_before_train = rl_number('wait_before_train', int)
    cfg.max_iters = rl_number('max_iters', int)
    cfg.buffer_len = rl_number('buffer_len', int)
    cfg.batch_size = rl_number('batch_size', int)
    cfg.epsilon_saturation = rl_number('epsilon_saturation', int)
    cfg.crash_thresh = rl_number('crash_thresh', float)
    cfg.gamma = rl_number('gamma', float)
    cfg.dropout_rate = rl_number('dropout_rate', float)
    cfg.lr = rl_number('learning_rate', float)
    cfg.switch_env_steps = rl_number('switch_env_steps', int)
    cfg.epsilon_model = config.get('RL_params', 'epsilon_model')
    cfg.Q_clip = flag('RL_params', 'Q_clip')
    cfg.train_interval = rl_number('train_interval', int)
    cfg.update_target_interval = rl_number('update_target_interval', int)

    if verbose:
        print('------------------------------ Config File ------------------------------')
        for param in cfg:
            # Pad so that the values line up in a column.
            spaces = ' '*(30-len(param))
            print(param+':'+spaces + str(cfg[param]))
        print()
    return cfg

View File

@@ -0,0 +1,149 @@
import numpy as np
import airsim
def indoor_meta():
    """Return the start poses and settings for the combined indoor environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)``: 24 start poses as
        ``[x, y, theta]`` with theta in degrees (three per sub-environment),
        the matching level names, the crash threshold 0.07 and the initial
        z value -1.
    """
    # x, y, theta in DEGREES
    orig_ip = [
        # One - Pyramid (first entry is the player start)
        [-21593, -1563, -45],
        [-22059, -2617, -45],
        [-22800, -3489, 90],
        # Two - FrogEyes
        [-15744, -1679, 0],
        [-15539, -3043, 180],
        [-13792, -3371, 90],
        # Three - UpDown
        [-11221, -3171, 180],
        [-9962, -3193, 0],
        [-7464, -4558, 90],
        # Four - Long (first entry is the player start)
        [-649, -4287, 180],
        [-4224, -2601, 180],
        [1180, -2153, -90],
        # Five - VanLeer (first entry is the player start)
        [6400, -4731, 90],
        [5992, -2736, 180],
        [8143, -2835, -90],
        # Six - Complex_Indoor
        [11320, -2948, 0],
        [12546, -3415, -180],
        [10809, -2106, 0],
        # Seven - Techno
        [19081, -8867, 0],
        [17348, -3864, -120],
        [20895, -4757, 30],
        # Eight - GT
        [26042, -4336, 180],
        [26668, -3070, 0],
        [27873, -2792, -135],
    ]
    environments = ('Pyramid', 'FrogEyes', 'UpDown', 'Long',
                    'VanLeer', 'ComplexIndoor', 'Techno', 'GT')
    # Three numbered start positions per environment, in the order above.
    level_name = [env + str(k) for env in environments for k in (1, 2, 3)]
    crash_threshold = 0.07
    initZ = -1
    return orig_ip, level_name, crash_threshold, initZ
# Test condo indoor initial positions
def indoor_condo():
    """Return start poses and settings for the condo test environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)`` with three
        ``[x, y, theta]`` poses, their names, threshold 0.07 and initZ -2.
    """
    poses = [
        [-290, -1700, 0],  # Player start
        [580, 1200, 180],
        [-240, -500, 90],
    ]
    names = ['Condo' + str(k) for k in (1, 2, 3)]
    return poses, names, 0.07, -2
def indoor_techno():
    """Return start poses and settings for the Techno environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)`` with three
        ``[x, y, theta]`` poses, their names, threshold 0.07 and initZ -2.
    """
    poses = [
        [19081, -8867, 0],
        [17348, -3864, -120],
        [20895, -4757, 30],
    ]
    names = ['Techno' + str(k) for k in (1, 2, 3)]
    return poses, names, 0.07, -2
def indoor_long():
    """Return start poses and settings for the Long environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)`` with six
        ``[x, y, theta]`` poses, their names, threshold 0.07 and initZ -1.
    """
    poses = [
        [-649, -4287, 180],  # Player Start
        [-4224, -2601, 180],
        [1180, -2153, -90],
        [2058, -3184, 50],
        [1644, -1464, 15],
        [-3754, -4302, 0],
    ]
    names = ['Long' + str(k) for k in range(1, 7)]
    return poses, names, 0.07, -1
# Four - Long
# Seven - Techno
def indoor_twisty():
    """Return start poses and settings for the Twisty environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)`` with three
        ``[x, y, theta]`` poses, their names, threshold 0.07 and initZ -2.
    """
    poses = [
        [127, -770, 90],  # Player start
        [2209, -115, 180],
        [1680, 1590, -145],
    ]
    names = ['Twisty' + str(k) for k in (1, 2, 3)]
    return poses, names, 0.07, -2
def indoor_pretzel():
    """Return start poses and settings for the Pretzel environment.

    Returns:
        ``(orig_ip, level_name, crash_threshold, initZ)`` with three
        ``[x, y, theta]`` poses, their names, threshold 0.07 and initZ -1.5.
    """
    poses = [
        [3308, 650, 180],  # Player start
        [3330, -200, -160],
        [1480, -1040, 25],
    ]
    # NOTE(review): the levels are named 'Cloud*' although the function is
    # called indoor_pretzel -- confirm this is intended.
    names = ['Cloud' + str(k) for k in (1, 2, 3)]
    return poses, names, 0.07, -1.5
def initial_positions(name):
    """Build the AirSim reset poses for the environment called ``name``.

    ``name`` must be the name of one of this module's environment functions
    (for example ``'indoor_long'``). Each ``[x, y, theta]`` entry returned
    by that function is converted to an ``airsim.Pose`` whose position is
    the offset from the first entry divided by 100, whose height is
    ``initZ`` and whose yaw is ``theta`` converted from degrees to radians.

    Args:
        name: name of the environment function to call.

    Returns:
        ``(reset_array, level_name, crash_threshold, initZ)``.

    Raises:
        ValueError: if no callable called ``name`` exists in this module.
    """
    # Look the factory up by name instead of eval()-ing a string that comes
    # from the configuration file.
    factory = globals().get(name)
    if not callable(factory):
        raise ValueError('Unknown environment: {!r}'.format(name))
    orig_ip, level_name, crash_threshold, initZ = factory()

    # The first entry is the reference (player start) position.
    start_x, start_y = orig_ip[0][0], orig_ip[0][1]
    reset_array = []
    for x, y, theta in orig_ip:
        # presumably Unreal centimetres -> AirSim metres -- TODO confirm
        position = airsim.Vector3r((x - start_x) / 100, (y - start_y) / 100, initZ)
        # pitch and roll are zero; only the yaw varies between start poses
        heading = airsim.to_quaternion(0, 0, theta * np.pi / 180)
        reset_array.append(airsim.Pose(position, heading))
    return reset_array, level_name, crash_threshold, initZ

282
main.py Normal file
View File

@@ -0,0 +1,282 @@
# Branch - DFA Implementation
import sys
from network.agent import DeepAgent
from environments.initial_positions import *
import os
import psutil
from os import getpid
from network.Memory import Memory
from aux_functions import *
from configs.read_cfg import read_cfg
# Debug message suppressed
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
process = psutil.Process(getpid())

# Load the run configuration and unpack it into module-level settings.
cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)
input_size = 227
num_actions = cfg.num_actions
screen = pygame_connect(H=925, W=380)
# read_cfg stores this value as `dropout_rate`; asking the DotMap for the
# non-existent `dropout` key silently yields an empty DotMap instead.
dropout_rate = cfg.dropout_rate
env_type=cfg.env_type
env_name = cfg.env_name
train_type = cfg.train_type # 'e2e', 'last4', 'last3', 'last2'
weights_type = 'Imagenet'
reset_array, level_name, crash_threshold, initZ = initial_positions(env_name)
epsilon_annealing = cfg.epsilon_saturation
wait_before_train = cfg.wait_before_train
train_interval=cfg.train_interval
max_iters = cfg.max_iters
gamma = cfg.gamma
update_target = cfg.update_target_interval
buffer_length = cfg.buffer_len
ReplayMemory = Memory(cfg.buffer_len)
switch_env_steps = cfg.switch_env_steps
batch_size=cfg.batch_size
Q_clip=cfg.Q_clip
custom_load=cfg.custom_load
custom_load_path = cfg.custom_load_path
lr = cfg.lr
# read_cfg defines no `epsilon` entry, so start from a plain number.
# policy() recomputes epsilon on every step for the 'linear' and
# 'exponential' models.
epsilon = 0
epsilon_model = cfg.epsilon_model
# Save the network to the directory network_path
if custom_load == True:
    network_path = '/'.join(['models/trained', env_type, env_name, 'CustomLoad', train_type, train_type])
else:
    # The empty element reproduces the doubled '/' after 'models/trained'.
    network_path = '/'.join(['models/trained', '', env_type, env_name, weights_type, train_type, train_type])

if not os.path.exists(network_path):
    os.makedirs(network_path)

# Connect to Unreal Engine and get the drone handle: client
client, old_posit = connect_drone()

# Define DQN agents
agent = DeepAgent(input_size, num_actions, client, env_type, train_type, network_path, name='DQN')
target_agent = DeepAgent(input_size, num_actions, client, env_type, train_type, network_path, name='Target')

# Load custom weights from custom_load_path if required
if custom_load == True:
    print('Loading weights from: ', custom_load_path)
    agent.load_network(custom_load_path)
    target_agent.load_network(custom_load_path)

# Counters and flags used by the main loop.
iter = 0
num_col1 = 0
epi1 = 0
active = True
action_type = 'Wait_for_expert'
automate = True
epsilon_greedy = True
choose = False
print_qval = False
last_crash1 = 0
environ = True
e1 = 0
e2 = 0
ret = 0
dist = 0
switch_env = False
save_posit = old_posit

# Per-level bookkeeping, one slot per entry of level_name.
num_levels = len(level_name)
level_state = [None for _ in range(num_levels)]
level_posit = [None for _ in range(num_levels)]
last_crash_array = np.zeros(shape=num_levels, dtype=np.int32)
ret_array = np.zeros(shape=num_levels)
dist_array = np.zeros(shape=num_levels)
epi_env_array = np.zeros(shape=num_levels, dtype=np.int32)
level = 0
times_switch = 0
curr_state1 = agent.get_state()
i = 0

# The log file sits next to the network files and stays open for the run.
log_path = network_path + 'log.txt'
f = open(log_path, 'w')
# Main loop: poll the operator, then (when automated) act, store the
# transition, optionally train, log the step and handle crashes and
# environment switches. Any exception pauses automation instead of exiting.
# NOTE(review): block nesting below was reconstructed -- confirm against
# the original file.
while active:
    try:
        active, automate, lr, client = check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ)
        if automate:
            start_time = time.time()
            if switch_env:
                # Save the statistics of the level being left ...
                posit1_old = client.simGetVehiclePose()
                times_switch=times_switch+1
                level_state[level] = curr_state1
                level_posit[level] = posit1_old
                last_crash_array[level] = last_crash1
                ret_array[level] = ret
                dist_array[level] = dist
                epi_env_array[int(level/3)] = epi1
                # ... and advance to the next level, wrapping around.
                level = (level + 1) % len(reset_array)
                print('Transferring to level: ', level ,' - ', level_name[level])
                if times_switch < len(reset_array):
                    reset_to_initial(level, reset_array, client)
                else:
                    # Level was visited before: resume from the saved state/pose.
                    curr_state1 = level_state[level]
                    posit1_old = level_posit[level]
                    reset_to_initial(level, reset_array, client)
                    client.simSetVehiclePose(posit1_old, ignore_collison=True)
                    time.sleep(0.1)
                last_crash1 = last_crash_array[level]
                ret = ret_array[level]
                dist = dist_array[level]
                epi1 = epi_env_array[int(level/3)]
                xxx = client.simGetVehiclePose()
                environ = environ^True
            action1, action_type1, epsilon, qvals = policy(epsilon, curr_state1, iter, epsilon_annealing, epsilon_model, wait_before_train, num_actions, agent)
            action_word1 = translate_action(action1, num_actions)
            # Take the action
            agent.take_action(action1, num_actions)
            time.sleep(0.05)
            posit = client.simGetVehiclePose()
            new_state1 = agent.get_state()
            new_depth1, thresh = agent.get_depth()
            # Get GPS information
            posit = client.simGetVehiclePose()
            orientation = posit.orientation
            position = posit.position
            old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
            new_p = np.array([position.x_val, position.y_val])
            # calculate distance
            dist = dist + np.linalg.norm(new_p - old_p)
            old_posit = posit
            reward1, crash1 = agent.reward_gen(new_depth1, action1, crash_threshold, thresh)
            ret = ret+reward1
            agent_state1 = agent.GetAgentState()
            if agent_state1.has_collided:
                # if car_state.collision.object_id==77:
                num_col1 = num_col1 + 1
                print('crash')
                crash1 = True
                reward1 = -1
            # Store the transition with its TD error as the replay priority.
            data_tuple=[]
            data_tuple.append([curr_state1, action1, new_state1, reward1, crash1])
            err = get_errors(data_tuple, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip)
            ReplayMemory.add(err, data_tuple)
            # Train if sufficient frames have been stored
            if iter > wait_before_train:
                if iter%train_interval==0:
                    # Train the RL network
                    old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, batch_size, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip)
                    # Refresh the priorities of the sampled transitions.
                    for i in range(batch_size):
                        ReplayMemory.update(idx[i], err[i])
                    if print_qval:
                        print(Qvals)
                    if choose:
                        # Double-DQN
                        target_agent.train_n(old_states, Qvals, actions, batch_size, dropout_rate, lr, epsilon, iter)
                    else:
                        agent.train_n(old_states, Qvals,actions, batch_size, dropout_rate, lr, epsilon, iter)
                if iter % update_target == 0:
                    agent.take_action([-1], num_actions)
                    print('Switching Target Network')
                    # Swap which network is trained and save a checkpoint.
                    choose = not choose
                    agent.save_network(network_path)
            iter += 1
            time_exec = time.time()-start_time
            VC = ''
            if environ:
                e1 = e1+1
                e_print=e1
            else:
                e2 = e2+1
                e_print = e2
            # init_p = epi1%len(init_pose_array)
            # First memory_info() field divided by 2**30 (bytes to GiB).
            mem_percent = process.memory_info()[0]/2.**30
            s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
                int(level),iter, epi1,
                action_word1,
                action_type1,
                epsilon,
                lr,
                ret,
                last_crash1,
                time_exec,
                mem_percent,
                reward1)
            print(s_log)
            f.write(s_log+'\n')
            last_crash1=last_crash1+1
            if crash1:
                # Episode ended: record it, reset the counters, respawn.
                agent.return_plot(ret, epi1, int(level/3), mem_percent, iter, dist)
                ret=0
                dist=0
                epi1 = epi1 + 1
                last_crash1=0
                reset_to_initial(level, reset_array, client)
                time.sleep(0.2)
                curr_state1 =agent.get_state()
            else:
                curr_state1 = new_state1
            # Request a level switch on the next pass every switch_env_steps.
            if iter%switch_env_steps==0:
                switch_env=True
            else:
                switch_env=False
            if iter% max_iters==0:
                automate=False
            # if iter >140:
            # active=False
    except Exception as e:
        # Report the error and pause automation so the operator can resume.
        print('------------- Error -------------')
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        print(exc_obj)
        automate = False
        print('Hit r and then backspace to start from this point')

36
network/Memory.py Normal file
View File

@@ -0,0 +1,36 @@
# Code used from https://github.com/rlcode/per
from network.SumTree import SumTree
import random
#-------------------- MEMORY --------------------------
class Memory:  # stored as ( s, a, r, s_ ) in SumTree
    """Prioritised replay buffer backed by a ``SumTree``.

    A sample's priority is ``(error + e) ** a``.
    """

    e = 0.01  # added to every error so that no priority is zero
    a = 0.6   # exponent applied to the shifted error

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def _getPriority(self, error):
        """Return the priority ``(error + e) ** a`` for a TD ``error``."""
        shifted = error + self.e
        return shifted ** self.a

    def add(self, error, sample):
        """Store ``sample`` with the priority derived from ``error``."""
        self.tree.add(self._getPriority(error), sample)

    def sample(self, n):
        """Draw ``n`` samples, one from each equal slice of the total priority.

        Returns:
            A list of ``(tree_index, data)`` tuples.
        """
        segment = self.tree.total() / n
        batch = []
        for k in range(n):
            lower, upper = segment * k, segment * (k + 1)
            idx, _priority, data = self.tree.get(random.uniform(lower, upper))
            batch.append((idx, data))
        return batch

    def update(self, idx, error):
        """Set the priority at tree index ``idx`` from a new ``error``."""
        self.tree.update(idx, self._getPriority(error))

54
network/SumTree.py Normal file
View File

@@ -0,0 +1,54 @@
import numpy
class SumTree:
    """Binary sum tree stored in a flat array.

    The ``capacity`` leaves occupy the last ``capacity`` slots of ``tree``
    and hold priorities; every internal node holds the sum of its two
    children, so ``tree[0]`` is the total. ``data`` holds the payload of
    each leaf. Once ``capacity`` entries have been written, new entries
    overwrite the oldest ones.
    """

    write = 0  # next data slot to write

    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = numpy.zeros(2 * capacity - 1)
        self.data = numpy.zeros(capacity, dtype=object)

    def _propagate(self, idx, change):
        """Add ``change`` to every ancestor of node ``idx``."""
        # Written as a loop that stops at the root. With capacity 1 the only
        # leaf *is* the root; a parent lookup from index 0 would never reach
        # a stopping point.
        while idx > 0:
            idx = (idx - 1) // 2
            self.tree[idx] += change

    def _retrieve(self, idx, s):
        """Descend from ``idx`` to the leaf covering cumulative value ``s``."""
        while True:
            left = 2 * idx + 1
            if left >= len(self.tree):
                # No children: idx is a leaf.
                return idx
            if s <= self.tree[left]:
                idx = left
            else:
                # Skip the mass of the left subtree and go right.
                s = s - self.tree[left]
                idx = left + 1

    def total(self):
        """Return the sum of all priorities."""
        return self.tree[0]

    def add(self, p, data):
        """Store ``data`` with priority ``p`` in the next slot."""
        idx = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(idx, p)
        self.write += 1
        if self.write >= self.capacity:
            self.write = 0

    def update(self, idx, p):
        """Set the priority of tree node ``idx`` to ``p`` and fix the sums."""
        change = p - self.tree[idx]
        self.tree[idx] = p
        self._propagate(idx, change)

    def get(self, s):
        """Return ``(tree_index, priority, data)`` for cumulative value ``s``."""
        idx = self._retrieve(0, s)
        dataIdx = idx - self.capacity + 1
        return (idx, self.tree[idx], self.data[dataIdx])

375
network/agent.py Normal file
View File

@@ -0,0 +1,375 @@
import numpy as np
import os
import tensorflow as tf
import cv2
from network.network import *
import airsim
import random
import matplotlib.pyplot as plt
from util.transformations import euler_from_quaternion
from PIL import Image
from network.loss_functions import *
from numpy import linalg as LA
class DeepAgent():
def __init__(self, input_size, num_actions, client, env_type, train_fc, network_path, name):
    """Build the Q-network graph, its training op and a session.

    The graph takes a batch of ``input_size`` x ``input_size`` 3-channel
    states, standardises each image, feeds them to ``AlexNetDuel`` and
    trains the Q value of the taken action against ``target`` with
    ``huber_loss`` and Adam.

    Args:
        input_size: side length of the square input images.
        num_actions: number of discrete actions.
        client: simulator client handle stored on the agent.
        env_type: environment type label stored on the agent.
        train_fc: passed through to ``AlexNetDuel``.
        network_path: prefix under which the summary writers are created.
        name: label printed at start-up and used for the loss log folder.
    """
    # NOTE(review): the extent of the `with` block was reconstructed --
    # confirm against the original file.
    print('------------------------------ ' +str(name)+ ' ------------------------------')
    # Each agent owns a separate graph.
    self.g = tf.Graph()
    self.iter=0
    with self.g.as_default():
        self.stat_writer = tf.summary.FileWriter(network_path+'return_plot')
        # name_array = 'D:/train/loss'+'/'+name
        self.loss_writer = tf.summary.FileWriter(network_path+'loss/'+name)
        self.env_type=env_type
        self.client=client
        self.input_size = input_size
        self.num_actions = num_actions
        #Placeholders
        self.batch_size = tf.placeholder(tf.int32, shape=())
        self.learning_rate = tf.placeholder(tf.float32, shape=())
        self.X1 = tf.placeholder(tf.float32, [None, input_size, input_size, 3], name='States')
        #self.X = tf.image.resize_images(self.X1, (227, 227))
        # Standardise every frame of the batch independently.
        self.X = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), self.X1)
        self.target = tf.placeholder(tf.float32, shape = [None], name='Qvals')
        self.actions= tf.placeholder(tf.int32, shape = [None], name='Actions')
        # `initial_weights` is assigned twice and not read again in this method.
        initial_weights ='imagenet'
        initial_weights = 'models/weights/weights.npy'
        self.model = AlexNetDuel(self.X, num_actions, train_fc)
        self.predict = self.model.output
        # Keep only the Q value of the action that was taken.
        ind = tf.one_hot(self.actions, num_actions)
        pred_Q = tf.reduce_sum(tf.multiply(self.model.output, ind), axis=1)
        self.loss = huber_loss(pred_Q, self.target)
        self.train = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.99).minimize(self.loss, name="train")
        self.sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        self.saver = tf.train.Saver()
    # Finalising the graph makes any later attempt to add ops raise an error.
    self.sess.graph.finalize()
    print()
def Q_val(self, xs):
    """Return the network's Q values for the batch of states ``xs``.

    The target and action placeholders are fed zeros and the learning rate
    is fed 0; only ``self.predict`` is fetched.
    """
    batch = xs.shape[0]
    feed = {
        self.batch_size: batch,
        self.learning_rate: 0,
        self.X1: xs,
        self.target: np.zeros(shape=[batch], dtype=np.float32),
        self.actions: np.zeros(dtype=int, shape=[batch]),
    }
    return self.sess.run(self.predict, feed_dict=feed)
def train_n(self, xs, ys,actions, batch_size, dropout_rate, lr, epsilon, iter):
# loss = self.sess.run(self.loss,
# feed_dict={self.batch_size: batch_size, self.dropout_rate: dropout_rate, self.learning_rate: lr, self.X: xs,
# self.Y: ys, self.actions:actions})
_, loss, Q = self.sess.run([self.train,self.loss, self.predict],
feed_dict={self.batch_size: batch_size, self.learning_rate: lr, self.X1: xs,
self.target: ys, self.actions: actions})
meanQ = np.mean(Q)
maxQ = np.max(Q)
summary = tf.Summary()
# summary.value.add(tag='Loss', simple_value=LA.norm(loss[ind, actions.astype(int)]))
summary.value.add(tag='Loss', simple_value=LA.norm(loss)/batch_size)
self.loss_writer.add_summary(summary, iter)
summary = tf.Summary()
summary.value.add(tag='Epsilon', simple_value=epsilon)
self.loss_writer.add_summary(summary, iter)
summary = tf.Summary()
summary.value.add(tag='Learning Rate', simple_value=lr)
self.loss_writer.add_summary(summary, iter)
summary = tf.Summary()
summary.value.add(tag='MeanQ', simple_value=meanQ)
self.loss_writer.add_summary(summary, iter)
summary = tf.Summary()
summary.value.add(tag='MaxQ', simple_value=maxQ)
self.loss_writer.add_summary(summary, iter)
# return _correct
def action_selection(self, state):
target = np.zeros(shape=[state.shape[0]], dtype=np.float32)
actions = np.zeros(dtype=int, shape=[state.shape[0]])
qvals= self.sess.run(self.predict,
feed_dict={self.batch_size: state.shape[0], self.learning_rate: 0.0001,
self.X1: state,
self.target: target, self.actions:actions})
if qvals.shape[0]>1:
# Evaluating batch
action = np.argmax(qvals, axis=1)
else:
# Evaluating one sample
action = np.zeros(1)
action[0]=np.argmax(qvals)
# self.action_array[action[0].astype(int)]+=1
return action.astype(int)
def take_action(self, action, num_actions):
# Set Paramaters
fov_v = 45 * np.pi / 180
fov_h = 80 * np.pi / 180
r = 0.4
ignore_collision = False
sqrt_num_actions = np.sqrt(num_actions)
posit = self.client.simGetVehiclePose()
pos = posit.position
orientation = posit.orientation
quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
eulers = euler_from_quaternion(quat)
alpha = eulers[2]
theta_ind = int(action[0] / sqrt_num_actions)
psi_ind = action[0] % sqrt_num_actions
theta = fov_v/sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
noise_theta = (fov_v / sqrt_num_actions) / 6
noise_psi = (fov_h / sqrt_num_actions) / 6
psi = psi + random.uniform(-1, 1)*noise_psi
theta = theta + random.uniform(-1, 1)*noise_theta
# print('Theta: ', theta * 180 / np.pi, end='')
# print(' Psi: ', psi * 180 / np.pi)
x = pos.x_val + r * np.cos(alpha + psi)
y = pos.y_val + r * np.sin(alpha + psi)
z = pos.z_val + r * np.sin(theta) # -ve because Unreal has -ve z direction going upwards
self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
ignore_collison=ignore_collision)
def get_depth(self):
responses = self.client.simGetImages([airsim.ImageRequest(2, airsim.ImageType.DepthVis, False, False)])
depth = []
img1d = np.fromstring(responses[0].image_data_uint8, dtype=np.uint8)
depth = img1d.reshape(responses[0].height, responses[0].width, 4)[:, :, 0]
# To make sure the wall leaks in the unreal environment doesn't mess up with the reward function
thresh = 50
super_threshold_indices = depth > thresh
depth[super_threshold_indices] = thresh
depth = depth / thresh
# plt.imshow(depth)
# # plt.gray()
# plt.show()
return depth, thresh
def get_state(self):
responses1 = self.client.simGetImages([ # depth visualization image
airsim.ImageRequest("1", airsim.ImageType.Scene, False,
False)]) # scene vision image in uncompressed RGBA array
response = responses1[0]
img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) # get numpy array
img_rgba = img1d.reshape(response.height, response.width, 4)
img = Image.fromarray(img_rgba)
img_rgb = img.convert('RGB')
self.iter = self.iter+1
state = np.asarray(img_rgb)
state = cv2.resize(state, (self.input_size, self.input_size), cv2.INTER_LINEAR)
state = cv2.normalize(state, state, 0, 1, cv2.NORM_MINMAX, cv2.CV_32F)
state_rgb = []
state_rgb.append(state[:, :, 0:3])
state_rgb = np.array(state_rgb)
state_rgb = state_rgb.astype('float32')
return state_rgb
def avg_depth(self, depth_map1, thresh):
# Version 0.2
# Thresholded depth map to ignore objects too far and give them a constant value
# Globally (not locally as in the version 0.1) Normalise the thresholded map between 0 and 1
# Threshold depends on the environment nature (indoor/ outdoor)
depth_map = depth_map1
# L1=0
# R1=0
# C1=0
# print(global_depth)
# dynamic_window = False
plot_depth = False
global_depth = np.mean(depth_map)
n = global_depth*thresh/3
# print("n=", n)
# n = 3
H = np.size(depth_map, 0)
W = np.size(depth_map, 1)
grid_size = (np.array([H, W]) / n)
# scale by 0.9 to select the window towards top from the mid line
h = int(0.9 * H * (n - 1) / (2 * n))
w = int(W * (n - 1) / (2 * n))
grid_location = [h, w]
x1 = int(round(grid_location[0]))
y = int(round(grid_location[1]))
a4 = int(round(grid_location[0] + grid_size[0]))
a5 = int(round(grid_location[0] + grid_size[0]))
b5 = int(round(grid_location[1] + grid_size[1]))
a2 = int(round(grid_location[0] - grid_size[0]))
b2 = int(round(grid_location[1] + grid_size[1]))
a8 = int(round(grid_location[0] + 2 * grid_size[0]))
b8 = int(round(grid_location[1] + grid_size[1]))
b4 = int(round(grid_location[1] - grid_size[1]))
if b4 < 0:
b4 = 0
a6 = int(round(grid_location[0] + grid_size[0]))
b6 = int(round(grid_location[1] + 2 * grid_size[1]))
if b6 > 640:
b6 = 640
# L = 1 / np.min(depth_map[x1:a4, b4:y])
# C = 1 / np.min(depth_map[x1:a5, y:b5])
# R = 1 / np.min(depth_map[x1:a6, b5:b6])
fract_min = 0.05
L_map = depth_map[x1:a4, b4:y]
C_map = depth_map[x1:a5, y:b5]
R_map = depth_map[x1:a6, b5:b6]
L_sort = np.sort(L_map.flatten())
end_ind = int(np.round(fract_min * len(L_sort)))
L1 = np.mean(L_sort[0:end_ind])
R_sort = np.sort(R_map.flatten())
end_ind = int(np.round(fract_min * len(R_sort)))
R1 = np.mean(R_sort[0:end_ind])
C_sort = np.sort(C_map.flatten())
end_ind = int(np.round(fract_min * len(C_sort)))
C1 = np.mean(C_sort[0:end_ind])
if plot_depth:
cv2.rectangle(depth_map1, (y, x1), (b5, a5), (0, 0, 0), 3)
cv2.rectangle(depth_map1, (y, x1), (b4, a4), (0, 0, 0), 3)
cv2.rectangle(depth_map1, (b5, x1), (b6, a6), (0, 0, 0), 3)
dispL = str(np.round(L1, 3))
dispC = str(np.round(C1, 3))
dispR = str(np.round(R1, 3))
cv2.putText(depth_map1, dispL, (20, 75), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
cv2.putText(depth_map1, dispC, (110, 75), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
cv2.putText(depth_map1, dispR, (200, 75), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0))
plt.imshow(depth_map1)
plt.show()
xxx = 1
# time.sleep(0.7)
#
xxxxx = 1
# print(L1, C1, R1)
return L1, C1, R1
def reward_gen(self, d_new, action, crash_threshold, thresh):
L_new, C_new, R_new = self.avg_depth(d_new, thresh)
# print('Rew_C', C_new)
# print(L_new, C_new, R_new)
# For now, lets keep the reward a simple one
if C_new < crash_threshold:
done = True
reward = -1
else:
done = False
if action == 0:
reward = C_new
else:
# reward = C_new/3
reward = C_new
# if action != 0:
# reward = 0
return reward, done
def GetAgentState(self):
return self.client.simGetCollisionInfo()
def return_plot(self, ret, epi, env_type, mem_percent, iter, dist):
# ret, epi1, int(level/4), mem_percent, iter
summary = tf.Summary()
tag = 'Return'
summary.value.add(tag=tag, simple_value=ret)
self.stat_writer.add_summary(summary, epi)
summary = tf.Summary()
summary.value.add(tag='Memory-GB', simple_value=mem_percent)
self.stat_writer.add_summary(summary, iter)
summary = tf.Summary()
summary.value.add(tag='Safe Flight', simple_value=dist)
self.stat_writer.add_summary(summary, epi)
def save_network(self, save_path):
self.saver.save(self.sess, save_path)
def save_weights(self, save_path):
name = ['conv1W', 'conv1b', 'conv2W', 'conv2b', 'conv3W', 'conv3b', 'conv4W', 'conv4b', 'conv5W', 'conv5b',
'fc6aW', 'fc6ab', 'fc7aW', 'fc7ab', 'fc8aW', 'fc8ab', 'fc9aW', 'fc9ab', 'fc10aW', 'fc10ab',
'fc6vW', 'fc6vb', 'fc7vW', 'fc7vb', 'fc8vW', 'fc8vb', 'fc9vW', 'fc9vb', 'fc10vW', 'fc10vb'
]
weights = {}
print('Saving weights in .npy format')
for i in range(0, 30):
# weights[name[i]] = self.sess.run(self.sess.graph._collections['variables'][i])
if i==0:
str1 = 'Variable:0'
else:
str1 = 'Variable_'+str(i)+':0'
weights[name[i]] = self.sess.run(str1)
save_path = save_path+'weights.npy'
np.save(save_path, weights)
def load_network(self, load_path):
self.saver.restore(self.sess, load_path)
def get_weights(self):
xs=np.zeros(shape=(32, 227,227,3))
actions = np.zeros(dtype=int, shape=[xs.shape[0]])
ys = np.zeros(shape=[xs.shape[0]], dtype=np.float32)
return self.sess.run(self.weights,
feed_dict={self.batch_size: xs.shape[0], self.learning_rate: 0,
self.X1: xs,
self.target: ys, self.actions:actions})

16
network/loss_functions.py Normal file
View File

@@ -0,0 +1,16 @@
# Author: Aqeel Anwar(ICSRL)
# Created: 2/22/2019, 4:57 PM
# Email: aqeel.anwar@gatech.edu
import numpy as np
import tensorflow as tf
def huber_loss(X,Y):
    """Return the Huber loss (delta = 1) between X and Y, summed over all elements.

    Errors with magnitude below 1 contribute 0.5 * err**2; larger errors
    contribute |err| - 0.5.
    """
    residual = X - Y
    magnitude = tf.abs(residual)
    quadratic = 0.5 * tf.square(residual)
    linear = magnitude - 0.5
    return tf.reduce_sum(tf.where(magnitude < 1.0, quadratic, linear))
def mse_loss(X,Y):
    """Return the sum (not the mean) of squared differences between X and Y."""
    squared_error = tf.square(X - Y)
    return tf.reduce_sum(squared_error)

343
network/network.py Normal file
View File

@@ -0,0 +1,343 @@
import tensorflow as tf
import numpy as np
from network.loss_functions import huber_loss
class AlexNetDuel(object):
    """Dueling AlexNet Q-network.

    Five convolutional layers are initialised from 'models/imagenet.npy'; two
    randomly initialised fully connected streams (advantage and value) are
    combined as ``output = V + (A - mean(A))``.

    Variables are created in a fixed order (conv W/b pairs, then the advantage
    stream, then the value stream); code that addresses variables by their
    auto-generated names depends on that order.
    """

    def __init__(self, x, num_actions, train_type):
        """Build the network on input tensor ``x``.

        Args:
            x: input image batch tensor.
            num_actions: width of the advantage (and final Q) output.
            train_type: 'last4', 'last3' or 'last2' freezes the leading layers;
                any other value leaves every layer trainable.
        """
        self.x = x
        weights_path = 'models/imagenet.npy'
        # The file holds a pickled dict, which NumPy >= 1.16.3 refuses to load
        # unless allow_pickle=True. Passing the path (not an open handle) lets
        # NumPy close the file. SECURITY: unpickling executes code -- only load
        # weight files from a trusted source.
        weights = np.load(weights_path, encoding="latin1", allow_pickle=True).item()
        print('Loading imagenet weights for the conv layers and random for fc layers')

        train_conv, train_fc6, train_fc7, train_fc8, train_fc9 = self._trainable_flags(train_type)

        self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv4 = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv5 = self.conv(self.conv4, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.flat = tf.contrib.layers.flatten(self.maxpool5)

        # Advantage Network
        self.fc6_a = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_a = self.FullyConnected(self.fc6_a, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_a = self.FullyConnected(self.fc7_a, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_a = self.FullyConnected(self.fc8_a, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_a = self.FullyConnected(self.fc9_a, units_in=512, units_out=num_actions, act='linear', trainable=True)

        # Value Network
        self.fc6_v = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_v = self.FullyConnected(self.fc6_v, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_v = self.FullyConnected(self.fc7_v, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_v = self.FullyConnected(self.fc8_v, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_v = self.FullyConnected(self.fc9_v, units_in=512, units_out=1, act='linear', trainable=True)

        # Dueling combination: Q = V + (A - mean_a A)
        self.output = self.fc10_v + tf.subtract(self.fc10_a, tf.reduce_mean(self.fc10_a, axis=1, keep_dims=True))

    @staticmethod
    def _trainable_flags(train_type):
        """Return trainable flags ``(conv, fc6, fc7, fc8, fc9)`` for ``train_type``."""
        num_frozen = 0
        if train_type == 'last4':
            num_frozen = 2
        elif train_type == 'last3':
            num_frozen = 3
        elif train_type == 'last2':
            num_frozen = 4
        return tuple(index >= num_frozen for index in range(5))

    def conv(self, input, W, b, k, out, s, p, trainable=True):
        """Return relu(conv2d(input, W) + b) with W/b wrapped in new variables.

        ``k`` and ``out`` are the expected kernel size and output channel
        count; they are only used to sanity-check the shape of ``W``.
        """
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)
        kernel = tf.Variable(W, trainable=trainable)
        conv_kernel_1 = tf.nn.conv2d(input, kernel, [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))
        return tf.nn.relu(bias_layer_1)

    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        """Return a dense layer on ``input`` with truncated-normal initialised W and b.

        Raises:
            ValueError: if ``act`` is neither 'relu' nor 'linear'.
        """
        # Validate before creating variables so a bad call leaves no orphan
        # variables behind and still fails when assertions are disabled (-O).
        if act not in ('relu', 'linear'):
            raise ValueError("Unsupported activation: %r (expected 'relu' or 'linear')" % (act,))
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)
        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        return tf.nn.xw_plus_b(input, W, b)
class AlexNetConditional(object):
    """AlexNet with two dueling heads that share conv1-conv3.

    The 'main' head continues through the ImageNet-initialised conv4/conv5 and
    three fully connected layers per stream (``output_main``). The
    'conditional' head branches after conv3 through one randomly initialised
    convolution and two fully connected layers per stream (``output_cdl``).
    """

    def __init__(self, x, num_actions, train_type):
        """Build both heads on input tensor ``x``.

        Args:
            x: input image batch tensor.
            num_actions: width of each advantage (and final Q) output.
            train_type: 'last4', 'last3' or 'last2' freezes the leading layers;
                any other value leaves every layer trainable.
        """
        self.x = x
        weights_path = 'models/imagenet.npy'
        # The file holds a pickled dict, which NumPy >= 1.16.3 refuses to load
        # unless allow_pickle=True. Passing the path (not an open handle) lets
        # NumPy close the file. SECURITY: unpickling executes code -- only load
        # weight files from a trusted source.
        weights = np.load(weights_path, encoding="latin1", allow_pickle=True).item()
        print('Loading imagenet weights for the conv layers and random for fc layers')

        # The fc9 flag is produced for parity with the other networks but is
        # not used by this architecture.
        train_conv, train_fc6, train_fc7, train_fc8, train_fc9 = self._trainable_flags(train_type)

        self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)

        # Divide the network stream from this point onwards

        # One - Main Network
        self.conv4_main = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv5_main = self.conv(self.conv4_main, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5_main = tf.nn.max_pool(self.conv5_main, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.flat_main = tf.contrib.layers.flatten(self.maxpool5_main)

        # Advantage Network
        self.fc6_a_main = self.FullyConnected(self.flat_main, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7_a_main = self.FullyConnected(self.fc6_a_main, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        # NOTE(review): this output layer follows train_fc8 while the value
        # stream's output layer below is always trainable -- confirm intent.
        self.fc8_a_main = self.FullyConnected(self.fc7_a_main, units_in=2048, units_out=num_actions, act='linear', trainable=train_fc8)

        # Value Network
        self.fc6_v_main = self.FullyConnected(self.flat_main, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7_v_main = self.FullyConnected(self.fc6_v_main, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        self.fc8_v_main = self.FullyConnected(self.fc7_v_main, units_in=2048, units_out=1, act='linear', trainable=True)

        self.output_main = self.fc8_v_main + tf.subtract(self.fc8_a_main, tf.reduce_mean(self.fc8_a_main, axis=1, keep_dims=True))

        # Two - Conditional Network
        # Kernel and bias are drawn uniformly from [0, 1) (np.random.rand).
        conv4_cdl_k = np.random.rand(3, 3, 384, 256).astype(np.float32)
        conv4_cdl_b = np.random.rand(256).astype(np.float32)
        self.conv4_cdl = self.conv(self.conv3, conv4_cdl_k, conv4_cdl_b, k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool4_cdl = tf.nn.max_pool(self.conv4_cdl, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.flat_cdl = tf.contrib.layers.flatten(self.maxpool4_cdl)

        # Advantage Network
        self.fc6_a_cdl = self.FullyConnected(self.flat_cdl, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_a_cdl = self.FullyConnected(self.fc6_a_cdl, units_in=2048, units_out=num_actions, act='linear', trainable=train_fc7)

        # Value Network
        self.fc6_v_cdl = self.FullyConnected(self.flat_cdl, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_v_cdl = self.FullyConnected(self.fc6_v_cdl, units_in=2048, units_out=1, act='linear', trainable=train_fc7)

        self.output_cdl = self.fc7_v_cdl + tf.subtract(self.fc7_a_cdl, tf.reduce_mean(self.fc7_a_cdl, axis=1, keep_dims=True))

    @staticmethod
    def _trainable_flags(train_type):
        """Return trainable flags ``(conv, fc6, fc7, fc8, fc9)`` for ``train_type``."""
        num_frozen = 0
        if train_type == 'last4':
            num_frozen = 2
        elif train_type == 'last3':
            num_frozen = 3
        elif train_type == 'last2':
            num_frozen = 4
        return tuple(index >= num_frozen for index in range(5))

    def conv(self, input, W, b, k, out, s, p, trainable=True):
        """Return relu(conv2d(input, W) + b) with W/b wrapped in new variables.

        ``k`` and ``out`` are the expected kernel size and output channel
        count; they are only used to sanity-check the shape of ``W``.
        """
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)
        kernel = tf.Variable(W, trainable=trainable)
        conv_kernel_1 = tf.nn.conv2d(input, kernel, [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))
        return tf.nn.relu(bias_layer_1)

    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        """Return a dense layer on ``input`` with truncated-normal initialised W and b.

        Raises:
            ValueError: if ``act`` is neither 'relu' nor 'linear'.
        """
        # Validate before creating variables so a bad call leaves no orphan
        # variables behind and still fails when assertions are disabled (-O).
        if act not in ('relu', 'linear'):
            raise ValueError("Unsupported activation: %r (expected 'relu' or 'linear')" % (act,))
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)
        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        return tf.nn.xw_plus_b(input, W, b)
class AlexNetDuelPrune(object):
    """Dueling AlexNet whose narrower conv layers load from 'models/prune_weights.npy'.

    The weight file is keyed per tensor ('conv1W', 'conv1b', ...). The fully
    connected advantage/value streams are randomly initialised and combined as
    ``output = V + (A - mean(A))``.
    """

    def __init__(self, x, num_actions, train_type):
        """Build the network on input tensor ``x``.

        Args:
            x: input image batch tensor.
            num_actions: width of the advantage (and final Q) output.
            train_type: 'last4', 'last3' or 'last2' freezes the leading layers;
                any other value leaves every layer trainable.
        """
        self.x = x
        weights_path = 'models/prune_weights.npy'
        # The file holds a pickled dict, which NumPy >= 1.16.3 refuses to load
        # unless allow_pickle=True. Passing the path (not an open handle) lets
        # NumPy close the file. SECURITY: unpickling executes code -- only load
        # weight files from a trusted source.
        weights = np.load(weights_path, encoding="latin1", allow_pickle=True).item()
        print('Loading pruned weights for the conv layers and random for fc layers')

        train_conv, train_fc6, train_fc7, train_fc8, train_fc9 = self._trainable_flags(train_type)

        self.conv1 = self.conv(self.x, weights["conv1W"], weights["conv1b"], k=11, out=64, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv2 = self.conv(self.maxpool1, weights["conv2W"], weights["conv2b"], k=5, out=192, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv3 = self.conv(self.maxpool2, weights["conv3W"], weights["conv3b"], k=3, out=288, s=1, p="SAME", trainable=train_conv)
        self.conv4 = self.conv(self.conv3, weights["conv4W"], weights["conv4b"], k=3, out=288, s=1, p="SAME", trainable=train_conv)
        self.conv5 = self.conv(self.conv4, weights["conv5W"], weights["conv5b"], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.flat = tf.contrib.layers.flatten(self.maxpool5)

        # Advantage Network
        self.fc6_a = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_a = self.FullyConnected(self.fc6_a, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_a = self.FullyConnected(self.fc7_a, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_a = self.FullyConnected(self.fc8_a, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_a = self.FullyConnected(self.fc9_a, units_in=512, units_out=num_actions, act='linear', trainable=True)

        # Value Network
        self.fc6_v = self.FullyConnected(self.flat, units_in=9216, units_out=2048, act='relu', trainable=train_fc6)
        self.fc7_v = self.FullyConnected(self.fc6_v, units_in=2048, units_out=1024, act='relu', trainable=train_fc7)
        self.fc8_v = self.FullyConnected(self.fc7_v, units_in=1024, units_out=1024, act='relu', trainable=train_fc8)
        self.fc9_v = self.FullyConnected(self.fc8_v, units_in=1024, units_out=512, act='relu', trainable=train_fc9)
        self.fc10_v = self.FullyConnected(self.fc9_v, units_in=512, units_out=1, act='linear', trainable=True)

        # Dueling combination: Q = V + (A - mean_a A)
        self.output = self.fc10_v + tf.subtract(self.fc10_a, tf.reduce_mean(self.fc10_a, axis=1, keep_dims=True))

    @staticmethod
    def _trainable_flags(train_type):
        """Return trainable flags ``(conv, fc6, fc7, fc8, fc9)`` for ``train_type``."""
        num_frozen = 0
        if train_type == 'last4':
            num_frozen = 2
        elif train_type == 'last3':
            num_frozen = 3
        elif train_type == 'last2':
            num_frozen = 4
        return tuple(index >= num_frozen for index in range(5))

    def conv(self, input, W, b, k, out, s, p, trainable=True):
        """Return relu(conv2d(input, W) + b) with W/b wrapped in new variables.

        ``k`` and ``out`` are the expected kernel size and output channel
        count; they are only used to sanity-check the shape of ``W``.
        """
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)
        kernel = tf.Variable(W, trainable=trainable)
        conv_kernel_1 = tf.nn.conv2d(input, kernel, [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))
        return tf.nn.relu(bias_layer_1)

    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        """Return a dense layer on ``input`` with truncated-normal initialised W and b.

        Raises:
            ValueError: if ``act`` is neither 'relu' nor 'linear'.
        """
        # Validate before creating variables so a bad call leaves no orphan
        # variables behind and still fails when assertions are disabled (-O).
        if act not in ('relu', 'linear'):
            raise ValueError("Unsupported activation: %r (expected 'relu' or 'linear')" % (act,))
        W = tf.Variable(tf.truncated_normal(shape=(units_in, units_out), stddev=0.05), trainable=trainable)
        b = tf.Variable(tf.truncated_normal(shape=[units_out], stddev=0.05), trainable=trainable)
        if act == 'relu':
            return tf.nn.relu_layer(input, W, b)
        return tf.nn.xw_plus_b(input, W, b)
class AlexNet(object):
    """Single-stream AlexNet Q-network.

    Five convolutional layers are initialised from 'models/imagenet.npy' and
    followed by five randomly initialised fully connected layers; ``output``
    is the linear fc10 layer of width ``num_actions``.
    """

    def __init__(self, x, num_actions, train_type):
        """Build the network on input tensor ``x`` and print every layer tensor.

        Args:
            x: input image batch tensor.
            num_actions: width of the output layer.
            train_type: 'last4', 'last3' or 'last2' freezes the leading layers;
                any other value leaves every layer trainable.
        """
        self.x = x
        weights_path = 'models/imagenet.npy'
        # The file holds a pickled dict, which NumPy >= 1.16.3 refuses to load
        # unless allow_pickle=True. Passing the path (not an open handle) lets
        # NumPy close the file. SECURITY: unpickling executes code -- only load
        # weight files from a trusted source.
        weights = np.load(weights_path, encoding="latin1", allow_pickle=True).item()
        print('Loading imagenet weights for the conv layers and random for fc layers')

        train_conv, train_fc6, train_fc7, train_fc8, train_fc9 = self._trainable_flags(train_type)

        self.conv1 = self.conv(self.x, weights["conv1"][0], weights["conv1"][1], k=11, out=96, s=4, p="VALID", trainable=train_conv)
        self.maxpool1 = tf.nn.max_pool(self.conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv2 = self.conv(self.maxpool1, weights["conv2"][0], weights["conv2"][1], k=5, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool2 = tf.nn.max_pool(self.conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.conv3 = self.conv(self.maxpool2, weights["conv3"][0], weights["conv3"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv4 = self.conv(self.conv3, weights["conv4"][0], weights["conv4"][1], k=3, out=384, s=1, p="SAME", trainable=train_conv)
        self.conv5 = self.conv(self.conv4, weights["conv5"][0], weights["conv5"][1], k=3, out=256, s=1, p="SAME", trainable=train_conv)
        self.maxpool5 = tf.nn.max_pool(self.conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
        self.flat = tf.contrib.layers.flatten(self.maxpool5)

        self.fc6 = self.FullyConnected(self.flat, units_in=9216, units_out=4096, act='relu', trainable=train_fc6)
        self.fc7 = self.FullyConnected(self.fc6, units_in=4096, units_out=2048, act='relu', trainable=train_fc7)
        self.fc8 = self.FullyConnected(self.fc7, units_in=2048, units_out=2048, act='relu', trainable=train_fc8)
        self.fc9 = self.FullyConnected(self.fc8, units_in=2048, units_out=1024, act='relu', trainable=train_fc9)
        self.fc10 = self.FullyConnected(self.fc9, units_in=1024, units_out=num_actions, act='linear', trainable=True)
        self.output = self.fc10

        # Print each layer tensor (shows its static shape) as a build log.
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5,
                      self.fc6, self.fc7, self.fc8, self.fc9, self.fc10):
            print(layer)

    @staticmethod
    def _trainable_flags(train_type):
        """Return trainable flags ``(conv, fc6, fc7, fc8, fc9)`` for ``train_type``."""
        num_frozen = 0
        if train_type == 'last4':
            num_frozen = 2
        elif train_type == 'last3':
            num_frozen = 3
        elif train_type == 'last2':
            num_frozen = 4
        return tuple(index >= num_frozen for index in range(5))

    def conv(self, input, W, b, k, out, s, p, trainable=True):
        """Return relu(conv2d(input, W) + b) with W/b wrapped in new variables.

        ``k`` and ``out`` are the expected kernel size and output channel
        count; they are only used to sanity-check the shape of ``W``.
        """
        assert (W.shape[0] == k)
        assert (W.shape[1] == k)
        assert (W.shape[3] == out)
        kernel = tf.Variable(W, trainable=trainable)
        conv_kernel_1 = tf.nn.conv2d(input, kernel, [1, s, s, 1], padding=p)
        bias_layer_1 = tf.nn.bias_add(conv_kernel_1, tf.Variable(b, trainable=trainable))
        return tf.nn.relu(bias_layer_1)

    def FullyConnected(self, input, units_in, units_out, act, trainable=True):
        """Return a dense layer on ``input`` with truncated-normal initialised W and b.

        Raises:
            ValueError: if ``act`` is neither 'relu' nor 'linear'.
        """
        # Validate before creating any ops so a bad call leaves nothing behind
        # in the graph and still fails when assertions are disabled (-O).
        if act not in ('relu', 'linear'):
            raise ValueError("Unsupported activation: %r (expected 'relu' or 'linear')" % (act,))
        W = tf.truncated_normal(shape=(units_in, units_out), stddev=0.05)
        b = tf.truncated_normal(shape=[units_out], stddev=0.05)
        weight_var = tf.Variable(W, trainable=trainable)
        bias_var = tf.Variable(b, trainable=trainable)
        if act == 'relu':
            return tf.nn.relu_layer(input, weight_var, bias_var)
        return tf.nn.xw_plus_b(input, weight_var, bias_var)

1938
util/transformations.py Normal file

File diff suppressed because it is too large Load Diff