# Mirror of https://github.com/aqeelanwar/DRLwithTL.git
# Author: Aqeel Anwar(ICSRL)
# Created: 10/14/2019, 12:50 PM
# Email: aqeel.anwar@gatech.edu

import math
import os
import random
import subprocess
import time

import airsim
import matplotlib.pyplot as plt
import numpy as np
import psutil
import pygame

from configs.read_cfg import read_cfg


def close_env(env_process):
    # Kill the Unreal environment process and all of its children.
    process = psutil.Process(env_process.pid)
    for proc in process.children(recursive=True):
        proc.kill()
    process.kill()


def save_network_path(cfg):
    # Build (and create, if needed) the directory the trained network is saved to.
    weights_type = 'Imagenet'
    if cfg.custom_load:
        cfg.network_path = 'models/trained/' + cfg.env_type + '/' + cfg.env_name + '/CustomLoad/' + cfg.train_type + '/' + cfg.train_type
    else:
        cfg.network_path = 'models/trained/' + cfg.env_type + '/' + cfg.env_name + '/' + weights_type + '/' + cfg.train_type + '/' + cfg.train_type

    if not os.path.exists(cfg.network_path):
        os.makedirs(cfg.network_path)

    return cfg
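
# For illustration, with hypothetical config values (names not taken from this
# file): env_type='indoor', env_name='indoor_long', train_type='e2e' and
# custom_load=False, the resulting path is
#   models/trained/indoor/indoor_long/Imagenet/e2e/e2e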


def start_environment(env_name):
    # Launch the packaged Unreal environment as a child process and give it
    # a few seconds to boot before AirSim tries to connect.
    env_folder = os.path.dirname(os.path.abspath(__file__)) + "/unreal_envs/" + env_name + "/"
    path = env_folder + env_name + ".exe"
    env_process = subprocess.Popen(path)
    time.sleep(5)
    print("Successfully loaded environment: " + env_name)

    return env_process, env_folder
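
# A minimal lifecycle sketch, assuming a packaged environment named
# 'indoor_long' exists under unreal_envs/ (the name is illustrative):
def _demo_env_lifecycle():
    env_process, env_folder = start_environment('indoor_long')
    try:
        pass  # connect to AirSim, run episodes, etc.
    finally:
        close_env(env_process)  # always tear down the Unreal process tree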


def initialize_infer(env_cfg, client, env_folder):
    if not os.path.exists(env_folder + 'results'):
        os.makedirs(env_folder + 'results')

    # Map the floor to zero height; dividing by 100 appears to convert the
    # config's Unreal-style centimeter units to meters.
    c_z = (env_cfg.ceiling_z - env_cfg.floor_z) / 100
    p_z = (env_cfg.player_start_z - env_cfg.floor_z) / 100

    plt.ion()
    fig_z = plt.figure()
    ax_z = fig_z.add_subplot(111)
    line_z, = ax_z.plot(0, 0)
    ax_z.set_ylim(0, c_z)
    plt.title("Altitude variation")

    # start_posit = client.simGetVehiclePose()

    fig_nav = plt.figure()
    ax_nav = fig_nav.add_subplot(111)
    img = plt.imread(env_folder + env_cfg.floorplan)
    ax_nav.imshow(img)
    plt.axis('off')
    plt.title("Navigational map")
    plt.plot(env_cfg.o_x, env_cfg.o_y, 'b*', linewidth=20)
    nav, = ax_nav.plot(env_cfg.o_x, env_cfg.o_y)

    return p_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav


def translate_action(action, num_actions):
    # The num_actions discrete actions are arranged on a sqrt(n) x sqrt(n)
    # grid: rows select vertical motion (Up/Forward/Down), columns select
    # horizontal motion (Left/Forward/Right), and the digit in the returned
    # word encodes distance from the center of the grid.
    sqrt_num_actions = np.sqrt(num_actions)
    if sqrt_num_actions % 2 == 0:
        # Even grids have no central 'F' slot: half up / half down, half left / half right.
        v_string = list('U' * int(sqrt_num_actions / 2) + 'D' * int(sqrt_num_actions / 2))
        h_string = list('L' * int(sqrt_num_actions / 2) + 'R' * int(sqrt_num_actions / 2))
    else:
        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))

    v_ind = int(action[0] / sqrt_num_actions)
    h_ind = int(action[0] % sqrt_num_actions)
    action_word = (v_string[v_ind] + str(int(np.ceil(abs((sqrt_num_actions - 1) / 2 - v_ind))))
                   + '-' + h_string[h_ind] + str(int(np.ceil(abs((sqrt_num_actions - 1) / 2 - h_ind)))))

    return action_word
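
# Worked example: with num_actions = 25 the grid is 5x5, so
# v_string = ['U','U','F','D','D'] and h_string = ['L','L','F','R','R'].
# For action = [7]: v_ind = 7 // 5 = 1 and h_ind = 7 % 5 = 2, giving
#   translate_action([7], 25) -> 'U1-F0'  (one step up, no lateral turn)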


def get_errors(data_tuple, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):
    # Convenience wrapper: run a double-DQN pass only to obtain the TD errors
    # (likely for replay-memory priority bookkeeping).
    _, Q_target, _, err, _ = minibatch_double(data_tuple, len(data_tuple), choose, ReplayMemory,
                                              input_size, agent, target_agent, gamma, Q_clip)

    return err


def minibatch_double(data_tuple, batch_size, choose, ReplayMemory, input_size, agent, target_agent, gamma, Q_clip):
    # Double-DQN minibatch targets: one network selects the greedy action in
    # the new state, the other evaluates it; `choose` swaps the roles of
    # agent and target_agent. The TD term is (optionally) clipped and folded
    # into Q_target; the caller trains on Q_target with Huber loss rather
    # than on an explicit TD-error loss.

    if batch_size == 1:
        train_batch = data_tuple
        idx = None
    else:
        batch = ReplayMemory.sample(batch_size)
        train_batch = np.array([b[1][0] for b in batch])
        idx = [b[0] for b in batch]

    actions = np.zeros(shape=(batch_size), dtype=int)
    crashes = np.zeros(shape=(batch_size))
    rewards = np.zeros(shape=batch_size)
    curr_states = np.zeros(shape=(batch_size, input_size, input_size, 3))
    new_states = np.zeros(shape=(batch_size, input_size, input_size, 3))
    for ii, m in enumerate(train_batch):
        curr_state_m, action_m, new_state_m, reward_m, crash_m = m
        curr_states[ii, :, :, :] = curr_state_m[...]
        actions[ii] = action_m
        new_states[ii, :, :, :] = new_state_m
        rewards[ii] = reward_m
        crashes[ii] = crash_m

    if choose:
        oldQval_A = target_agent.Q_val(curr_states)
        newQval_A = target_agent.Q_val(new_states)
        newQval_B = agent.Q_val(new_states)
    else:
        oldQval_A = agent.Q_val(curr_states)
        newQval_A = agent.Q_val(new_states)
        newQval_B = target_agent.Q_val(new_states)

    TD = np.zeros(shape=[batch_size])
    err = np.zeros(shape=[batch_size])
    Q_target = np.zeros(shape=[batch_size])

    # A reward of -1 marks a terminal (crash) transition.
    term_ind = np.where(rewards == -1)[0]
    nonterm_ind = np.where(rewards != -1)[0]

    # Double-DQN TD term: r + gamma * Q_B(s', argmax_a Q_A(s', a)) - Q_A(s, a)
    TD[nonterm_ind] = (rewards[nonterm_ind]
                       + gamma * newQval_B[nonterm_ind, np.argmax(newQval_A[nonterm_ind], axis=1)]
                       - oldQval_A[nonterm_ind, actions[nonterm_ind].astype(int)])
    TD[term_ind] = rewards[term_ind]

    if Q_clip:
        TD_clip = np.clip(TD, -1, 1)
    else:
        TD_clip = TD

    Q_target[nonterm_ind] = oldQval_A[nonterm_ind, actions[nonterm_ind].astype(int)] + TD_clip[nonterm_ind]
    Q_target[term_ind] = TD_clip[term_ind]

    err = abs(TD)  # or abs(TD_clip)
    return curr_states, Q_target, actions, err, idx
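
# A minimal numpy sketch of the double-DQN target used above, detached from
# the agent classes so it can be checked in isolation (all array values are
# made up for illustration):
def _demo_double_dqn_target(gamma=0.99):
    q_a_new = np.array([[0.2, 0.8], [0.5, 0.1]])   # selector network, Q_A(s', .)
    q_b_new = np.array([[0.3, 0.6], [0.4, 0.2]])   # evaluator network, Q_B(s', .)
    rewards = np.array([0.1, 0.0])
    greedy = np.argmax(q_a_new, axis=1)            # actions chosen by network A
    target = rewards + gamma * q_b_new[np.arange(2), greedy]  # evaluated by B
    return target  # -> [0.1 + 0.99*0.6, 0.0 + 0.99*0.4]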


def policy(epsilon, curr_state, iter, b, epsilon_model, wait_before_train, num_actions, agent):
    # Anneal epsilon upward from 0 to epsilon_ceil over the first b iterations.
    # Note the convention: epsilon is the probability of *exploiting* (using
    # the network), so early iterations are mostly random.
    qvals = []

    epsilon_ceil = 0.95
    if epsilon_model == 'linear':
        epsilon = epsilon_ceil * (iter - wait_before_train) / (b - wait_before_train)
        if epsilon > epsilon_ceil:
            epsilon = epsilon_ceil

    elif epsilon_model == 'exponential':
        epsilon = 1 - math.exp(-2 / (b - wait_before_train) * (iter - wait_before_train))
        if epsilon > epsilon_ceil:
            epsilon = epsilon_ceil

    if random.random() > epsilon:
        sss = curr_state.shape
        action = np.random.randint(0, num_actions, size=sss[0], dtype=np.int32)
        action_type = 'Rand'
    else:
        # Use the network to predict the action
        action = agent.action_selection(curr_state)
        action_type = 'Pred'

    return action, action_type, epsilon, qvals
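
# For illustration, the linear schedule with hypothetical values
# wait_before_train = 1000 and b = 10000 (annealing horizon) gives:
#   iter =  1000 -> epsilon = 0.00   (always random)
#   iter =  5500 -> epsilon = 0.475  (half random, half predicted)
#   iter = 10000 -> epsilon = 0.95   (capped at epsilon_ceil)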


def reset_to_initial(level, reset_array, client):
    # Teleport the drone back to the stored reset pose for this level.
    # The keyword spelling ignore_collison follows the AirSim Python
    # client's own parameter name.
    reset_pos = reset_array[level]

    client.simSetVehiclePose(reset_pos, ignore_collison=True)
    time.sleep(0.1)


def connect_drone(ip_address='127.0.0.0', phase='infer'):
    print('------------------------------ Drone ------------------------------')
    client = airsim.MultirotorClient(ip=ip_address, timeout_value=10)
    client.confirmConnection()
    old_posit = client.simGetVehiclePose()
    if phase == 'train':
        client.simSetVehiclePose(
            airsim.Pose(airsim.Vector3r(0, 0, 0), old_posit.orientation),
            ignore_collison=True)
    elif phase == 'infer':
        client.enableApiControl(True)
        client.armDisarm(True)
        client.takeoffAsync().join()

    return client, old_posit
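
# A minimal connection sketch, assuming a local AirSim instance reachable at
# the usual loopback address 127.0.0.1:
def _demo_connect():
    client, old_posit = connect_drone(ip_address='127.0.0.1', phase='infer')
    print(old_posit.position)  # pose recorded before API control / takeoff
    return client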


def blit_text(surface, text, pos, font, color=pygame.Color('black')):
    # Word-wrap `text` onto `surface`, breaking lines at the surface width.
    words = [word.split(' ') for word in text.splitlines()]  # 2D array where each row is a list of words.
    space = font.size(' ')[0]  # The width of a space.
    max_width, max_height = surface.get_size()
    x, y = pos
    for line in words:
        for word in line:
            word_surface = font.render(word, 0, color)
            word_width, word_height = word_surface.get_size()
            if x + word_width >= max_width:
                x = pos[0]  # Reset the x.
                y += word_height  # Start on a new row.
            surface.blit(word_surface, (x, y))
            x += word_width + space
        x = pos[0]  # Reset the x.
        y += word_height  # Start on a new row.
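
# Illustrative use of blit_text, assuming pygame.init() has been called and
# `screen` is an existing display surface:
#   font = pygame.font.SysFont('arial', 24)
#   blit_text(screen, 'R - Reconnect\nbackspace - Pause/play', (20, 20), font)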


def pygame_connect(phase):
    pygame.init()

    if phase == 'train':
        img_path = 'images/train_keys.png'
    elif phase == 'infer':
        img_path = 'images/infer_keys.png'
    else:
        raise ValueError("phase must be 'train' or 'infer'")
    img = pygame.image.load(img_path)

    screen = pygame.display.set_mode(img.get_rect().size)
    screen.blit(img, (0, 0))
    pygame.display.set_caption('DLwithTL')
    # screen.fill((21, 116, 163))
    # text = 'Supported Keys:\n'
    # font = pygame.font.SysFont('arial', 32)
    # blit_text(screen, text, (20, 20), font, color=(214, 169, 19))
    # pygame.display.update()
    #
    # font = pygame.font.SysFont('arial', 24)
    # text = 'R - Reconnect unreal\nbackspace - Pause/play\nL - Update configurations\nEnter - Save Network'
    # blit_text(screen, text, (20, 70), font, color=(214, 169, 19))
    pygame.display.update()

    return screen


def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ, phase, fig_z, fig_nav, env_folder):
    for event in pygame.event.get():

        if event.type == pygame.QUIT:
            active = False
            pygame.quit()

        # Training keys control
        if event.type == pygame.KEYDOWN and phase == 'train':
            if event.key == pygame.K_l:
                # Re-read the config file and update the learning rate
                cfg = read_cfg(config_filename='configs/config.cfg', verbose=False)
                lr = cfg.lr
                print('Updated Parameters')
                print('Learning Rate: ', cfg.lr)

            if event.key == pygame.K_RETURN:
                automate = False
                print('Saving Model')
                agent.save_network(network_path)
                print('Model Saved: ', network_path)

            if event.key == pygame.K_BACKSPACE:
                # Pause/resume training
                automate = not automate

            if event.key == pygame.K_r:
                # Reconnect to AirSim and restore the stored pose
                client = airsim.MultirotorClient()
                client.confirmConnection()
                client.simSetVehiclePose(old_posit, ignore_collison=True)
                agent.client = client

            if event.key == pygame.K_m:
                agent.get_state()
                print('got_state')

            # Manual control routine when automation is paused
            if not automate:
                if event.key == pygame.K_UP:
                    action = 0
                elif event.key == pygame.K_RIGHT:
                    action = 1
                elif event.key == pygame.K_LEFT:
                    action = 2
                elif event.key == pygame.K_d:
                    action = 3
                elif event.key == pygame.K_a:
                    action = 4
                elif event.key == pygame.K_DOWN:
                    action = -2
                elif event.key == pygame.K_y:
                    # Move to three times the initial z at 1 m/s; assumes the
                    # airsim package's async API (pose via simGetVehiclePose).
                    pos = client.simGetVehiclePose().position
                    client.moveToPositionAsync(pos.x_val, pos.y_val, 3 * initZ, 1).join()
                    time.sleep(0.5)
                elif event.key == pygame.K_h:
                    client.reset()
                # agent.take_action(action)

        elif event.type == pygame.KEYDOWN and phase == 'infer':
            if event.key == pygame.K_s:
                # Save the figures
                file_path = env_folder + 'results/'
                fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
                fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
                print('Figures saved')

            if event.key == pygame.K_BACKSPACE:
                client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1)
                automate = not automate

    return active, automate, lr, client
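

# A minimal sketch of how these helpers are typically wired together; the
# agent object and the cfg attribute names (epsilon, network_path, initZ)
# are assumptions for illustration, not definitions from this module:
def _demo_main_loop(agent, cfg):
    screen = pygame_connect(phase='train')
    client, old_posit = connect_drone(phase='train')
    active, automate, lr = True, True, cfg.lr
    while active:
        active, automate, lr, client = check_user_input(
            active, automate, lr, cfg.epsilon, agent, cfg.network_path,
            client, old_posit, cfg.initZ, 'train', None, None, None)
        if automate:
            pass  # one RL step: act, store the transition, call minibatch_double(...)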