Major update
Added inference interactive code
@@ -9,6 +9,7 @@ import time
 import airsim
 import pygame
 from configs.read_cfg import read_cfg
+import matplotlib.pyplot as plt

 def save_network_path(cfg):
     # Save the network to the directory network_path
@@ -25,27 +26,51 @@ def save_network_path(cfg):


 def start_environment(env_name):
-    path = os.path.dirname(os.path.abspath(__file__)) + "/unreal_envs/" + env_name + "/" + env_name + ".exe"
+    env_folder = os.path.dirname(os.path.abspath(__file__)) + "/unreal_envs/" + env_name + "/"
+    path = env_folder + env_name + ".exe"
     # print(path)
     env_process = subprocess.Popen(path)
     time.sleep(6)
     print("Successfully loaded environment: " + env_name)

-    return env_process
+    return env_process, env_folder
+
+
+def initialize_infer(env_cfg, client, env_folder):
+    c_z = (env_cfg.ceiling_z - env_cfg.player_start_z) / 100
+    f_z = (env_cfg.floor_z - env_cfg.player_start_z) / 100
+
+    plt.ion()
+    fig_z = plt.figure()
+    ax_z = fig_z.add_subplot(111)
+    line_z, = ax_z.plot(0, 0)
+    ax_z.set_ylim(0, c_z)
+    plt.title("Altitude variation")
+
+    start_posit = client.simGetVehiclePose()
+
+    fig_nav = plt.figure()
+    ax_nav = fig_nav.add_subplot(111)
+    img = plt.imread(env_folder + "floor.png")
+    ax_nav.imshow(img)
+    plt.axis('off')
+    plt.title("Navigational map")
+    plt.plot(env_cfg.o_x, env_cfg.o_y, 'b*', linewidth=20)
+    nav, = ax_nav.plot(env_cfg.o_x, env_cfg.o_y)
+
+    return f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav


 def translate_action(action, num_actions):
-    action_word = ['Forward', 'Right', 'Left', 'Sharp Right', 'Sharp Left']
+    # action_word = ['Forward', 'Right', 'Left', 'Sharp Right', 'Sharp Left']
     sqrt_num_actions = np.sqrt(num_actions)
-    ind = np.arange(sqrt_num_actions)
+    # ind = np.arange(sqrt_num_actions)
     if sqrt_num_actions % 2 == 0:
-        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
-        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))
+        v_string = list('U' * int((sqrt_num_actions - 1) / 2) + 'D' * int((sqrt_num_actions - 1) / 2))
+        h_string = list('L' * int((sqrt_num_actions - 1) / 2) + 'R' * int((sqrt_num_actions - 1) / 2))
     else:
-        v_string = list('U' * int((sqrt_num_actions - 1) / 2) + 'D' * int((sqrt_num_actions - 1) / 2))
-        h_string = list('L' * int((sqrt_num_actions - 1) / 2) + 'R' * int((sqrt_num_actions - 1) / 2))
+        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
+        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))

     v_ind = int(action[0] / sqrt_num_actions)
     h_ind = int(action[0] % sqrt_num_actions)
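For context: with num_actions = 25 the action space is a 5x5 grid over the camera's field of view, and the fix above moves the centre-'F' strings into the odd-sqrt branch where the grid actually has a centre cell. A minimal standalone sketch of the mapping (hypothetical driver code, not part of this commit):

    # Sketch: how translate_action decodes a flat action index, assuming num_actions = 25.
    import numpy as np

    num_actions = 25
    sqrt_num_actions = int(np.sqrt(num_actions))    # 5 (odd, so there is a centre 'F')
    half = sqrt_num_actions // 2
    v_string = list('U' * half + 'F' + 'D' * half)  # ['U', 'U', 'F', 'D', 'D']
    h_string = list('L' * half + 'F' + 'R' * half)  # ['L', 'L', 'F', 'R', 'R']

    for action in [0, 12, 24]:
        v_ind = action // sqrt_num_actions          # grid row -> vertical component
        h_ind = action % sqrt_num_actions           # grid column -> horizontal component
        print(action, v_string[v_ind] + h_string[h_ind])  # 0 -> UL, 12 -> FF (straight ahead), 24 -> DR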
@@ -153,14 +178,19 @@ def reset_to_initial(level, reset_array, client):
     time.sleep(0.1)


-def connect_drone(ip_address='127.0.0.0'):
+def connect_drone(ip_address='127.0.0.0', phase='infer'):
     print('------------------------------ Drone ------------------------------')
     client = airsim.MultirotorClient(ip=ip_address, timeout_value=10)
     client.confirmConnection()
     old_posit = client.simGetVehiclePose()
-    client.simSetVehiclePose(
-        airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation),
-        ignore_collison=True)
+    if phase == 'train':
+        client.simSetVehiclePose(
+            airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation),
+            ignore_collison=True)
+    elif phase == 'infer':
+        client.enableApiControl(True)
+        client.armDisarm(True)
+        client.takeoffAsync().join()

     return client, old_posit
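Usage under the new signature would look like this (a sketch; per the hunk above, 'infer' arms the drone and takes off through the AirSim API, while 'train' only teleports the pose):

    # Sketch: connecting in inference mode against a local AirSim instance.
    client, old_posit = connect_drone(ip_address='127.0.0.1', phase='infer')
    # phase='train' -> simSetVehiclePose to (0, 0, -2.2) only
    # phase='infer' -> enableApiControl(True), armDisarm(True), takeoffAsync().join()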
@@ -181,13 +211,18 @@ def blit_text(surface, text, pos, font, color=pygame.Color('black')):
             x = pos[0]  # Reset the x.
             y += word_height  # Start on new row.

-def pygame_connect(H=925, W=380):
+def pygame_connect(phase):
     pygame.init()
-    screen_width = H
-    screen_height = W
-    screen = pygame.display.set_mode([screen_width, screen_height])
-    carImg = pygame.image.load('images/keys.png')
-    screen.blit(carImg, (0, 0))
+
+    if phase == 'train':
+        img_path = 'images/train_keys.png'
+    elif phase == 'infer':
+        img_path = 'images/infer_keys.png'
+    img = pygame.image.load(img_path)
+
+    screen = pygame.display.set_mode(img.get_rect().size)
+
+    screen.blit(img, (0, 0))
     pygame.display.set_caption('DLwithTL')
     # screen.fill((21, 116, 163))
     # text = 'Supported Keys:\n'
@@ -202,14 +237,15 @@ def pygame_connect(H=925, W=380):

     return screen


-def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ):
+def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ, phase, fig_z, fig_nav, env_folder):
     for event in pygame.event.get():

         if event.type == pygame.QUIT:
             active = False
             pygame.quit()

-        if event.type == pygame.KEYDOWN:
+        # Training keys control
+        if event.type == pygame.KEYDOWN and phase == 'train':
             if event.key == pygame.K_l:
                 # Load the parameters - epsilon
                 cfg = read_cfg(config_filename='configs/config.cfg', verbose=False)
@@ -271,6 +307,18 @@ def check_user_input(active, automate, lr, epsilon, agent, network_path, client,
             client.reset()
             # agent.take_action(action)

+        elif event.type == pygame.KEYDOWN and phase == 'infer':
+            if event.key == pygame.K_s:
+                # Save the figures
+                file_path = env_folder + 'results/'
+                fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
+                fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
+                print('Figures saved')
+
+            if event.key == pygame.K_BACKSPACE:
+                client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1)
+                automate = automate ^ True
+
     return active, automate, lr, client
@@ -3,7 +3,9 @@ run_name: Tello_indoor
 custom_load: True
 custom_load_path: models/trained/Indoor/indoor_condo/Imagenet/e2e/e2e
 env_type: Indoor
-env_name: indoor_condo
+env_name: indoor_cloud
+phase: infer


 [simulation_params]
 ip_address: 127.0.0.1
@@ -14,11 +16,11 @@ load_data_path: DeepNet/models/Tello_indoor/VanLeer/
 input_size: 227
 num_actions: 25
 train_type: e2e
-wait_before_train: 40000
+wait_before_train: 30000
 max_iters: 300000
 buffer_len: 60000
 batch_size: 32
-epsilon_saturation: 1
+epsilon_saturation: 200000
 crash_thresh: 1.3
 Q_clip: True
 train_interval: 3
@@ -26,5 +28,5 @@ update_target_interval: 80000
 gamma: 0.99
 dropout_rate: 0.1
 learning_rate: 1e-6
-switch_env_steps: 3000000
+switch_env_steps: 3000
 epsilon_model: exponential
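For intuition on the epsilon_saturation change (1 -> 200000): with epsilon_model: exponential, epsilon anneals toward greedy behaviour over roughly epsilon_saturation iterations instead of almost immediately. The policy() implementation is not part of this diff, so the following schedule is only an illustrative sketch under assumed start and end values:

    # Illustrative exponential epsilon schedule (assumed form, not the repo's policy()).
    import numpy as np

    def epsilon_exponential(step, epsilon_saturation, eps_start=1.0, eps_end=0.1):
        # Decay rate chosen so epsilon reaches ~eps_end at the saturation step.
        rate = -np.log(eps_end / eps_start) / epsilon_saturation
        return max(eps_end, eps_start * np.exp(-rate * step))

    print(epsilon_exponential(0, 200000))       # 1.0
    print(epsilon_exponential(200000, 200000))  # ~0.1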
@@ -5,6 +5,25 @@
 import configparser as cp
 from dotmap import DotMap

+
+def read_env_cfg(config_filename='configs/main.cfg'):
+    # Load from config file
+    cfg = DotMap()
+
+    config = cp.ConfigParser()
+    config.read(config_filename)
+
+    cfg.run_name = config.get('general_params', 'env_name')
+    cfg.custom_load_path = str(config.get('general_params', 'floorplan'))
+    cfg.o_x = float(config.get('general_params', 'o_x').split(',')[0])
+    cfg.o_y = float(config.get('general_params', 'o_y').split(',')[0])
+    cfg.alpha = float(config.get('general_params', 'alpha').split(',')[0])
+    cfg.ceiling_z = float(config.get('general_params', 'ceiling_z').split(',')[0])
+    cfg.floor_z = float(config.get('general_params', 'floor_z').split(',')[0])
+    cfg.player_start_z = float(config.get('general_params', 'player_start_z').split(',')[0])
+
+    return cfg
+

 def read_cfg(config_filename='configs/main.cfg', verbose=False):
     # Load from config file
     cfg = DotMap()
@@ -20,6 +39,7 @@ def read_cfg(config_filename='configs/main.cfg', verbose=False):
     cfg.custom_load_path = str(config.get('general_params', 'custom_load_path'))
     cfg.env_type = config.get('general_params', 'env_type')
     cfg.env_name = config.get('general_params', 'env_name')
+    cfg.phase = config.get('general_params', 'phase')

     # [Simulation Parameters]
     if str(config.get('simulation_params', 'load_data')) == 'True':
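read_env_cfg() reads a per-environment config.cfg shipped inside the unreal_envs/<env_name>/ folder. A hypothetical example of the file it parses (the key names come from the code above; the o_x/o_y/alpha values mirror the constants used in scratch.py, the z values are illustrative only; .split(',')[0] allows an inline comment after a comma):

    [general_params]
    env_name: indoor_cloud
    floorplan: floor.png
    o_x: 1318.49,       origin of the player start on the floorplan (pixels)
    o_y: 769.635
    alpha: 36.4,        scale from world metres to floorplan pixels
    ceiling_z: 280,     Unreal units (cm); converted to metres via /100 in initialize_infer
    floor_z: 0
    player_start_z: 100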
@@ -83,7 +83,7 @@ def indoor_complex():
     return orig_ip, level_name, crash_threshold, initZ

 # Test condo indoor initial positions
-def indoor_condo():
+def indoor_cloud():
     # The environment can be downloaded from
     # https://drive.google.com/drive/u/2/folders/1u5teth6l4JW2IXAkZAg1CbDGR6zE-v6Z
     orig_ip = [
BIN  images/infer_keys.PNG  (new binary file, 18 KiB; binary file not shown)
BIN  (a second key-map image, 23 KiB before and after; name not preserved in this capture)

main.py  (332 changed lines)
@@ -2,13 +2,14 @@
 import sys
 from network.agent import DeepAgent
 from environments.initial_positions import *
+
 import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
 import psutil
 from os import getpid
 from network.Memory import Memory
 from aux_functions import *
-from configs.read_cfg import read_cfg
+from configs.read_cfg import read_cfg, read_env_cfg

 # TF Debug message suppressed
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -20,10 +21,10 @@ process = psutil.Process(getpid())
 cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)

 # Start the environment
-env_process = start_environment(env_name=cfg.env_name)
+env_process, env_folder = start_environment(env_name=cfg.env_name)

 # Load PyGame Screen
-screen = pygame_connect()
+screen = pygame_connect(phase=cfg.phase)

 # Load the initial positions for the environment
 reset_array, level_name, crash_threshold, initZ = initial_positions(cfg.env_name)
@@ -35,11 +36,23 @@ cfg = save_network_path(cfg=cfg)
 ReplayMemory = Memory(cfg.buffer_len)

 # Connect to Unreal Engine and get the drone handle: client
-client, old_posit = connect_drone(ip_address=cfg.ip_address)
+client, old_posit = connect_drone(ip_address=cfg.ip_address, phase=cfg.phase)

 # Define DQN agents
+fig_z = []
+fig_nav = []
 agent = DeepAgent(cfg, client, name='DQN')
-target_agent = DeepAgent(cfg, client, name='Target')
+if cfg.phase == 'train':
+    target_agent = DeepAgent(cfg, client, name='Target')
+
+elif cfg.phase == 'infer':
+    env_cfg = read_env_cfg(config_filename=env_folder + 'config.cfg')
+    nav_x = []
+    nav_y = []
+    altitude = []
+    f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(env_cfg=env_cfg, client=client, env_folder=env_folder)
+    nav_text = ax_nav.text(0, 0, '')

 # Initialize variables
 iter = 0
@@ -75,151 +88,194 @@ f = open(log_path, 'w')

 while active:
     try:
-        active, automate, cfg.lr, client = check_user_input(active, automate, cfg.lr, cfg.epsilon, agent, cfg.network_path, client, old_posit, initZ)
+        active, automate, cfg.lr, client = check_user_input(active, automate, cfg.lr, cfg.epsilon, agent, cfg.network_path, client, old_posit, initZ, cfg.phase, fig_z, fig_nav, env_folder)

         if automate:
-            start_time = time.time()
-            if switch_env:
-                posit1_old = client.simGetVehiclePose()
-                times_switch = times_switch + 1
-                level_state[level] = current_state
-                level_posit[level] = posit1_old
-                last_crash_array[level] = last_crash
-                ret_array[level] = ret
-                distance_array[level] = distance
-                epi_env_array[int(level / 3)] = episode
-
-                level = (level + 1) % len(reset_array)
-
-                print('Transferring to level: ', level, ' - ', level_name[level])
-
-                if times_switch < len(reset_array):
-                    reset_to_initial(level, reset_array, client)
-                else:
-                    current_state = level_state[level]
-                    posit1_old = level_posit[level]
-
-                    reset_to_initial(level, reset_array, client)
-                    client.simSetVehiclePose(posit1_old, ignore_collison=True)
-                    time.sleep(0.1)
-
-                last_crash = last_crash_array[level]
-                ret = ret_array[level]
-                distance = distance_array[level]
-                episode = epi_env_array[int(level / 3)]
-                xxx = client.simGetVehiclePose()
-                # environ = environ^True
-
-            action1, action_type, cfg.epsilon, qvals = policy(cfg.epsilon, current_state, iter, cfg.epsilon_saturation, cfg.epsilon_model, cfg.wait_before_train, cfg.num_actions, agent)
-
-            action_word1 = translate_action(action1, cfg.num_actions)
-            # Take the action
-            agent.take_action(action1, cfg.num_actions)
-            time.sleep(0.05)
-
-            posit = client.simGetVehiclePose()
-
-            new_state = agent.get_state()
-            new_depth1, thresh = agent.get_depth()
-
-            # Get GPS information
-            posit = client.simGetVehiclePose()
-            orientation = posit.orientation
-            position = posit.position
-            old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
-            new_p = np.array([position.x_val, position.y_val])
-            # calculate distance
-            distance = distance + np.linalg.norm(new_p - old_p)
-            old_posit = posit
-
-            reward, crash = agent.reward_gen(new_depth1, action1, crash_threshold, thresh)
-
-            ret = ret + reward
-            agent_state1 = agent.GetAgentState()
-
-            if agent_state1.has_collided:
-                num_collisions = num_collisions + 1
-                print('crash')
-                crash = True
-                reward = -1
-            data_tuple = []
-            data_tuple.append([current_state, action1, new_state, reward, crash])
-            err = get_errors(data_tuple, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
-            ReplayMemory.add(err, data_tuple)
-
-            # Train if sufficient frames have been stored
-            if iter > cfg.wait_before_train:
-                if iter % cfg.train_interval == 0:
-                    # Train the RL network
-                    old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, cfg.batch_size, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
-
-                    for i in range(cfg.batch_size):
-                        ReplayMemory.update(idx[i], err[i])
-
-                    if print_qval:
-                        print(Qvals)
-
-                    if choose:
-                        # Double-DQN
-                        target_agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
-                    else:
-                        agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
-
-            if iter % cfg.update_target_interval == 0:
-                agent.take_action([-1], cfg.num_actions)
-                print('Switching Target Network')
-                choose = not choose
-                agent.save_network(cfg.network_path)
-
-            iter += 1
-
-            time_exec = time.time() - start_time
-
-            mem_percent = process.memory_info()[0] / 2.**30
-
-            s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
-                int(level), iter, episode,
-                action_word1,
-                action_type,
-                cfg.epsilon,
-                cfg.lr,
-                ret,
-                last_crash,
-                time_exec,
-                mem_percent,
-                reward)
-
-            print(s_log)
-            f.write(s_log + '\n')
-
-            last_crash = last_crash + 1
-
-            if crash:
-                agent.return_plot(ret, episode, int(level / 3), mem_percent, iter, distance)
-                ret = 0
-                distance = 0
-                episode = episode + 1
-                last_crash = 0
-
-                reset_to_initial(level, reset_array, client)
-                time.sleep(0.2)
-                current_state = agent.get_state()
-            else:
-                current_state = new_state
-
-            if iter % cfg.switch_env_steps == 0:
-                switch_env = True
-            else:
-                switch_env = False
-
-            if iter % cfg.max_iters == 0:
-                automate = False
-
-            # if iter > 140:
-            #     active = False
+            if cfg.phase == 'train':
+                start_time = time.time()
+                if switch_env:
+                    posit1_old = client.simGetVehiclePose()
+                    times_switch = times_switch + 1
+                    level_state[level] = current_state
+                    level_posit[level] = posit1_old
+                    last_crash_array[level] = last_crash
+                    ret_array[level] = ret
+                    distance_array[level] = distance
+                    epi_env_array[int(level / 3)] = episode
+
+                    level = (level + 1) % len(reset_array)
+
+                    print('Transferring to level: ', level, ' - ', level_name[level])
+
+                    if times_switch < len(reset_array):
+                        reset_to_initial(level, reset_array, client)
+                    else:
+                        current_state = level_state[level]
+                        posit1_old = level_posit[level]
+
+                        reset_to_initial(level, reset_array, client)
+                        client.simSetVehiclePose(posit1_old, ignore_collison=True)
+                        time.sleep(0.1)
+
+                    last_crash = last_crash_array[level]
+                    ret = ret_array[level]
+                    distance = distance_array[level]
+                    episode = epi_env_array[int(level / 3)]
+                    xxx = client.simGetVehiclePose()
+                    # environ = environ^True
+
+                action, action_type, cfg.epsilon, qvals = policy(cfg.epsilon, current_state, iter, cfg.epsilon_saturation, cfg.epsilon_model, cfg.wait_before_train, cfg.num_actions, agent)
+
+                action_word = translate_action(action, cfg.num_actions)
+                # Take the action
+                agent.take_action(action, cfg.num_actions, phase=cfg.phase)
+                time.sleep(0.05)
+
+                posit = client.simGetVehiclePose()
+
+                new_state = agent.get_state()
+                new_depth1, thresh = agent.get_depth()
+
+                # Get GPS information
+                posit = client.simGetVehiclePose()
+                orientation = posit.orientation
+                position = posit.position
+                old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
+                new_p = np.array([position.x_val, position.y_val])
+                # calculate distance
+                distance = distance + np.linalg.norm(new_p - old_p)
+                old_posit = posit
+
+                reward, crash = agent.reward_gen(new_depth1, action, crash_threshold, thresh)
+
+                ret = ret + reward
+                agent_state1 = agent.GetAgentState()
+
+                if agent_state1.has_collided:
+                    num_collisions = num_collisions + 1
+                    print('crash')
+                    crash = True
+                    reward = -1
+                data_tuple = []
+                data_tuple.append([current_state, action, new_state, reward, crash])
+                err = get_errors(data_tuple, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
+                ReplayMemory.add(err, data_tuple)
+
+                # Train if sufficient frames have been stored
+                if iter > cfg.wait_before_train:
+                    if iter % cfg.train_interval == 0:
+                        # Train the RL network
+                        old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, cfg.batch_size, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
+
+                        for i in range(cfg.batch_size):
+                            ReplayMemory.update(idx[i], err[i])
+
+                        if print_qval:
+                            print(Qvals)
+
+                        if choose:
+                            # Double-DQN
+                            target_agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
+                        else:
+                            agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
+
+                if iter % cfg.update_target_interval == 0:
+                    agent.take_action([-1], cfg.num_actions, phase=cfg.phase)
+                    print('Switching Target Network')
+                    choose = not choose
+                    agent.save_network(cfg.network_path)
+
+                iter += 1
+
+                time_exec = time.time() - start_time
+
+                mem_percent = process.memory_info()[0] / 2.**30
+
+                s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
+                    int(level), iter, episode,
+                    action_word,
+                    action_type,
+                    cfg.epsilon,
+                    cfg.lr,
+                    ret,
+                    last_crash,
+                    time_exec,
+                    mem_percent,
+                    reward)
+
+                print(s_log)
+                f.write(s_log + '\n')
+
+                last_crash = last_crash + 1
+
+                if crash:
+                    agent.return_plot(ret, episode, int(level / 3), mem_percent, iter, distance)
+                    ret = 0
+                    distance = 0
+                    episode = episode + 1
+                    last_crash = 0
+
+                    reset_to_initial(level, reset_array, client)
+                    time.sleep(0.2)
+                    current_state = agent.get_state()
+                else:
+                    current_state = new_state
+
+                if iter % cfg.switch_env_steps == 0:
+                    switch_env = True
+                else:
+                    switch_env = False
+
+                if iter % cfg.max_iters == 0:
+                    automate = False
+
+                # if iter > 140:
+                #     active = False
+
+            elif cfg.phase == 'infer':
+                # Inference phase
+                agent_state = agent.GetAgentState()
+                if agent_state.has_collided:
+                    print('Drone collided')
+                    print("Total distance traveled: ", np.round(distance, 2))
+                    active = False
+                    client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=1).join()
+                    ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20)
+                    file_path = env_folder + 'results/'
+                    fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
+                    fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
+
+                    print('Figures saved')
+                else:
+                    posit = agent.client.simGetVehiclePose()
+                    distance = distance + np.linalg.norm(np.array([old_posit.position.x_val - posit.position.x_val, old_posit.position.y_val - posit.position.y_val]))
+                    altitude.append(-posit.position.z_val - f_z)
+
+                    x_val = posit.position.x_val
+                    y_val = posit.position.y_val
+
+                    nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
+                    nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
+                    nav.set_data(nav_x, nav_y)
+                    nav_text.remove()
+                    nav_text = ax_nav.text(25, 55, 'Distance: ' + str(np.round(distance, 2)), style='italic',
+                                           bbox={'facecolor': 'white', 'alpha': 0.5})
+
+                    line_z.set_data(np.arange(len(altitude)), altitude)
+                    ax_z.set_xlim(0, len(altitude))
+                    fig_z.canvas.draw()
+                    fig_z.canvas.flush_events()
+
+                    current_state = agent.get_state()
+                    action, action_type, cfg.epsilon, qvals = policy(1, current_state, iter,
+                                                                     cfg.epsilon_saturation, 'inference',
+                                                                     cfg.wait_before_train, cfg.num_actions, agent)
+                    # Take continuous action
+                    agent.take_action(action, cfg.num_actions, phase=cfg.phase)
+                    old_posit = posit
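The navigational overlay in the infer branch above is an affine map from AirSim world coordinates (metres) to floorplan pixels: nav = alpha * position + origin. A worked example using the constants from scratch.py (values are illustrative for any other environment):

    # Sketch: world position -> floorplan pixel, as done in the infer loop above.
    alpha, o_x, o_y = 36.4, 1318.49, 769.635  # scale and origin (from scratch.py)
    x_val, y_val = 2.0, -1.5                  # example drone position in metres
    nav_x = alpha * x_val + o_x               # 1391.29 px
    nav_y = alpha * y_val + o_y               # 715.035 px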
@@ -124,7 +124,7 @@ class DeepAgent():
         # self.action_array[action[0].astype(int)] += 1
         return action.astype(int)

-    def take_action(self, action, num_actions):
+    def take_action(self, action, num_actions, phase):
         # Set Parameters
         fov_v = 45 * np.pi / 180
         fov_h = 80 * np.pi / 180
@@ -133,7 +133,6 @@ class DeepAgent():
         ignore_collision = False
         sqrt_num_actions = np.sqrt(num_actions)
-
         posit = self.client.simGetVehiclePose()
         pos = posit.position
         orientation = posit.orientation
@@ -148,25 +147,42 @@ class DeepAgent():
         theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
         psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)

-        noise_theta = (fov_v / sqrt_num_actions) / 6
-        noise_psi = (fov_h / sqrt_num_actions) / 6
-
-        psi = psi + random.uniform(-1, 1) * noise_psi
-        theta = theta + random.uniform(-1, 1) * noise_theta
-
-        # print('Theta: ', theta * 180 / np.pi, end='')
-        # print(' Psi: ', psi * 180 / np.pi)
-
-        x = pos.x_val + r * np.cos(alpha + psi)
-        y = pos.y_val + r * np.sin(alpha + psi)
-        z = pos.z_val + r * np.sin(theta)  # -ve because Unreal has -ve z direction going upwards
-
-        self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
-                                      ignore_collison=ignore_collision)
+        if phase == 'train':
+            noise_theta = (fov_v / sqrt_num_actions) / 6
+            noise_psi = (fov_h / sqrt_num_actions) / 6
+
+            psi = psi + random.uniform(-1, 1) * noise_psi
+            theta = theta + random.uniform(-1, 1) * noise_theta
+
+            x = pos.x_val + r * np.cos(alpha + psi)
+            y = pos.y_val + r * np.sin(alpha + psi)
+            z = pos.z_val + r * np.sin(theta)  # -ve because Unreal has -ve z direction going upwards
+
+            self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
+                                          ignore_collison=ignore_collision)
+        elif phase == 'infer':
+            r_infer = 0.5
+            vx = r_infer * np.cos(alpha + psi)
+            vy = r_infer * np.sin(alpha + psi)
+            vz = r_infer * np.sin(theta)
+            # TODO
+            # Take average of previous velocities and current to smoothen out drone movement.
+            self.client.moveByVelocityAsync(vx=vx, vy=vy, vz=vz, duration=1,
+                                            drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom,
+                                            yaw_mode=airsim.YawMode(is_rate=False,
+                                                                    yaw_or_rate=180 * (alpha + psi) / np.pi))
+            # self.client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.01).join()
         # print("")
         # print("Throttle:", throttle)
         # print('Yaw:', yaw)

         # self.client.moveByAngleThrottleAsync(pitch=-0.015, roll=0, throttle=throttle, yaw_rate=yaw, duration=0.2).join()
         # self.client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.005)

     def get_depth(self):
         responses = self.client.simGetImages([airsim.ImageRequest(2, airsim.ImageType.DepthVis, False, False)])
         depth = []
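On the TODO in the infer branch (averaging velocities to smooth the flight): one option is an exponential moving average over successive velocity commands. A sketch only — self.prev_vel is a hypothetical attribute, not part of this diff:

    # Hypothetical smoothing for the infer branch of take_action():
    beta = 0.7                                   # weight on the newest command
    v_new = np.array([vx, vy, vz])
    self.prev_vel = beta * v_new + (1 - beta) * getattr(self, 'prev_vel', v_new)
    vx, vy, vz = self.prev_vel                   # fly the smoothed command instead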
scratch.py  (171 changed lines)
@@ -1,10 +1,169 @@
-# Author: Aqeel Anwar(ICSRL)
-# Created: 11/8/2019, 4:02 PM
-# Email: aqeel.anwar@gatech.edu
+# # # Author: Aqeel Anwar(ICSRL)
+# # # Created: 11/8/2019, 4:02 PM
+# # # Email: aqeel.anwar@gatech.edu
+# #
+# #
+# # # import airsim
+# import os
+# import time
+# from aux_functions import *
+# from util.transformations import euler_from_quaternion
+# import airsim
+#
+# # Connect to Unreal Engine and get the drone handle: client
+# client, old_posit = connect_drone(ip_address='127.0.0.1', phase='infer')
+#
+#
+# # Async methods returns Future. Call join() to wait for task to complete.
+# # client.takeoffAsync().join()
+# pos = client.simGetVehiclePose()
+# # print(pos)
+# num_actions = 25
+# action = [20]
+# fov_v = 45 * np.pi / 180
+# fov_h = 80 * np.pi / 180
+# r = 0.4
+#
+# sqrt_num_actions = np.sqrt(num_actions)
+#
+#
+# for i in range(5):
+#
+#     client.moveByVelocityAsync(vx=0, vy=0, vz=-1, duration=1).join()
+#     posit = client.simGetVehiclePose()
+#     pos = posit.position
+#     orientation = posit.orientation
+#     print("Z", pos.z_val)
+#     cc = 1
+#
+#     quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
+#     eulers = euler_from_quaternion(quat)
+#     alpha = eulers[2]
+#
+#     theta_ind = int(action[0] / sqrt_num_actions)
+#     psi_ind = action[0] % sqrt_num_actions
+#
+#     theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
+#     psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
+#
+#     vx = r * np.cos(alpha + psi)
+#     vy = r * np.sin(alpha + psi)
+#     vz = r * np.sin(theta)
+#     print("ang:", 180 * (alpha + psi) / np.pi)
+#     client.moveByVelocityAsync(vx=vx, vy=vy, vz=vz, duration=3, drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom, yaw_mode=airsim.YawMode(is_rate=False, yaw_or_rate=180 * (alpha + psi) / np.pi)).join()
+#     client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1).join()
+#
+#     posit = client.simGetVehiclePose()
+#     pos = posit.position
+#     orientation = posit.orientation
+#
+#     quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
+#     eulers = euler_from_quaternion(quat)
+#     alpha = eulers[2]
+#     print(eulers)
+#
+#
+# # time.sleep(5)
+# # client.moveToPositionAsync(-10, 10, -10, 5).join()
+# print("Done")
+# #
+#
+#
+# def action_take(action):
+#     action = [22]
+#
+#     fov_v = 45 * np.pi / 180
+#     fov_h = 80 * np.pi / 180
+#
+#     sqrt_num_actions = 5
+#     theta_ind = int(action[0] / sqrt_num_actions)
+#     psi_ind = action[0] % sqrt_num_actions
+#
+#     theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
+#     psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
+#
+#     yaw = psi
+#     throttle = 0.596 + np.sin(theta)
+#
+#     print("")
+#     print("Throttle:", throttle)
+#     print('Yaw:', yaw)
+#
+#     return throttle, yaw
+
+
+import airsim
+from aux_functions import *
+import numpy as np
+# import seaborn as sns
+# import matplotlib.pyplot as plt
+# import time, random
+# # x, y, z = np.random.rand(3, 100)
+# plt.style.use('seaborn-pastel')
+# cmap = sns.cubehelix_palette(as_cmap=True)
+# plt.ion()
+# fig = plt.figure()
+# ax = fig.add_subplot(111)
+# x = []
+# y = []
+# points, = ax.plot(x, y)
+# # ax.set_ylim([0, 5])
+# # ax.set_xlim([0, 10])
+# img = plt.imread("unreal_envs/indoor_condo/floor.png")
+# ax.imshow(img)
+# plt.axis('off')
+# plt.title("Navigational map")
+# o_x = 1318.49
+# o_y = 769.635
+# # a = 10
+# # b = [3,5]
+# # plt.axis('off')
+# for i in range(10):
+#     x.append(i)
+#     y.append(random.randint(1, 5))
+#     points.set_data(x, y)
+#     fig.canvas.draw()
+#     fig.canvas.flush_events()
+#     time.sleep(1)
+
+
+client, old_posit = connect_drone()
+import matplotlib.pyplot as plt
+import time, random
+
+altitude = []
+plt.ion()
+fig = plt.figure()
+ax1 = fig.add_subplot(111)
+line1, = ax1.plot(0, 0)
+plt.title("Altitude variation")
+
+nav_x = []
+nav_y = []
+o_x = 1318.49
+o_y = 769.635
+alpha = 36.4
+fig2 = plt.figure()
+ax2 = fig2.add_subplot(111)
+img = plt.imread("unreal_envs/indoor_cloud/floor.png")
+ax2.imshow(img)
+plt.axis('off')
+plt.title("Navigational map")
+plt.plot(o_x, o_y, 'r*', linewidth=5)
+nav, = ax2.plot(o_x, o_y)
+
+
+for i in range(100):
+    nav_text = ax2.text(25, 55, 'Distance: ' + str(np.round(1.2333, 2)), style='italic',
+                        bbox={'facecolor': 'white', 'alpha': 0.5})
+    nav_x.append(alpha * random.randint(0, 5) + o_x)
+    nav_y.append(alpha * random.randint(0, 5) + o_y)
+    nav.set_data(nav_x, nav_y)
+    # line1.set_ydata(altitude)
+    # print("alt: ", altitude)
+    altitude.append(random.randint(0, 5))
+    line1.set_data(altitude, altitude)
+    fig.canvas.draw()
+    fig.canvas.flush_events()
+
+    time.sleep(0.2)