Major update
Added inference interactive code
@@ -9,6 +9,7 @@ import time
 import airsim
 import pygame
 from configs.read_cfg import read_cfg
+import matplotlib.pyplot as plt

 def save_network_path(cfg):
     # Save the network to the directory network_path
@@ -25,27 +26,51 @@ def save_network_path(cfg):


 def start_environment(env_name):
-    path = os.path.dirname(os.path.abspath(__file__)) + "/unreal_envs/" + env_name + "/" + env_name + ".exe"
+    env_folder = os.path.dirname(os.path.abspath(__file__)) + "/unreal_envs/" + env_name + "/"
+    path = env_folder + env_name + ".exe"
     # print(path)
     env_process = subprocess.Popen(path)
     time.sleep(6)
     print("Successfully loaded environment: " + env_name)

-    return env_process
+    return env_process, env_folder
+
+
+def initialize_infer(env_cfg, client, env_folder):
+    c_z = (env_cfg.ceiling_z - env_cfg.player_start_z) / 100
+    f_z = (env_cfg.floor_z - env_cfg.player_start_z) / 100
+
+    plt.ion()
+    fig_z = plt.figure()
+    ax_z = fig_z.add_subplot(111)
+    line_z, = ax_z.plot(0, 0)
+    ax_z.set_ylim(0, c_z)
+    plt.title("Altitude variation")
+
+    start_posit = client.simGetVehiclePose()
+
+    fig_nav = plt.figure()
+    ax_nav = fig_nav.add_subplot(111)
+    img = plt.imread(env_folder + "floor.png")
+    ax_nav.imshow(img)
+    plt.axis('off')
+    plt.title("Navigational map")
+    plt.plot(env_cfg.o_x, env_cfg.o_y, 'b*', linewidth=20)
+    nav, = ax_nav.plot(env_cfg.o_x, env_cfg.o_y)
+
+    return f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav


 def translate_action(action, num_actions):
-    action_word = ['Forward', 'Right', 'Left', 'Sharp Right', 'Sharp Left']
+    # action_word = ['Forward', 'Right', 'Left', 'Sharp Right', 'Sharp Left']
     sqrt_num_actions = np.sqrt(num_actions)
-    ind = np.arange(sqrt_num_actions)
+    # ind = np.arange(sqrt_num_actions)
     if sqrt_num_actions % 2 == 0:
-        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
-        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))
+        v_string = list('U' * int((sqrt_num_actions - 1) / 2) + 'D' * int((sqrt_num_actions - 1) / 2))
+        h_string = list('L' * int((sqrt_num_actions - 1) / 2) + 'R' * int((sqrt_num_actions - 1) / 2))
     else:
-        v_string = list('U' * int((sqrt_num_actions - 1) / 2) + 'D' * int((sqrt_num_actions - 1) / 2))
-        h_string = list('L' * int((sqrt_num_actions - 1) / 2) + 'R' * int((sqrt_num_actions - 1) / 2))
+        v_string = list('U' * int(sqrt_num_actions / 2) + 'F' + 'D' * int(sqrt_num_actions / 2))
+        h_string = list('L' * int(sqrt_num_actions / 2) + 'F' + 'R' * int(sqrt_num_actions / 2))

     v_ind = int(action[0] / sqrt_num_actions)
     h_ind = int(action[0] % sqrt_num_actions)
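For context: with num_actions = 25 the action space is a 5x5 grid over the camera's field of view, and the fix above moves the centre-'F' strings into the odd-sqrt branch where the grid actually has a centre cell. A minimal standalone sketch of the mapping (hypothetical driver code, not part of this commit):

    # Sketch: how translate_action decodes a flat action index, assuming num_actions = 25.
    import numpy as np

    num_actions = 25
    sqrt_num_actions = int(np.sqrt(num_actions))    # 5 (odd, so there is a centre 'F')
    half = sqrt_num_actions // 2
    v_string = list('U' * half + 'F' + 'D' * half)  # ['U', 'U', 'F', 'D', 'D']
    h_string = list('L' * half + 'F' + 'R' * half)  # ['L', 'L', 'F', 'R', 'R']

    for action in [0, 12, 24]:
        v_ind = action // sqrt_num_actions          # grid row -> vertical component
        h_ind = action % sqrt_num_actions           # grid column -> horizontal component
        print(action, v_string[v_ind] + h_string[h_ind])  # 0 -> UL, 12 -> FF (straight ahead), 24 -> DR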
@@ -153,14 +178,19 @@ def reset_to_initial(level, reset_array, client):
     time.sleep(0.1)


-def connect_drone(ip_address='127.0.0.0'):
+def connect_drone(ip_address='127.0.0.0', phase='infer'):
     print('------------------------------ Drone ------------------------------')
     client = airsim.MultirotorClient(ip=ip_address, timeout_value=10)
     client.confirmConnection()
     old_posit = client.simGetVehiclePose()
-    client.simSetVehiclePose(
-        airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation),
-        ignore_collison=True)
+    if phase == 'train':
+        client.simSetVehiclePose(
+            airsim.Pose(airsim.Vector3r(0, 0, -2.2), old_posit.orientation),
+            ignore_collison=True)
+    elif phase == 'infer':
+        client.enableApiControl(True)
+        client.armDisarm(True)
+        client.takeoffAsync().join()

     return client, old_posit
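Usage under the new signature would look like this (a sketch; per the hunk above, 'infer' arms the drone and takes off through the AirSim API, while 'train' only teleports the pose):

    # Sketch: connecting in inference mode against a local AirSim instance.
    client, old_posit = connect_drone(ip_address='127.0.0.1', phase='infer')
    # phase='train' -> simSetVehiclePose to (0, 0, -2.2) only
    # phase='infer' -> enableApiControl(True), armDisarm(True), takeoffAsync().join()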
@@ -181,13 +211,18 @@ def blit_text(surface, text, pos, font, color=pygame.Color('black')):
             x = pos[0]  # Reset the x.
             y += word_height  # Start on new row.

-def pygame_connect(H=925, W=380):
+def pygame_connect(phase):
     pygame.init()
-    screen_width = H
-    screen_height = W
-    screen = pygame.display.set_mode([screen_width, screen_height])
-    carImg = pygame.image.load('images/keys.png')
-    screen.blit(carImg, (0, 0))
+
+    if phase == 'train':
+        img_path = 'images/train_keys.png'
+    elif phase == 'infer':
+        img_path = 'images/infer_keys.png'
+    img = pygame.image.load(img_path)
+
+    screen = pygame.display.set_mode(img.get_rect().size)
+
+    screen.blit(img, (0, 0))
     pygame.display.set_caption('DLwithTL')
     # screen.fill((21, 116, 163))
     # text = 'Supported Keys:\n'
@@ -202,14 +237,15 @@ def pygame_connect(H=925, W=380):

     return screen


-def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ):
+def check_user_input(active, automate, lr, epsilon, agent, network_path, client, old_posit, initZ, phase, fig_z, fig_nav, env_folder):
     for event in pygame.event.get():

         if event.type == pygame.QUIT:
             active = False
             pygame.quit()

-        if event.type == pygame.KEYDOWN:
+        # Training keys control
+        if event.type == pygame.KEYDOWN and phase == 'train':
             if event.key == pygame.K_l:
                 # Load the parameters - epsilon
                 cfg = read_cfg(config_filename='configs/config.cfg', verbose=False)
@@ -271,6 +307,18 @@ def check_user_input(active, automate, lr, epsilon, agent, network_path, client,
             client.reset()
             # agent.take_action(action)

+        elif event.type == pygame.KEYDOWN and phase == 'infer':
+            if event.key == pygame.K_s:
+                # Save the figures
+                file_path = env_folder + 'results/'
+                fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
+                fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
+                print('Figures saved')
+
+            if event.key == pygame.K_BACKSPACE:
+                client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1)
+                automate = automate ^ True
+
     return active, automate, lr, client
@@ -3,7 +3,9 @@ run_name: Tello_indoor
 custom_load: True
 custom_load_path: models/trained/Indoor/indoor_condo/Imagenet/e2e/e2e
 env_type: Indoor
-env_name: indoor_condo
+env_name: indoor_cloud
+phase: infer


 [simulation_params]
 ip_address: 127.0.0.1
@@ -14,11 +16,11 @@ load_data_path: DeepNet/models/Tello_indoor/VanLeer/
 input_size: 227
 num_actions: 25
 train_type: e2e
-wait_before_train: 40000
+wait_before_train: 30000
 max_iters: 300000
 buffer_len: 60000
 batch_size: 32
-epsilon_saturation: 1
+epsilon_saturation: 200000
 crash_thresh: 1.3
 Q_clip: True
 train_interval: 3
@@ -26,5 +28,5 @@ update_target_interval: 80000
 gamma: 0.99
 dropout_rate: 0.1
 learning_rate: 1e-6
-switch_env_steps: 3000000
+switch_env_steps: 3000
 epsilon_model: exponential
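For intuition on the epsilon_saturation change (1 -> 200000): with epsilon_model: exponential, epsilon anneals toward greedy behaviour over roughly epsilon_saturation iterations instead of almost immediately. The policy() implementation is not part of this diff, so the following schedule is only an illustrative sketch under assumed start and end values:

    # Illustrative exponential epsilon schedule (assumed form, not the repo's policy()).
    import numpy as np

    def epsilon_exponential(step, epsilon_saturation, eps_start=1.0, eps_end=0.1):
        # Decay rate chosen so epsilon reaches ~eps_end at the saturation step.
        rate = -np.log(eps_end / eps_start) / epsilon_saturation
        return max(eps_end, eps_start * np.exp(-rate * step))

    print(epsilon_exponential(0, 200000))       # 1.0
    print(epsilon_exponential(200000, 200000))  # ~0.1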
@@ -5,6 +5,25 @@
 import configparser as cp
 from dotmap import DotMap

+
+def read_env_cfg(config_filename='configs/main.cfg'):
+    # Load from config file
+    cfg = DotMap()
+
+    config = cp.ConfigParser()
+    config.read(config_filename)
+
+    cfg.run_name = config.get('general_params', 'env_name')
+    cfg.custom_load_path = str(config.get('general_params', 'floorplan'))
+    cfg.o_x = float(config.get('general_params', 'o_x').split(',')[0])
+    cfg.o_y = float(config.get('general_params', 'o_y').split(',')[0])
+    cfg.alpha = float(config.get('general_params', 'alpha').split(',')[0])
+    cfg.ceiling_z = float(config.get('general_params', 'ceiling_z').split(',')[0])
+    cfg.floor_z = float(config.get('general_params', 'floor_z').split(',')[0])
+    cfg.player_start_z = float(config.get('general_params', 'player_start_z').split(',')[0])
+
+    return cfg
+

 def read_cfg(config_filename='configs/main.cfg', verbose=False):
     # Load from config file
     cfg = DotMap()
@@ -20,6 +39,7 @@ def read_cfg(config_filename='configs/main.cfg', verbose=False):
     cfg.custom_load_path = str(config.get('general_params', 'custom_load_path'))
     cfg.env_type = config.get('general_params', 'env_type')
     cfg.env_name = config.get('general_params', 'env_name')
+    cfg.phase = config.get('general_params', 'phase')

     # [Simulation Parameters]
     if str(config.get('simulation_params', 'load_data')) == 'True':
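read_env_cfg() reads a per-environment config.cfg shipped inside the unreal_envs/<env_name>/ folder. A hypothetical example of the file it parses (the key names come from the code above; the o_x/o_y/alpha values mirror the constants used in scratch.py, the z values are illustrative only; .split(',')[0] allows an inline comment after a comma):

    [general_params]
    env_name: indoor_cloud
    floorplan: floor.png
    o_x: 1318.49,       origin of the player start on the floorplan (pixels)
    o_y: 769.635
    alpha: 36.4,        scale from world metres to floorplan pixels
    ceiling_z: 280,     Unreal units (cm); converted to metres via /100 in initialize_infer
    floor_z: 0
    player_start_z: 100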
@@ -83,7 +83,7 @@ def indoor_complex():
     return orig_ip, level_name, crash_threshold, initZ

 # Test condo indoor initial positions
-def indoor_condo():
+def indoor_cloud():
     # The environment can be downloaded from
     # https://drive.google.com/drive/u/2/folders/1u5teth6l4JW2IXAkZAg1CbDGR6zE-v6Z
     orig_ip = [
BIN  images/infer_keys.PNG  (new binary file, 18 KiB; binary file not shown)
BIN  (a second key-map image, 23 KiB before and after; name not preserved in this capture)

main.py  (332 changed lines)
@@ -2,13 +2,14 @@
 import sys
 from network.agent import DeepAgent
 from environments.initial_positions import *
+
 import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
 import psutil
 from os import getpid
 from network.Memory import Memory
 from aux_functions import *
-from configs.read_cfg import read_cfg
+from configs.read_cfg import read_cfg, read_env_cfg

 # TF Debug message suppressed
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -20,10 +21,10 @@ process = psutil.Process(getpid())
 cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)

 # Start the environment
-env_process = start_environment(env_name=cfg.env_name)
+env_process, env_folder = start_environment(env_name=cfg.env_name)

 # Load PyGame Screen
-screen = pygame_connect()
+screen = pygame_connect(phase=cfg.phase)

 # Load the initial positions for the environment
 reset_array, level_name, crash_threshold, initZ = initial_positions(cfg.env_name)
@@ -35,11 +36,23 @@ cfg = save_network_path(cfg=cfg)
 ReplayMemory = Memory(cfg.buffer_len)

 # Connect to Unreal Engine and get the drone handle: client
-client, old_posit = connect_drone(ip_address=cfg.ip_address)
+client, old_posit = connect_drone(ip_address=cfg.ip_address, phase=cfg.phase)

 # Define DQN agents
+fig_z = []
+fig_nav = []
 agent = DeepAgent(cfg, client, name='DQN')
-target_agent = DeepAgent(cfg, client, name='Target')
+if cfg.phase == 'train':
+    target_agent = DeepAgent(cfg, client, name='Target')
+
+elif cfg.phase == 'infer':
+    env_cfg = read_env_cfg(config_filename=env_folder + 'config.cfg')
+    nav_x = []
+    nav_y = []
+    altitude = []
+    f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(env_cfg=env_cfg, client=client, env_folder=env_folder)
+    nav_text = ax_nav.text(0, 0, '')

 # Initialize variables
 iter = 0
@@ -75,151 +88,194 @@ f = open(log_path, 'w')

 while active:
     try:
-        active, automate, cfg.lr, client = check_user_input(active, automate, cfg.lr, cfg.epsilon, agent, cfg.network_path, client, old_posit, initZ)
+        active, automate, cfg.lr, client = check_user_input(active, automate, cfg.lr, cfg.epsilon, agent, cfg.network_path, client, old_posit, initZ, cfg.phase, fig_z, fig_nav, env_folder)

         if automate:
-            start_time = time.time()
-            if switch_env:
-                posit1_old = client.simGetVehiclePose()
-                times_switch = times_switch + 1
-                level_state[level] = current_state
-                level_posit[level] = posit1_old
-                last_crash_array[level] = last_crash
-                ret_array[level] = ret
-                distance_array[level] = distance
-                epi_env_array[int(level / 3)] = episode
-
-                level = (level + 1) % len(reset_array)
-
-                print('Transferring to level: ', level, ' - ', level_name[level])
-
-                if times_switch < len(reset_array):
-                    reset_to_initial(level, reset_array, client)
-                else:
-                    current_state = level_state[level]
-                    posit1_old = level_posit[level]
-
-                    reset_to_initial(level, reset_array, client)
-                    client.simSetVehiclePose(posit1_old, ignore_collison=True)
-                    time.sleep(0.1)
-
-                last_crash = last_crash_array[level]
-                ret = ret_array[level]
-                distance = distance_array[level]
-                episode = epi_env_array[int(level / 3)]
-                xxx = client.simGetVehiclePose()
-                # environ = environ^True
-
-            action1, action_type, cfg.epsilon, qvals = policy(cfg.epsilon, current_state, iter, cfg.epsilon_saturation, cfg.epsilon_model, cfg.wait_before_train, cfg.num_actions, agent)
-
-            action_word1 = translate_action(action1, cfg.num_actions)
-            # Take the action
-            agent.take_action(action1, cfg.num_actions)
-            time.sleep(0.05)
-
-            posit = client.simGetVehiclePose()
-
-            new_state = agent.get_state()
-            new_depth1, thresh = agent.get_depth()
-
-            # Get GPS information
-            posit = client.simGetVehiclePose()
-            orientation = posit.orientation
-            position = posit.position
-            old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
-            new_p = np.array([position.x_val, position.y_val])
-            # calculate distance
-            distance = distance + np.linalg.norm(new_p - old_p)
-            old_posit = posit
-
-            reward, crash = agent.reward_gen(new_depth1, action1, crash_threshold, thresh)
-
-            ret = ret + reward
-            agent_state1 = agent.GetAgentState()
-
-            if agent_state1.has_collided:
-                num_collisions = num_collisions + 1
-                print('crash')
-                crash = True
-                reward = -1
-            data_tuple = []
-            data_tuple.append([current_state, action1, new_state, reward, crash])
-            err = get_errors(data_tuple, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
-            ReplayMemory.add(err, data_tuple)
-
-            # Train if sufficient frames have been stored
-            if iter > cfg.wait_before_train:
-                if iter % cfg.train_interval == 0:
-                    # Train the RL network
-                    old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, cfg.batch_size, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
-
-                    for i in range(cfg.batch_size):
-                        ReplayMemory.update(idx[i], err[i])
-
-                    if print_qval:
-                        print(Qvals)
-
-                    if choose:
-                        # Double-DQN
-                        target_agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
-                    else:
-                        agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
-
-            if iter % cfg.update_target_interval == 0:
-                agent.take_action([-1], cfg.num_actions)
-                print('Switching Target Network')
-                choose = not choose
-                agent.save_network(cfg.network_path)
-
-            iter += 1
-
-            time_exec = time.time() - start_time
-
-            mem_percent = process.memory_info()[0] / 2.**30
-
-            s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
-                int(level), iter, episode,
-                action_word1,
-                action_type,
-                cfg.epsilon,
-                cfg.lr,
-                ret,
-                last_crash,
-                time_exec,
-                mem_percent,
-                reward)
-
-            print(s_log)
-            f.write(s_log + '\n')
-
-            last_crash = last_crash + 1
-
-            if crash:
-                agent.return_plot(ret, episode, int(level / 3), mem_percent, iter, distance)
-                ret = 0
-                distance = 0
-                episode = episode + 1
-                last_crash = 0
-
-                reset_to_initial(level, reset_array, client)
-                time.sleep(0.2)
-                current_state = agent.get_state()
-            else:
-                current_state = new_state
-
-            if iter % cfg.switch_env_steps == 0:
-                switch_env = True
-            else:
-                switch_env = False
-
-            if iter % cfg.max_iters == 0:
-                automate = False
-
-            # if iter > 140:
-            #     active = False
+            if cfg.phase == 'train':
+                start_time = time.time()
+                if switch_env:
+                    posit1_old = client.simGetVehiclePose()
+                    times_switch = times_switch + 1
+                    level_state[level] = current_state
+                    level_posit[level] = posit1_old
+                    last_crash_array[level] = last_crash
+                    ret_array[level] = ret
+                    distance_array[level] = distance
+                    epi_env_array[int(level / 3)] = episode
+
+                    level = (level + 1) % len(reset_array)
+
+                    print('Transferring to level: ', level, ' - ', level_name[level])
+
+                    if times_switch < len(reset_array):
+                        reset_to_initial(level, reset_array, client)
+                    else:
+                        current_state = level_state[level]
+                        posit1_old = level_posit[level]
+
+                        reset_to_initial(level, reset_array, client)
+                        client.simSetVehiclePose(posit1_old, ignore_collison=True)
+                        time.sleep(0.1)
+
+                    last_crash = last_crash_array[level]
+                    ret = ret_array[level]
+                    distance = distance_array[level]
+                    episode = epi_env_array[int(level / 3)]
+                    xxx = client.simGetVehiclePose()
+                    # environ = environ^True
+
+                action, action_type, cfg.epsilon, qvals = policy(cfg.epsilon, current_state, iter, cfg.epsilon_saturation, cfg.epsilon_model, cfg.wait_before_train, cfg.num_actions, agent)
+
+                action_word = translate_action(action, cfg.num_actions)
+                # Take the action
+                agent.take_action(action, cfg.num_actions, phase=cfg.phase)
+                time.sleep(0.05)
+
+                posit = client.simGetVehiclePose()
+
+                new_state = agent.get_state()
+                new_depth1, thresh = agent.get_depth()
+
+                # Get GPS information
+                posit = client.simGetVehiclePose()
+                orientation = posit.orientation
+                position = posit.position
+                old_p = np.array([old_posit.position.x_val, old_posit.position.y_val])
+                new_p = np.array([position.x_val, position.y_val])
+                # calculate distance
+                distance = distance + np.linalg.norm(new_p - old_p)
+                old_posit = posit
+
+                reward, crash = agent.reward_gen(new_depth1, action, crash_threshold, thresh)
+
+                ret = ret + reward
+                agent_state1 = agent.GetAgentState()
+
+                if agent_state1.has_collided:
+                    num_collisions = num_collisions + 1
+                    print('crash')
+                    crash = True
+                    reward = -1
+                data_tuple = []
+                data_tuple.append([current_state, action, new_state, reward, crash])
+                err = get_errors(data_tuple, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
+                ReplayMemory.add(err, data_tuple)
+
+                # Train if sufficient frames have been stored
+                if iter > cfg.wait_before_train:
+                    if iter % cfg.train_interval == 0:
+                        # Train the RL network
+                        old_states, Qvals, actions, err, idx = minibatch_double(data_tuple, cfg.batch_size, choose, ReplayMemory, cfg.input_size, agent, target_agent, cfg.gamma, cfg.Q_clip)
+
+                        for i in range(cfg.batch_size):
+                            ReplayMemory.update(idx[i], err[i])
+
+                        if print_qval:
+                            print(Qvals)
+
+                        if choose:
+                            # Double-DQN
+                            target_agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
+                        else:
+                            agent.train_n(old_states, Qvals, actions, cfg.batch_size, cfg.dropout_rate, cfg.lr, cfg.epsilon, iter)
+
+                if iter % cfg.update_target_interval == 0:
+                    agent.take_action([-1], cfg.num_actions, phase=cfg.phase)
+                    print('Switching Target Network')
+                    choose = not choose
+                    agent.save_network(cfg.network_path)
+
+                iter += 1
+
+                time_exec = time.time() - start_time
+
+                mem_percent = process.memory_info()[0] / 2.**30
+
+                s_log = 'Level :{:>2d}: Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format(
+                    int(level), iter, episode,
+                    action_word,
+                    action_type,
+                    cfg.epsilon,
+                    cfg.lr,
+                    ret,
+                    last_crash,
+                    time_exec,
+                    mem_percent,
+                    reward)
+
+                print(s_log)
+                f.write(s_log + '\n')
+
+                last_crash = last_crash + 1
+
+                if crash:
+                    agent.return_plot(ret, episode, int(level / 3), mem_percent, iter, distance)
+                    ret = 0
+                    distance = 0
+                    episode = episode + 1
+                    last_crash = 0
+
+                    reset_to_initial(level, reset_array, client)
+                    time.sleep(0.2)
+                    current_state = agent.get_state()
+                else:
+                    current_state = new_state
+
+                if iter % cfg.switch_env_steps == 0:
+                    switch_env = True
+                else:
+                    switch_env = False
+
+                if iter % cfg.max_iters == 0:
+                    automate = False
+
+                # if iter > 140:
+                #     active = False
+
+            elif cfg.phase == 'infer':
+                # Inference phase
+                agent_state = agent.GetAgentState()
+                if agent_state.has_collided:
+                    print('Drone collided')
+                    print("Total distance traveled: ", np.round(distance, 2))
+                    active = False
+                    client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=1).join()
+                    ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20)
+                    file_path = env_folder + 'results/'
+                    fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
+                    fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
+
+                    print('Figures saved')
+                else:
+                    posit = agent.client.simGetVehiclePose()
+                    distance = distance + np.linalg.norm(np.array([old_posit.position.x_val - posit.position.x_val, old_posit.position.y_val - posit.position.y_val]))
+                    altitude.append(-posit.position.z_val - f_z)
+
+                    x_val = posit.position.x_val
+                    y_val = posit.position.y_val
+
+                    nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
+                    nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
+                    nav.set_data(nav_x, nav_y)
+                    nav_text.remove()
+                    nav_text = ax_nav.text(25, 55, 'Distance: ' + str(np.round(distance, 2)), style='italic',
+                                           bbox={'facecolor': 'white', 'alpha': 0.5})
+
+                    line_z.set_data(np.arange(len(altitude)), altitude)
+                    ax_z.set_xlim(0, len(altitude))
+                    fig_z.canvas.draw()
+                    fig_z.canvas.flush_events()
+
+                    current_state = agent.get_state()
+                    action, action_type, cfg.epsilon, qvals = policy(1, current_state, iter,
+                                                                     cfg.epsilon_saturation, 'inference',
+                                                                     cfg.wait_before_train, cfg.num_actions, agent)
+                    # Take continuous action
+                    agent.take_action(action, cfg.num_actions, phase=cfg.phase)
+                    old_posit = posit
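The navigational overlay in the infer branch above is an affine map from AirSim world coordinates (metres) to floorplan pixels: nav = alpha * position + origin. A worked example using the constants from scratch.py (values are illustrative for any other environment):

    # Sketch: world position -> floorplan pixel, as done in the infer loop above.
    alpha, o_x, o_y = 36.4, 1318.49, 769.635  # scale and origin (from scratch.py)
    x_val, y_val = 2.0, -1.5                  # example drone position in metres
    nav_x = alpha * x_val + o_x               # 1391.29 px
    nav_y = alpha * y_val + o_y               # 715.035 px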
@@ -124,7 +124,7 @@ class DeepAgent():
         # self.action_array[action[0].astype(int)] += 1
         return action.astype(int)

-    def take_action(self, action, num_actions):
+    def take_action(self, action, num_actions, phase):
         # Set Parameters
         fov_v = 45 * np.pi / 180
         fov_h = 80 * np.pi / 180
@@ -133,7 +133,6 @@ class DeepAgent():
         ignore_collision = False
         sqrt_num_actions = np.sqrt(num_actions)
-
         posit = self.client.simGetVehiclePose()
         pos = posit.position
         orientation = posit.orientation
@@ -148,25 +147,42 @@ class DeepAgent():
         theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
         psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)

-        noise_theta = (fov_v / sqrt_num_actions) / 6
-        noise_psi = (fov_h / sqrt_num_actions) / 6
-
-        psi = psi + random.uniform(-1, 1) * noise_psi
-        theta = theta + random.uniform(-1, 1) * noise_theta
-
-        # print('Theta: ', theta * 180 / np.pi, end='')
-        # print(' Psi: ', psi * 180 / np.pi)
-
-        x = pos.x_val + r * np.cos(alpha + psi)
-        y = pos.y_val + r * np.sin(alpha + psi)
-        z = pos.z_val + r * np.sin(theta)  # -ve because Unreal has -ve z direction going upwards
-
-        self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
-                                      ignore_collison=ignore_collision)
+        if phase == 'train':
+            noise_theta = (fov_v / sqrt_num_actions) / 6
+            noise_psi = (fov_h / sqrt_num_actions) / 6
+
+            psi = psi + random.uniform(-1, 1) * noise_psi
+            theta = theta + random.uniform(-1, 1) * noise_theta
+
+            x = pos.x_val + r * np.cos(alpha + psi)
+            y = pos.y_val + r * np.sin(alpha + psi)
+            z = pos.z_val + r * np.sin(theta)  # -ve because Unreal has -ve z direction going upwards
+
+            self.client.simSetVehiclePose(airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(0, 0, alpha + psi)),
+                                          ignore_collison=ignore_collision)
+        elif phase == 'infer':
+            r_infer = 0.5
+            vx = r_infer * np.cos(alpha + psi)
+            vy = r_infer * np.sin(alpha + psi)
+            vz = r_infer * np.sin(theta)
+            # TODO
+            # Take average of previous velocities and current to smoothen out drone movement.
+            self.client.moveByVelocityAsync(vx=vx, vy=vy, vz=vz, duration=1,
+                                            drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom,
+                                            yaw_mode=airsim.YawMode(is_rate=False,
+                                                                    yaw_or_rate=180 * (alpha + psi) / np.pi))
+            # self.client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.01).join()
         # print("")
         # print("Throttle:", throttle)
         # print('Yaw:', yaw)

         # self.client.moveByAngleThrottleAsync(pitch=-0.015, roll=0, throttle=throttle, yaw_rate=yaw, duration=0.2).join()
         # self.client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.005)

     def get_depth(self):
         responses = self.client.simGetImages([airsim.ImageRequest(2, airsim.ImageType.DepthVis, False, False)])
         depth = []
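On the TODO in the infer branch (averaging velocities to smooth the flight): one option is an exponential moving average over successive velocity commands. A sketch only — self.prev_vel is a hypothetical attribute, not part of this diff:

    # Hypothetical smoothing for the infer branch of take_action():
    beta = 0.7                                   # weight on the newest command
    v_new = np.array([vx, vy, vz])
    self.prev_vel = beta * v_new + (1 - beta) * getattr(self, 'prev_vel', v_new)
    vx, vy, vz = self.prev_vel                   # fly the smoothed command instead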
scratch.py  (171 changed lines)
@@ -1,10 +1,169 @@
-# Author: Aqeel Anwar(ICSRL)
-# Created: 11/8/2019, 4:02 PM
-# Email: aqeel.anwar@gatech.edu
+# # # Author: Aqeel Anwar(ICSRL)
+# # # Created: 11/8/2019, 4:02 PM
+# # # Email: aqeel.anwar@gatech.edu
+# #
+# #
+# # # import airsim
+# import os
+# import time
+# from aux_functions import *
+# from util.transformations import euler_from_quaternion
+# import airsim
+#
+# # Connect to Unreal Engine and get the drone handle: client
+# client, old_posit = connect_drone(ip_address='127.0.0.1', phase='infer')
+#
+#
+# # Async methods returns Future. Call join() to wait for task to complete.
+# # client.takeoffAsync().join()
+# pos = client.simGetVehiclePose()
+# # print(pos)
+# num_actions = 25
+# action = [20]
+# fov_v = 45 * np.pi / 180
+# fov_h = 80 * np.pi / 180
+# r = 0.4
+#
+# sqrt_num_actions = np.sqrt(num_actions)
+#
+#
+# for i in range(5):
+#
+#     client.moveByVelocityAsync(vx=0, vy=0, vz=-1, duration=1).join()
+#     posit = client.simGetVehiclePose()
+#     pos = posit.position
+#     orientation = posit.orientation
+#     print("Z", pos.z_val)
+#     cc = 1
+#
+#     quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
+#     eulers = euler_from_quaternion(quat)
+#     alpha = eulers[2]
+#
+#     theta_ind = int(action[0] / sqrt_num_actions)
+#     psi_ind = action[0] % sqrt_num_actions
+#
+#     theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
+#     psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
+#
+#     vx = r * np.cos(alpha + psi)
+#     vy = r * np.sin(alpha + psi)
+#     vz = r * np.sin(theta)
+#     print("ang:", 180 * (alpha + psi) / np.pi)
+#     client.moveByVelocityAsync(vx=vx, vy=vy, vz=vz, duration=3, drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom, yaw_mode=airsim.YawMode(is_rate=False, yaw_or_rate=180 * (alpha + psi) / np.pi)).join()
+#     client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1).join()
+#
+#     posit = client.simGetVehiclePose()
+#     pos = posit.position
+#     orientation = posit.orientation
+#
+#     quat = (orientation.w_val, orientation.x_val, orientation.y_val, orientation.z_val)
+#     eulers = euler_from_quaternion(quat)
+#     alpha = eulers[2]
+#     print(eulers)
+#
+#
+# # time.sleep(5)
+# # client.moveToPositionAsync(-10, 10, -10, 5).join()
+# print("Done")
+# #
+#
+#
+# def action_take(action):
+#     action = [22]
+#
+#     fov_v = 45 * np.pi / 180
+#     fov_h = 80 * np.pi / 180
+#
+#     sqrt_num_actions = 5
+#     theta_ind = int(action[0] / sqrt_num_actions)
+#     psi_ind = action[0] % sqrt_num_actions
+#
+#     theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
+#     psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
+#
+#     yaw = psi
+#     throttle = 0.596 + np.sin(theta)
+#
+#     print("")
+#     print("Throttle:", throttle)
+#     print('Yaw:', yaw)
+#
+#     return throttle, yaw
+
+
+import airsim
+from aux_functions import *
+import numpy as np
+# import seaborn as sns
+# import matplotlib.pyplot as plt
+# import time, random
+# # x, y, z = np.random.rand(3, 100)
+# plt.style.use('seaborn-pastel')
+# cmap = sns.cubehelix_palette(as_cmap=True)
+# plt.ion()
+# fig = plt.figure()
+# ax = fig.add_subplot(111)
+# x = []
+# y = []
+# points, = ax.plot(x, y)
+# # ax.set_ylim([0, 5])
+# # ax.set_xlim([0, 10])
+# img = plt.imread("unreal_envs/indoor_condo/floor.png")
+# ax.imshow(img)
+# plt.axis('off')
+# plt.title("Navigational map")
+# o_x = 1318.49
+# o_y = 769.635
+# # a = 10
+# # b = [3,5]
+# # plt.axis('off')
+# for i in range(10):
+#     x.append(i)
+#     y.append(random.randint(1, 5))
+#     points.set_data(x, y)
+#     fig.canvas.draw()
+#     fig.canvas.flush_events()
+#     time.sleep(1)
+
+
+client, old_posit = connect_drone()
+import matplotlib.pyplot as plt
+import time, random
+
+altitude = []
+plt.ion()
+fig = plt.figure()
+ax1 = fig.add_subplot(111)
+line1, = ax1.plot(0, 0)
+plt.title("Altitude variation")
+
+nav_x = []
+nav_y = []
+o_x = 1318.49
+o_y = 769.635
+alpha = 36.4
+fig2 = plt.figure()
+ax2 = fig2.add_subplot(111)
+img = plt.imread("unreal_envs/indoor_cloud/floor.png")
+ax2.imshow(img)
+plt.axis('off')
+plt.title("Navigational map")
+plt.plot(o_x, o_y, 'r*', linewidth=5)
+nav, = ax2.plot(o_x, o_y)
+
+
+for i in range(100):
+    nav_text = ax2.text(25, 55, 'Distance: ' + str(np.round(1.2333, 2)), style='italic',
+                        bbox={'facecolor': 'white', 'alpha': 0.5})
+    nav_x.append(alpha * random.randint(0, 5) + o_x)
+    nav_y.append(alpha * random.randint(0, 5) + o_y)
+    nav.set_data(nav_x, nav_y)
+    # line1.set_ydata(altitude)
+    # print("alt: ", altitude)
+    altitude.append(random.randint(0, 5))
+    line1.set_data(altitude, altitude)
+    fig.canvas.draw()
+    fig.canvas.flush_events()
+
+    time.sleep(0.2)