Commit 87a8f0b7 authored by Volkan Kayatas

Added a random episode generator and an episode-to-edgelist function; updated the outdated project structure

parent f9a816aa
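For orientation, the "episode to edgelist" step named in the commit message can be read as: take a generated episode (a walk over grid states) and write each consecutive pair of states as an edge, in the same "state neighbour" line format that printEdgelist() in the training script below already emits. A minimal sketch, assuming an episode is a list of visited grid-state indices; the function name episode_to_edgelist and the episode format are illustrative assumptions, not necessarily the code added in this commit:

    # Hypothetical sketch: turn an episode (list of visited state indices)
    # into an edgelist file of "u v" pairs, one edge per line.
    def episode_to_edgelist(episode, path):
        with open(path, 'w') as f:
            for u, v in zip(episode, episode[1:]):
                if u != v:  # skip self-loops caused by repeated states
                    f.write('{} {}\n'.format(u, v))

    # Usage (hypothetical): episode_to_edgelist([0, 1, 2, 22, 42], 'episode.edgelist')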
{
"python.pythonPath": "/Users/Vikram/anaconda3/bin/python"
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
@@ -6,7 +6,7 @@ import sys
import argparse
import numpy as np
import paho.mqtt.client as mqtt
#import paho.mqtt.client as mqtt
from keras.layers import *
from keras.models import Sequential
@@ -20,7 +20,7 @@ from matplotlib import pyplot as plt
from variable import *
import os
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
np.random.seed(1)
DEFAULT_SIZE_ROW = 10
DEFAULT_SIZE_COLUMN = 10
@@ -39,18 +39,19 @@ obstacles_loc_2 = [20, 21, 22, 23, 24, 25, 49,
obstacles_loc_3 = [20, 21, 22, 23, 24, 25, 49,
50, 55, 56, 57, 58, 59, 79, 80, 81, 82]
obstacles_loc_4 = [12,29,30,31,32,33,37,38,39,44,51,57,65,70,71,72,73,79]
obstacles_loc_4 = [12, 29, 30, 31, 32, 33, 37, 38, 39, 44, 51, 57, 65, 70, 71, 72, 73, 79]
obstacles_loc_6 = [22, 25, 26, 27, 47, 67, 60, 82, 83, 84, 85, 107, 108, 121, 128, 141, 142, 146, 164, 165, 168, 181,
32, 35, 36, 37, 57, 77, 70, 92, 93, 94, 95, 117, 118, 131, 138, 151, 152, 156, 174, 175, 178, 191,
222, 225,226,227,247,267,260,282,283,285,307,308,321,328,341,342,346,365,368,381,
232,235,236,237,257,277,270,292,293,294,295,317,318,331,338,351,352,356,374,375,378,391]
222, 225, 226, 227, 247, 267, 260, 282, 283, 285, 307, 308, 321, 328, 341, 342, 346, 365, 368, 381,
232, 235, 236, 237, 257, 277, 270, 292, 293, 294, 295, 317, 318, 331, 338, 351, 352, 356, 374, 375,
378, 391]
obstacles_loc_7 = [22, 25, 26, 31, 41, 70, 71, 85, 90, 131, 132, 142, 167, 170, 177,
32, 35, 36, 37, 57, 77, 70, 92, 93, 94, 95, 117, 118, 131, 138, 151, 152, 156, 174, 179, 190,
211, 230, 247,267,260,310,311,312,326,329,340,343,356,357,378,371,
212,215,226,224,248,261,271,282,286,298,301,308,311,316,351,332,366,369,384,385,318,391]
211, 230, 247, 267, 260, 310, 311, 312, 326, 329, 340, 343, 356, 357, 378, 371,
212, 215, 226, 224, 248, 261, 271, 282, 286, 298, 301, 308, 311, 316, 351, 332, 366, 369, 384, 385,
318, 391]
canvas_list = []
storage_value = []
@@ -61,23 +62,22 @@ EMPTY = 1.0
TARGET = 0.75
START = 0.5
#Run params
# Run params
START_STATE = 1
MAX_EPISODES = 60
EMBTOGGLE = 2
DIMENSION = 20
TARGET_LOC = 399
EMBEDPATH = "./Embeddings/"
RESULTPATH = "./Results/LM2/"
VALSPATH = "./Results/Vals/"
STPSPATH = "./Results/STP2/"
REWPATH = "./Results/RW2/"
EMBEDPATH = "./Old/Old_Embeddings/"
RESULTPATH = "./Old/Old_Results/LM2/"
VALSPATH = "./Old/Old_Results/Vals/"
STPSPATH = "./Old/Old_Results/STP2/"
REWPATH = "./Old/Old_Results/RW2/"
GRID = 20
row_num = GRID
col_num = GRID
DEBUG = False
EPSILON_REDUCE = True
RANDOM_MODE = False
@@ -102,13 +102,11 @@ MEMORY_LEN = 1000
DISCOUNT_RATE = 0.95
BATCH_SIZE = 50
#Save params
# Save params
state_index = -1
rew_arr = []
rewardAxis = np.zeros((10,60))
stepsAxis = np.zeros((10,60))
rewardAxis = np.zeros((10, 60))
stepsAxis = np.zeros((10, 60))
partRew = [[] for i in range(10)]
globalTotSteps = 0
@@ -132,9 +130,9 @@ class DQNAgent:
# Neural Net for Deep-Q learning Model
model = Sequential()
if(EMBTOGGLE == 2):
if (EMBTOGGLE == 2):
model.add(Conv2D(16, kernel_size=(3, 3), strides=(1, 1),
activation='relu',input_shape= (GRID, GRID,1)))
activation='relu', input_shape=(GRID, GRID, 1)))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(Flatten())
@@ -144,7 +142,7 @@ class DQNAgent:
optimizer='adam')
return model
elif(EMBTOGGLE == 0):
elif (EMBTOGGLE == 0):
model.add(Dense(64, input_shape=(
self.state_size,), activation='relu'))
else:
@@ -160,17 +158,17 @@ class DQNAgent:
(current_state, action, reward, next_state, game_over))
def replay(self, batch_size):
#print("doing")
# print("doing")
memory_size = len(self.memory)
batch_size = min(memory_size, batch_size)
minibatch = random.sample(self.memory, batch_size)
if(EMBTOGGLE == 0):
if (EMBTOGGLE == 0):
inputs = np.zeros((batch_size, self.state_size))
elif(EMBTOGGLE == 2):
elif (EMBTOGGLE == 2):
inputs = np.zeros((batch_size, GRID, GRID, 1))
else:
inputs = np.zeros((batch_size, DIMENSION))
#inputs = np.zeros((batch_size, 2*DIMENSION))
# inputs = np.zeros((batch_size, 2*DIMENSION))
targets = np.zeros((batch_size, self.num_actions))
i = 0
for state, action, reward, next_state, done in minibatch:
@@ -269,7 +267,7 @@ class Environment:
self.current_state = (row_x, col_y, STATE_START)
self.cs = row_x * self.col_number + col_y
self.visited = set()
self.min_reward = -25
self.min_reward = -5
self.free_cells = [(r, c) for r in range(self.row_number) for c in range(self.col_number) if
self._map[r, c] == 1.0]
@@ -278,14 +276,13 @@ class Environment:
for row, col in itertools.product(range(self.row_number), range(self.col_number)):
storage_value[row, col] = self._map[row, col]
def valid_actions(self, cell=None):
if cell is None:
row, col, mode = self.current_state
else:
row, col = cell
state = self.col_number*row + col
state = self.col_number * row + col
return self.adj_dir[state]
def update_state(self, action):
@@ -295,7 +292,7 @@ class Environment:
self.visited.add((current_row, current_col))
valid_actions = self.valid_actions()
#print(nrow,ncol)
# print(nrow,ncol)
if not valid_actions:
nmode = STATE_BLOCKED
elif action in valid_actions:
@@ -321,13 +318,11 @@ class Environment:
# invalid action
nmode = STATE_INVALID
if(self.cs in obstacles_loc):
if (self.cs in obstacles_loc):
nmode = STATE_BLOCKED
self.current_state = (nrow, ncol, nmode)
self.cs = nrow*self.col_number + ncol
self.cs = nrow * self.col_number + ncol
# Action define:
# 0: LEFT
@@ -369,7 +364,6 @@ class Environment:
return STATE_WIN
return STATE_VALID
def get_reward(self):
current_row, current_col, mode = self.current_state
target_row, target_col = self.target
@@ -388,10 +382,10 @@ class Environment:
state = self.cs
embed = np.zeros(self.dimension)
embed[:self.dimension] = self.model1[str(state)]
#embed[self.dimension:] = self.model2[str(state)]
# embed[self.dimension:] = self.model2[str(state)]
return embed
def generate_embeddings_custom(self,state):
def generate_embeddings_custom(self, state):
temp = self.cs
state = state
embed = np.zeros(self.dimension)
@@ -401,9 +395,9 @@ class Environment:
def deepQLearning(model, env, state, args, randomMode=False, **opt):
global state_index, rewardAxis, state_index,globalTotSteps
global state_index, rewardAxis, state_index, globalTotSteps
episodes = opt.get('n_epoch', MAX_EPISODES)
#print(obstacles_loc)
# print(obstacles_loc)
print(args)
batch_size = opt.get('batch_size', BATCH_SIZE)
@@ -422,7 +416,7 @@ def deepQLearning(model, env, state, args, randomMode=False, **opt):
totRew = 0
for episode in range(episodes):
#print("\nEpisode: ",episode)
# print("\nEpisode: ",episode)
loss = 0.0
env.reset()
game_over = False
@@ -432,42 +426,41 @@ def deepQLearning(model, env, state, args, randomMode=False, **opt):
list_action = []
next_state = env.map.reshape((1, -1))
#while end state is not reached or cumulative reward doesn't reach minimum
# while end state is not reached or cumulative reward doesn't reach minimum
while not game_over:
#print(env.cs,end=' ')
print("cs: "+str(env.cs))
valid_actions = env.valid_actions()
if not valid_actions:
game_over = True
#print(env.map)
# print(env.map)
continue
current_state = next_state
#Embedding for current state
# Embedding for current state
cs = (env.generate_embedding()).reshape((1, -1))
# Get best action from current state
if np.random.rand() < model.epsilon:
action = random.choice(valid_actions)
else:
if(EMBTOGGLE == 0):
if (EMBTOGGLE == 0):
action = model.predict(current_state)
elif EMBTOGGLE == 2:
current_state = np.reshape(current_state,(1,GRID,GRID,))
current_state = np.expand_dims(current_state,-1)
current_state = np.reshape(current_state, (1, GRID, GRID,))
current_state = np.expand_dims(current_state, -1)
action = model.predict(current_state)
else:
action = model.predict(cs)
# Apply action, get reward and new envstate
next_state, reward, game_status = env.act(action)
totRew += reward
ns = (env.generate_embedding()).reshape((1, -1))
#print(env.cs,reward,end=' -> ')
# print(env.cs,reward,end=' -> ')
#print(env.cs,end=' ')
# print(env.cs,end=' ')
if game_status == STATE_WIN:
x, y, _ = env.current_state
storage_value[x, y] = TARGET
@@ -481,27 +474,27 @@ def deepQLearning(model, env, state, args, randomMode=False, **opt):
else:
game_over = False
if(game_over == True):
print(env.total_reward)
if (game_over == True):
print("Total Reward: " + str(env.total_reward))
rewardAxis[state_index][episode] = env.total_reward
stepsAxis[state_index][episode] = totStps
#rew_arr.append(env.total_reward)
# rew_arr.append(env.total_reward)
if DEBUG:
print("--------------------------------------")
print(np.reshape(current_state, newshape=(4, 4)))
print("action = {},valid_action = {},reward = {}, game_over = {}".format(action, valid_actions,
reward, game_over))
#print(np.reshape(next_state, newshape=(4, 4)))
# print(np.reshape(next_state, newshape=(4, 4)))
list_action.append(action)
#Store episode (experience)
if(EMBTOGGLE == 1):
# Store episode (experience)
if (EMBTOGGLE == 1):
model.remember(cs, action, reward, ns, game_over)
elif(EMBTOGGLE == 2):
current_state = np.reshape(current_state, (1,GRID,GRID,))
next_state = np.reshape(next_state, (1,GRID,GRID,))
current_state = np.expand_dims(current_state,-1)
next_state = np.expand_dims(next_state,-1)
elif (EMBTOGGLE == 2):
current_state = np.reshape(current_state, (1, GRID, GRID,))
next_state = np.reshape(next_state, (1, GRID, GRID,))
current_state = np.expand_dims(current_state, -1)
next_state = np.expand_dims(next_state, -1)
model.remember(current_state, action,
reward, next_state, game_over)
@@ -511,7 +504,7 @@ def deepQLearning(model, env, state, args, randomMode=False, **opt):
n_step += 1
totStps += 1
if(totStps %50 == 0):
if (totStps % 50 == 0):
partRew[state_index].append(totRew)
loss = model.replay(batch_size)
@@ -534,29 +527,27 @@ def deepQLearning(model, env, state, args, randomMode=False, **opt):
break
def getRowCol(obs):
return (obs//GRID, obs % GRID)
return (obs // GRID, obs % GRID)
def create_environment(start_row, start_col, args):
global obstacles_loc
for obstacle in obstacles_loc:
(row, col) = getRowCol(obstacle)
row = obstacle//GRID
row = obstacle // GRID
col = obstacle % GRID
storage_value[row, col] = OBSTACLE
storage_value[start_row, start_col] = START
TRow = TARGET_LOC//GRID
TCol = TARGET_LOC%GRID
TRow = TARGET_LOC // GRID
TCol = TARGET_LOC % GRID
storage_value[TRow, TCol] = TARGET
row_num = GRID
col_num = GRID
env = Environment(row_num, col_num,args)
env = Environment(row_num, col_num, args)
for row, col in itertools.product(range(row_num), range(col_num)):
if storage_value[row, col] == START:
@@ -568,7 +559,7 @@ def create_environment(start_row, start_col, args):
num_states = env.observation_space
for i in range(GRID*GRID):
for i in range(GRID * GRID):
env.adj_list.append([])
env.adj_dir.append([])
@@ -584,25 +575,24 @@ def create_environment(start_row, start_col, args):
# if(state+GRID<=GRID*GRID):
# env.adj_list[state].append(state+GRID)
#print(env.adj_list[0])
# print(env.adj_list[0])
with open(args.edgelist) as f:
for line in f:
line = line.rstrip().split(' ')
if(int(line[1]) not in env.adj_list[int(line[0])]):
if (int(line[1]) not in env.adj_list[int(line[0])]):
env.adj_list[int(line[0])].append(int(line[1]))
if(int(line[0]) not in env.adj_list[int(line[1])]):
if (int(line[0]) not in env.adj_list[int(line[1])]):
env.adj_list[int(line[1])].append(int(line[0]))
#print(env.adj_list[0])
# print(env.adj_list[0])
for state in range(GRID*GRID):
for state in range(GRID * GRID):
for next_state in env.adj_list[state]:
if(next_state == state - 1):
if (next_state == state - 1):
env.adj_dir[state].append(0)
elif(next_state == state-GRID):
elif (next_state == state - GRID):
env.adj_dir[state].append(1)
elif(next_state == state + 1):
elif (next_state == state + 1):
env.adj_dir[state].append(2)
else:
env.adj_dir[state].append(3)
@@ -618,42 +608,39 @@ def create_environment(start_row, start_col, args):
# print(i,end=' ')
# print(env.adj_list[i][j])
#print(len(env.adj_list))
# print(len(env.adj_list))
return env
def printEdgelist(args):
f = open(args.edgelist,'w')
f = open(args.edgelist, 'w')
for row in range(row_num):
for col in range(col_num):
vertexList = []
dirList = []
state = col_num*row + col
if(state in obstacles_loc):
state = col_num * row + col
if (state in obstacles_loc):
continue
if((state%row_num != 0)):
f.write('{} {}\n'.format(state,state-1))
if(((state+1)%row_num != 0) ):
f.write('{} {}\n'.format(state,state+1))
if((state > row_num) ):
f.write('{} {}\n'.format(state,state-row_num))
if((state+row_num < GRID*GRID)):
f.write('{} {}\n'.format(state,state+row_num))
if ((state % row_num != 0)):
f.write('{} {}\n'.format(state, state - 1))
if (((state + 1) % row_num != 0)):
f.write('{} {}\n'.format(state, state + 1))
if ((state > row_num)):
f.write('{} {}\n'.format(state, state - row_num))
if ((state + row_num < GRID * GRID)):
f.write('{} {}\n'.format(state, state + row_num))
def trainDQN(args):
#update state, valid actions, set collision
# update state, valid actions, set collision
start_row = 0
start_col = 0
env = create_environment(start_row, start_col,args)
env = create_environment(start_row, start_col, args)
if env is None:
return
global state_index,partRew
global state_index, partRew
state_index = -1
for _ in range(int(args.iterations)):
@@ -670,27 +657,24 @@ def trainDQN(args):
env.reset()
deepQLearning(model, env, state, args)
if(_ == int(args.iterations) -1 ):
if (_ == int(args.iterations) - 1):
partRew = np.array(partRew)
res_path = RESULTPATH + str(args.savepath)
vals_path = VALSPATH + str(args.savepath)
steps_path = STPSPATH + str(args.savepath)
partrew_path = REWPATH + str(args.savepath)
np.save(res_path,rewardAxis)
np.save(steps_path,stepsAxis)
np.save(partrew_path,partRew)
np.save(res_path, rewardAxis)
np.save(steps_path, stepsAxis)
np.save(partrew_path, partRew)
#vals = np.zeros((GRID*GRID,4))
# vals = np.zeros((GRID*GRID,4))
#if(i not in obstacles_loc):
# if(i not in obstacles_loc):
# pred = model.model.predict((env.generate_embeddings_custom(i)).reshape((1, -1)))
#for j in range(4):
# for j in range(4):
# vals[i][j] = pred[0][j]
#np.save(vals_path,vals)
# np.save(vals_path,vals)
pass
@@ -704,27 +688,28 @@ if __name__ == "__main__":
parser.add_argument("-iter", "--iterations", help="Number of iterations")
parser.add_argument("-target", "--target", help="Location of target")
parser.add_argument("-el", "--edgelist", help="edgelist of the maze")
parser.add_argument("-dim","--dimension",help="Dimension")
parser.add_argument("-dim", "--dimension", help="Dimension")
args = parser.parse_args()
DIMENSION = int(args.dimension)
if (args.maze == "1"):
obstacles_loc = obstacles_loc_1
elif(args.maze == "2"):
elif (args.maze == "2"):
obstacles_loc = obstacles_loc_2
elif(args.maze == "3"):
elif (args.maze == "3"):
obstacles_loc = obstacles_loc_3
elif(args.maze == "4"):
elif (args.maze == "4"):
obstacles_loc = obstacles_loc_4
elif(args.maze == "6"):
elif (args.maze == "6"):
obstacles_loc = obstacles_loc_6
elif(args.maze == "7"):
elif (args.maze == "7"):
obstacles_loc = obstacles_loc_7
TARGET_LOC = int(args.target)
for row, col in itertools.product(range(GRID), range(GRID)):
storage_value.append(NOT_USE)
storage_value = np.array(storage_value, dtype=float).reshape(GRID, GRID)  # np.float is removed in recent NumPy
#printEdgelist(args)
# printEdgelist(args)
trainDQN(args)
\ No newline at end of file
This diff is collapsed.
import logging
import os
from io import open
from time import time
from six.moves import range
from six import iterkeys
from collections.abc import Iterable  # Iterable lives in collections.abc on Python 3.3+
import random
import numpy as np
from Utils import defaultdict
logger = logging.getLogger("Episode generator")
"""
Helper class to generate episodes from a given maze.
"""
class Graph(defaultdict):
"""Efficient basic implementation of nx `Graph' – Undirected graphs with self loops"""
def __init__(self):
super(Graph, self).__init__(list)
def nodes(self):
return self.keys()
def make_undirected(self):
t0 = time()
for v in list(self):
for other in self[v]:
if v != other:
self[other].append(v)
t1 = time()
logger.info('make_undirected: added missing edges {}s'.format(t1 - t0))
self.make_consistent()
return self
def make_consistent(self):
t0 = time()
for k in iterkeys(self):
self[k] = list(sorted(set(self[k])))
t1 = time()
logger.info('make_consistent: made consistent in {}s'.format(t1 - t0))
self.remove_self_loops()
return self
def remove_self_loops(self):
removed = 0
t0 = time()
for x in self:
if x in self[x]:
self[x].remove(x)
removed += 1
t1 = time()
logger.info('remove_self_loops: removed {} loops in {}s'.format(removed, (t1 - t0)))
return self
def has_edge(self, v1, v2):
if v2 in self[v1] or v1 in self[v2]:
return True
return False
def degree(self, nodes=None):
if isinstance(nodes, Iterable):
return {v: len(self[v]) for v in nodes}