
Commit

Train every 5, for 50 episodes, with agents at the center of the grid, trying both rmsprop and adam
ferielamira1 committed May 21, 2024
1 parent 6cb83f3 commit 7a837f3
Showing 7 changed files with 63 additions and 66 deletions.
11 changes: 7 additions & 4 deletions abm/data/metaprotocol/experiments/MADRLExp0N3R100T5M6.py
@@ -20,6 +20,9 @@
num_agents = [3]
reloc_speed = [3.0]
reloc_th = [2.0]
seed=[0]

optimizers = ["ADAM", "RMSPROP"]

criteria_exp = [

@@ -29,19 +32,19 @@
Constant("WINDOW_PAD", 30),
Constant("N_EPISODES", 100),
Constant("T", 20000),
Constant("SEED", 0),
Tunable("SEED", values_override=seed),
Constant("TRAIN", 1),
Constant("TRAIN_EVERY", 5),
Constant("TRAIN_EVERY", 5 ),
Constant("PRETRAINED", 0),
Constant("BATCH_SIZE", 128),
Constant("REPLAY_MEMORY_CAPACITY", 50000),
Constant("GAMMA", 0.99),
Constant("LR", 1e-05),
Constant("EPSILON_START", 1.0),
Constant("EPSILON_END", 0.001),
Constant("EPSILON_END", 0.01),
Constant("EPSILON_DECAY", 50000),
Constant("TAU", 0.01),
Constant("OPTIMIZER", "ADAM"),
Tunable("OPTIMIZER", values_override=optimizers ),
Constant("PRETRAINED_MODELS_DIR", ""),
Constant("BRAIN_TYPE", "DQN"),
#Constant("ISE_W", 1.0),
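Note: the experiment file now declares seed and optimizers as Python lists and hands them to Tunable rather than Constant, so the metaprotocol can sweep them. A minimal, hypothetical sketch of the resulting sweep (plain itertools, not the project's actual Tunable/MetaProtocol machinery):

# Hypothetical illustration only -- not the project's API. It shows how the
# two tunable lists above would cross into concrete run configurations.
from itertools import product

seed = [0]
optimizers = ["ADAM", "RMSPROP"]

runs = [{"SEED": s, "OPTIMIZER": opt} for s, opt in product(seed, optimizers)]
print(runs)
# [{'SEED': 0, 'OPTIMIZER': 'ADAM'}, {'SEED': 0, 'OPTIMIZER': 'RMSPROP'}]
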
9 changes: 6 additions & 3 deletions abm/data/metaprotocol/experiments/MADRLExp0N3R10T5M6.py
@@ -20,6 +20,9 @@
num_agents = [3]
reloc_speed = [3.0]
reloc_th = [2.0]
seed=[0]

optimizers = ["ADAM", "RMSPROP"]

criteria_exp = [

@@ -29,7 +32,7 @@
Constant("WINDOW_PAD", 30),
Constant("N_EPISODES", 100),
Constant("T", 20000),
Constant("SEED", 0),
Tunable("SEED", values_override=seed),
Constant("TRAIN", 1),
Constant("TRAIN_EVERY", 5),
Constant("PRETRAINED", 0),
@@ -38,10 +41,10 @@
Constant("GAMMA", 0.99),
Constant("LR", 1e-05),
Constant("EPSILON_START", 1.0),
Constant("EPSILON_END", 0.001),
Constant("EPSILON_END", 0.01),
Constant("EPSILON_DECAY", 50000),
Constant("TAU", 0.01),
Constant("OPTIMIZER", "ADAM"),
Tunable("OPTIMIZER", values_override=optimizers),
Constant("PRETRAINED_MODELS_DIR", ""),
Constant("BRAIN_TYPE", "DQN"),
#Constant("ISE_W", 1.0),
8 changes: 5 additions & 3 deletions abm/data/metaprotocol/experiments/MADRLExp0N3R3T5M6.py
@@ -18,6 +18,8 @@
num_agents = [3]
reloc_speed = [3.0]
reloc_th = [2.0]
seed = [0]
optimizers = ["ADAM", "RMSPROP"]

criteria_exp = [

@@ -27,10 +29,10 @@
Constant("WINDOW_PAD", 30),
Constant("N_EPISODES", 100),
Constant("T", 20000),
Constant("SEED", 0),
Tunable("SEED", values_override=seed),
Constant("TRAIN", 1),
Constant("TRAIN_EVERY", 5),
Constant("PRETRAINED", 1),
Constant("PRETRAINED", 0),
Constant("BATCH_SIZE", 128),
Constant("REPLAY_MEMORY_CAPACITY", 50000),
Constant("GAMMA", 0.99),
@@ -39,7 +41,7 @@
Constant("EPSILON_END", 0.01),
Constant("EPSILON_DECAY", 50000),
Constant("TAU", 0.01),
Constant("OPTIMIZER", "ADAM"),
Tunable("OPTIMIZER", values_override=optimizers),
Constant("PRETRAINED_MODELS_DIR", ""),
Constant("BRAIN_TYPE", "DQN"),
#Constant("ISE_W", 1.0),
8 changes: 5 additions & 3 deletions abm/data/metaprotocol/experiments/MADRLExp0N3R50T5M6.py
@@ -20,6 +20,8 @@
num_agents = [3]
reloc_speed = [3.0]
reloc_th = [2.0]
seed=[0]
optimizers = ["ADAM", "RMSPROP"]

criteria_exp = [

@@ -29,7 +31,7 @@
Constant("WINDOW_PAD", 30),
Constant("N_EPISODES", 100),
Constant("T", 20000),
Constant("SEED", 0),
Tunable("SEED", values_override=seed),
Constant("TRAIN", 1),
Constant("TRAIN_EVERY", 5),
Constant("PRETRAINED", 0),
@@ -38,10 +40,10 @@
Constant("GAMMA", 0.99),
Constant("LR", 1e-05),
Constant("EPSILON_START", 1.0),
Constant("EPSILON_END", 0.001),
Constant("EPSILON_END", 0.01),
Constant("EPSILON_DECAY", 50000),
Constant("TAU", 0.01),
Constant("OPTIMIZER", "ADAM"),
Tunable("OPTIMIZER", values_override=optimizers),
Constant("PRETRAINED_MODELS_DIR", ""),
Constant("BRAIN_TYPE", "DQN"),
#Constant("ISE_W", 1.0),
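Note: every experiment file above keeps EPSILON_START=1.0, EPSILON_END=0.01 and EPSILON_DECAY=50000. A common exponential schedule consistent with those names is sketched below; whether the project uses exactly this formula is an assumption.

import math

EPSILON_START, EPSILON_END, EPSILON_DECAY = 1.0, 0.01, 50000

def epsilon_at(step: int) -> float:
    # Decays from EPSILON_START toward EPSILON_END; EPSILON_DECAY sets the time scale.
    return EPSILON_END + (EPSILON_START - EPSILON_END) * math.exp(-step / EPSILON_DECAY)

print(epsilon_at(0))        # 1.0
print(epsilon_at(50_000))   # ~0.37
print(epsilon_at(250_000))  # ~0.017
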
58 changes: 15 additions & 43 deletions abm/projects/madrl_foraging/madrl_agent/brain.py
@@ -39,10 +39,10 @@ def __init__(self, input_size, output_size):
self.layer4 = nn.Linear(128, output_size)

# Initialize weights
#init.kaiming_uniform_(self.layer1.weight, mode='fan_in', nonlinearity='relu')
#init.kaiming_uniform_(self.layer2.weight, mode='fan_in', nonlinearity='relu')
#init.kaiming_uniform_(self.layer3.weight, mode='fan_in', nonlinearity='relu')
#init.kaiming_uniform_(self.layer4.weight, mode='fan_in', nonlinearity='relu')
init.kaiming_uniform_(self.layer1.weight, mode='fan_in', nonlinearity='relu')
init.kaiming_uniform_(self.layer2.weight, mode='fan_in', nonlinearity='relu')
init.kaiming_uniform_(self.layer3.weight, mode='fan_in', nonlinearity='relu')
init.kaiming_uniform_(self.layer4.weight, mode='fan_in', nonlinearity='relu')

def forward(self, state):
x = F.relu(self.layer1(state))
@@ -207,8 +207,17 @@ def optimize(self):
# This is merged based on the mask, such that we'll have either the expected
# state value or 0 in case the state was final.
next_state_values = torch.zeros(self.batch_size, device=device)
with torch.no_grad():
next_state_values[non_final_mask] = self.target_q_network(non_final_next_states).detach().max(1).values
if self.brain_type=="DDQN":
# Double DQN update
if sum(non_final_mask) > 0:
next_state_actions = self.q_network(non_final_next_states).max(1)[1].unsqueeze(1)
next_state_values[non_final_mask] = self.target_q_network(non_final_next_states).gather(1,
next_state_actions).squeeze().detach()
else:
# Standard DQN update
with torch.no_grad():
next_state_values[non_final_mask] = self.target_q_network(non_final_next_states).detach().max(1).values

# Compute the expected Q values
expected_state_action_values = (next_state_values * self.gamma) + reward_batch

@@ -239,40 +248,3 @@ def update_target_network(self):
target_net_state_dict[key] = policy_net_state_dict[key]*self.tau + target_net_state_dict[key]*(1-self.tau)
self.target_q_network.load_state_dict(target_net_state_dict)

class DDQNAgent(DQNAgent):
def __init__(self, state_size, action_size):
super().__init__(state_size, action_size)

def optimize(self):
if len(self.replay_memory) < self.batch_size:
return None

transitions = self.replay_memory.sample(self.batch_size)
batch = Transition(*zip(*transitions))

non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
batch.next_state)), device=device, dtype=torch.bool)
non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
state_batch = torch.cat(batch.state)
action_batch = torch.cat(batch.action)
reward_batch = torch.cat(batch.reward)

state_action_values = self.q_network(state_batch).gather(1, action_batch)

# DDQN changes
next_state_values = torch.zeros(self.batch_size, device=device)
if sum(non_final_mask) > 0:
next_state_actions = self.q_network(non_final_next_states).max(1)[1].unsqueeze(1) # Use q_network to select actions
next_state_values[non_final_mask] = self.target_q_network(non_final_next_states).gather(1, next_state_actions).squeeze().detach() # Use target_q_network to evaluate the value of selected actions

expected_state_action_values = (next_state_values * self.gamma) + reward_batch

loss = F.mse_loss(state_action_values, expected_state_action_values.unsqueeze(1))

self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 1.0)
self.optimizer.step()

return loss.item()
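
Note: the change above folds the former DDQNAgent.optimize into DQNAgent.optimize behind a brain_type check: standard DQN lets the target network both select and evaluate the next action, while Double DQN selects with the online network and evaluates with the target network. A self-contained PyTorch sketch of the two target computations (toy tensors, non-final masking omitted; not the project's actual batch code):

import torch

batch_size, n_actions, state_dim, gamma = 4, 3, 5, 0.99
q_net = torch.nn.Linear(state_dim, n_actions)       # stands in for the online network
target_net = torch.nn.Linear(state_dim, n_actions)  # stands in for the target network
next_states = torch.randn(batch_size, state_dim)
rewards = torch.randn(batch_size)

with torch.no_grad():
    # Standard DQN target: max over the target network's own estimates.
    dqn_next = target_net(next_states).max(1).values
    # Double DQN target: online network picks the action, target network scores it.
    next_actions = q_net(next_states).max(1)[1].unsqueeze(1)
    ddqn_next = target_net(next_states).gather(1, next_actions).squeeze(1)

dqn_target = rewards + gamma * dqn_next
ddqn_target = rewards + gamma * ddqn_next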

16 changes: 6 additions & 10 deletions abm/projects/madrl_foraging/madrl_agent/madrl_agent.py
@@ -13,7 +13,7 @@
import matplotlib.pyplot as plt

#matplotlib.use('agg')
from abm.projects.madrl_foraging.madrl_agent.brain import DQNAgent, DDQNAgent
from abm.projects.madrl_foraging.madrl_agent.brain import DQNAgent
from abm.agent.agent import Agent
from abm.projects.madrl_foraging.madrl_contrib import madrl_learning_params as learning_params

@@ -53,13 +53,7 @@ def __init__(self,train,**kwargs):
self.total_discov= 0
self.new_discovery = 0
#create the policy network
if learning_params.brain_type == "DQN":
print("DQN Agent")
self.policy_network = DQNAgent(state_size=self.v_field_res+ 1, action_size=3)
elif learning_params.brain_type == "DDQN":
print("DDQN Agent")

self.policy_network = DDQNAgent(state_size=self.v_field_res+ 1, action_size=3)
self.policy_network = DQNAgent(state_size=self.v_field_res+ 1, action_size=3)

if learning_params.pretrained and learning_params.pretrained_models_dir!="":
print("Loading pretrained model")
@@ -248,8 +242,10 @@ def reset(self):
Reset relevant values of the agent after each train episode.
"""
# Reset position and orientation
x=np.random.randint(self.window_pad - self.radius, self.WIDTH + self.window_pad - self.radius)
y=np.random.randint(self.window_pad - self.radius, self.HEIGHT + self.window_pad - self.radius)
#x=np.random.randint(self.window_pad - self.radius, self.WIDTH + self.window_pad - self.radius)
#y=np.random.randint(self.window_pad - self.radius, self.HEIGHT + self.window_pad - self.radius)
x = self.WIDTH // 2
y = self.HEIGHT // 2
self.position = np.array((x,y), dtype=np.float64)
self.orientation = np.random.uniform(0, 2 * np.pi)
# Reset agent state variables
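Note: reset() now pins each agent to the grid centre while keeping a random heading; the old uniform-random spawn is left commented out (and create_agents in sims.py below gets the same treatment). A tiny standalone sketch of both behaviours behind a flag (the flag is hypothetical; the commit hard-codes the centred variant):

import numpy as np

def spawn(width, height, window_pad, radius, centered=True):
    # Centred spawn (current behaviour) vs. the previous uniform-random spawn.
    if centered:
        x, y = width // 2, height // 2
    else:
        x = np.random.randint(window_pad - radius, width + window_pad - radius)
        y = np.random.randint(window_pad - radius, height + window_pad - radius)
    orientation = np.random.uniform(0, 2 * np.pi)
    return np.array((x, y), dtype=np.float64), orientation

pos, theta = spawn(500, 500, window_pad=30, radius=10)
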
19 changes: 19 additions & 0 deletions abm/simulation/sims.py
@@ -530,6 +530,7 @@ def add_new_agent(self, id, x, y, orient, with_proove=False, behave_params=None)
self.agents.add(agent)
agent_proven = True

'''
def create_agents(self):
"""Creating agents according to how the simulation class was initialized"""
for i in range(self.N):
@@ -542,6 +543,24 @@ def create_agents(self):
self.add_new_agent(i, x, y, orient)
else:
self.add_new_agent(i, x, y, orient, behave_params=self.agent_behave_param_list[i])
'''


def create_agents(self):
"""Creating agents according to how the simulation class was initialized"""
center_x = self.WIDTH // 2
center_y = self.HEIGHT // 2

for i in range(self.N):
# All agents will be placed at the center of the grid
x = center_x
y = center_y
orient = np.random.uniform(0, 2 * np.pi)
if not self.heterogen_agents:
# create agents according to environment variables homogeneously
self.add_new_agent(i, x, y, orient)
else:
self.add_new_agent(i, x, y, orient, behave_params=self.agent_behave_param_list[i])

def create_resources(self):
"""Creating resource patches according to how the simulation class was initialized"""
