We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ce4a828 commit a7b14b8 (Copy full SHA for a7b14b8)
1 file changed
tutorial-contents/405_DQN_Reinforcement_learning.py
@@ -58,7 +58,7 @@ def choose_action(self, x):
58
# input only one sample
59
if np.random.uniform() < EPSILON: # greedy
60
actions_value = self.eval_net.forward(x)
61
- action = torch.max(actions_value, 1)[1].data.numpy()[0] # return the argmax
+ action = torch.max(actions_value, 1)[1].data.numpy()[0, 0] # return the argmax
62
else: # random
63
action = np.random.randint(0, N_ACTIONS)
64
return action
0 commit comments