fix action shape: index the argmax result with [0, 0] instead of [0] in choose_action

MorvanZhou · Morvan Zhou · commit a7b14b809134 · 2017-10-27T09:06:10.000+11:00
diff --git a/tutorial-contents/405_DQN_Reinforcement_learning.py b/tutorial-contents/405_DQN_Reinforcement_learning.py
@@ -58,7 +58,7 @@ def choose_action(self, x):
         # input only one sample
         if np.random.uniform() < EPSILON:   # greedy
             actions_value = self.eval_net.forward(x)
-            action = torch.max(actions_value, 1)[1].data.numpy()[0]     # return the argmax
+            action = torch.max(actions_value, 1)[1].data.numpy()[0, 0]     # return the argmax
         else:   # random
             action = np.random.randint(0, N_ACTIONS)
         return action