cb ws

2025-07-27 20:56:37 +03:00
parent bd986f4534
commit 9e1684f9f8
7 changed files with 531 additions and 112 deletions
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -271,10 +271,16 @@ class DQNAgent:
        else:
            self.device = device
        
+        logger.info(f"DQN Agent using device: {self.device}")
+        
        # Initialize models with RL-specific network architecture
        self.policy_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
        self.target_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
        
+        # Ensure models are on the correct device
+        self.policy_net = self.policy_net.to(self.device)
+        self.target_net = self.target_net.to(self.device)
+        
        # Initialize the target network with the same weights as the policy network
        self.target_net.load_state_dict(self.policy_net.state_dict())
        
@@ -997,11 +1003,19 @@ class DQNAgent:
        
        # Convert to tensors with proper validation
        try:
-            states = torch.FloatTensor(np.array(states)).to(self.device)
-            actions = torch.LongTensor(np.array(actions)).to(self.device)
-            rewards = torch.FloatTensor(np.array(rewards)).to(self.device)
-            next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
-            dones = torch.FloatTensor(np.array(dones)).to(self.device)
+            # Ensure all data is on CPU first, then move to device
+            states_array = np.array(states, dtype=np.float32)
+            actions_array = np.array(actions, dtype=np.int64)
+            rewards_array = np.array(rewards, dtype=np.float32)
+            next_states_array = np.array(next_states, dtype=np.float32)
+            dones_array = np.array(dones, dtype=np.float32)
+            
+            # Convert to tensors and move to device
+            states = torch.from_numpy(states_array).to(self.device)
+            actions = torch.from_numpy(actions_array).to(self.device)
+            rewards = torch.from_numpy(rewards_array).to(self.device)
+            next_states = torch.from_numpy(next_states_array).to(self.device)
+            dones = torch.from_numpy(dones_array).to(self.device)
            
            # Final validation of tensor shapes
            if states.shape[0] == 0 or actions.shape[0] == 0: