work with order execution - we are forced to do limit orders over the API

2025-07-14 13:36:07 +03:00
parent d7205a9745
commit f861559319
4 changed files with 387 additions and 182 deletions
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -807,6 +807,17 @@ class DQNAgent:
        if isinstance(expected_dim, tuple):
            expected_dim = np.prod(expected_dim)
        
+        # Debug: Check what dimensions we're actually seeing
+        if sanitized_states:
+            actual_dims = [len(state) for state in sanitized_states[:5]]  # Check first 5
+            logger.debug(f"DQN State dimensions - Expected: {expected_dim}, Actual samples: {actual_dims}")
+            
+            # If all states have a consistent dimension different from expected, use that
+            unique_dims = list(set(len(state) for state in sanitized_states))
+            if len(unique_dims) == 1 and unique_dims[0] != expected_dim:
+                logger.warning(f"All states have dimension {unique_dims[0]} but expected {expected_dim}. Using actual dimension.")
+                expected_dim = unique_dims[0]
+        
        # Filter out states with wrong dimensions and fix them
        valid_states = []
        valid_next_states = []
@@ -1076,162 +1087,165 @@ class DQNAgent:
            # Zero gradients
            self.optimizer.zero_grad()
            
-            # Forward pass with amp autocasting
-            with torch.cuda.amp.autocast():
-                # Get current Q values and extrema predictions
-                current_q_values, current_extrema_pred, current_price_pred, hidden_features, current_advanced_pred = self.policy_net(states)
-                current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
-                
-                # Get next Q values from target network
-                with torch.no_grad():
-                    next_q_values, next_extrema_pred, next_price_pred, next_hidden_features, next_advanced_pred = self.target_net(next_states)
-                    next_q_values = next_q_values.max(1)[0]
+            # Forward pass with amp autocasting  
+            import warnings
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", FutureWarning)
+                with torch.cuda.amp.autocast():
+                    # Get current Q values and extrema predictions
+                    current_q_values, current_extrema_pred, current_price_pred, hidden_features, current_advanced_pred = self.policy_net(states)
+                    current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
                    
-                    # Check for dimension mismatch and fix it
-                    if rewards.shape[0] != next_q_values.shape[0]:
-                        # Log the shape mismatch for debugging
-                        logger.warning(f"Shape mismatch detected: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
-                        # Use the smaller size to prevent index errors
-                        min_size = min(rewards.shape[0], next_q_values.shape[0])
-                        rewards = rewards[:min_size]
-                        dones = dones[:min_size]
-                        next_q_values = next_q_values[:min_size]
-                        current_q_values = current_q_values[:min_size]
-                    
-                    target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
-                
-                # Compute Q-value loss (primary task)
-                q_loss = nn.MSELoss()(current_q_values, target_q_values)
-                
-                # Initialize loss with q_loss
-                loss = q_loss
-                
-                # Try to extract price from current and next states
-                try:
-                    # Extract price feature from sequence data (if available)
-                    if len(states.shape) == 3:  # [batch, seq, features]
-                        current_prices = states[:, -1, -1]  # Last timestep, last feature
-                        next_prices = next_states[:, -1, -1]
-                    else:  # [batch, features]
-                        current_prices = states[:, -1]  # Last feature
-                        next_prices = next_states[:, -1]
-                    
-                    # Calculate price change for different timeframes
-                    immediate_changes = (next_prices - current_prices) / current_prices
-                    
-                    # Get the actual batch size for this calculation
-                    actual_batch_size = states.shape[0]
-                    
-                    # Create price direction labels - simplified for training
-                    # 0 = down, 1 = sideways, 2 = up
-                    immediate_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1  # Default: sideways
-                    midterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
-                    longterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
-                    
-                    # Immediate term direction (1s, 1m)
-                    immediate_up = (immediate_changes > 0.0005)
-                    immediate_down = (immediate_changes < -0.0005)
-                    immediate_labels[immediate_up] = 2    # Up
-                    immediate_labels[immediate_down] = 0  # Down
-                    
-                    # For mid and long term, we can only approximate during training
-                    # In a real system, we'd need historical data to validate these
-                    # Here we'll use the immediate term with increasing thresholds as approximation
-                    
-                    # Mid-term (1h) - use slightly higher threshold
-                    midterm_up = (immediate_changes > 0.001)
-                    midterm_down = (immediate_changes < -0.001)
-                    midterm_labels[midterm_up] = 2    # Up
-                    midterm_labels[midterm_down] = 0  # Down
-                    
-                    # Long-term (1d) - use even higher threshold
-                    longterm_up = (immediate_changes > 0.002)
-                    longterm_down = (immediate_changes < -0.002)
-                    longterm_labels[longterm_up] = 2    # Up
-                    longterm_labels[longterm_down] = 0  # Down
-                    
-                    # Generate target values for price change regression
-                    # For simplicity, we'll use the immediate change and scaled versions for longer timeframes
-                    price_value_targets = torch.zeros((actual_batch_size, 4), device=self.device)
-                    price_value_targets[:, 0] = immediate_changes
-                    price_value_targets[:, 1] = immediate_changes * 2.0  # Approximate 1h change
-                    price_value_targets[:, 2] = immediate_changes * 4.0  # Approximate 1d change
-                    price_value_targets[:, 3] = immediate_changes * 6.0  # Approximate 1w change
-                    
-                    # Calculate loss for price direction prediction (classification)
-                    if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= actual_batch_size:
-                        # Slice predictions to match the adjusted batch size
-                        immediate_pred = current_price_pred['immediate'][:actual_batch_size]
-                        midterm_pred = current_price_pred['midterm'][:actual_batch_size]
-                        longterm_pred = current_price_pred['longterm'][:actual_batch_size]
-                        price_values_pred = current_price_pred['values'][:actual_batch_size]
+                    # Get next Q values from target network
+                    with torch.no_grad():
+                        next_q_values, next_extrema_pred, next_price_pred, next_hidden_features, next_advanced_pred = self.target_net(next_states)
+                        next_q_values = next_q_values.max(1)[0]
                        
-                        # Compute losses for each task
-                        immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
-                        midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
-                        longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
+                        # Check for dimension mismatch and fix it
+                        if rewards.shape[0] != next_q_values.shape[0]:
+                            # Log the shape mismatch for debugging
+                            logger.warning(f"Shape mismatch detected: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
+                            # Use the smaller size to prevent index errors
+                            min_size = min(rewards.shape[0], next_q_values.shape[0])
+                            rewards = rewards[:min_size]
+                            dones = dones[:min_size]
+                            next_q_values = next_q_values[:min_size]
+                            current_q_values = current_q_values[:min_size]
                        
-                        # MSE loss for price value regression
-                        price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
-                        
-                        # Combine all price prediction losses
-                        price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
-                        
-                        # Create extrema labels (same as before)
-                        extrema_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 2  # Default: neither
-                        
-                        # Identify potential bottoms (significant negative change)
-                        bottoms = (immediate_changes < -0.003)
-                        extrema_labels[bottoms] = 0
-                        
-                        # Identify potential tops (significant positive change)
-                        tops = (immediate_changes > 0.003)
-                        extrema_labels[tops] = 1
-                        
-                        # Calculate extrema prediction loss
-                        if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= actual_batch_size:
-                            current_extrema_pred = current_extrema_pred[:actual_batch_size]
-                            extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
-                            
-                            # Combined loss with all components
-                            # Primary task: Q-value learning (RL objective)
-                            # Secondary tasks: extrema detection and price prediction (supervised objectives)
-                            loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
-                            
-                            # Log loss components occasionally
-                            if random.random() < 0.01:  # Log 1% of the time
-                                logger.info(
-                                    f"Mixed precision losses: Q-loss={q_loss.item():.4f}, "
-                                    f"Extrema-loss={extrema_loss.item():.4f}, "
-                                    f"Price-loss={price_loss.item():.4f}"
-                                )
-                except Exception as e:
-                    # Fallback if price extraction fails
-                    logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
-                    # Just use Q-value loss
+                        target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
+                    
+                    # Compute Q-value loss (primary task)
+                    q_loss = nn.MSELoss()(current_q_values, target_q_values)
+                    
+                    # Initialize loss with q_loss
                    loss = q_loss
-            
-            # Backward pass with scaled gradients
-            self.scaler.scale(loss).backward()
-            
-            # Gradient clipping on scaled gradients
-            self.scaler.unscale_(self.optimizer)
-            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
-            
-            # Update with scaler
-            self.scaler.step(self.optimizer)
-            self.scaler.update()
-            
-            # Update target network if needed
-            self.update_count += 1
-            if self.update_count % self.target_update == 0:
-                self.target_net.load_state_dict(self.policy_net.state_dict())
-            
-            # Track and decay epsilon
-            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
-            
-            return loss.item()
-            
+                    
+                    # Try to extract price from current and next states
+                    try:
+                        # Extract price feature from sequence data (if available)
+                        if len(states.shape) == 3:  # [batch, seq, features]
+                            current_prices = states[:, -1, -1]  # Last timestep, last feature
+                            next_prices = next_states[:, -1, -1]
+                        else:  # [batch, features]
+                            current_prices = states[:, -1]  # Last feature
+                            next_prices = next_states[:, -1]
+                        
+                        # Calculate price change for different timeframes
+                        immediate_changes = (next_prices - current_prices) / current_prices
+                        
+                        # Get the actual batch size for this calculation
+                        actual_batch_size = states.shape[0]
+                        
+                        # Create price direction labels - simplified for training
+                        # 0 = down, 1 = sideways, 2 = up
+                        immediate_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1  # Default: sideways
+                        midterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
+                        longterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
+                        
+                        # Immediate term direction (1s, 1m)
+                        immediate_up = (immediate_changes > 0.0005)
+                        immediate_down = (immediate_changes < -0.0005)
+                        immediate_labels[immediate_up] = 2    # Up
+                        immediate_labels[immediate_down] = 0  # Down
+                        
+                        # For mid and long term, we can only approximate during training
+                        # In a real system, we'd need historical data to validate these
+                        # Here we'll use the immediate term with increasing thresholds as approximation
+                        
+                        # Mid-term (1h) - use slightly higher threshold
+                        midterm_up = (immediate_changes > 0.001)
+                        midterm_down = (immediate_changes < -0.001)
+                        midterm_labels[midterm_up] = 2    # Up
+                        midterm_labels[midterm_down] = 0  # Down
+                        
+                        # Long-term (1d) - use even higher threshold
+                        longterm_up = (immediate_changes > 0.002)
+                        longterm_down = (immediate_changes < -0.002)
+                        longterm_labels[longterm_up] = 2    # Up
+                        longterm_labels[longterm_down] = 0  # Down
+                        
+                        # Generate target values for price change regression
+                        # For simplicity, we'll use the immediate change and scaled versions for longer timeframes
+                        price_value_targets = torch.zeros((actual_batch_size, 4), device=self.device)
+                        price_value_targets[:, 0] = immediate_changes
+                        price_value_targets[:, 1] = immediate_changes * 2.0  # Approximate 1h change
+                        price_value_targets[:, 2] = immediate_changes * 4.0  # Approximate 1d change
+                        price_value_targets[:, 3] = immediate_changes * 6.0  # Approximate 1w change
+                        
+                        # Calculate loss for price direction prediction (classification)
+                        if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= actual_batch_size:
+                            # Slice predictions to match the adjusted batch size
+                            immediate_pred = current_price_pred['immediate'][:actual_batch_size]
+                            midterm_pred = current_price_pred['midterm'][:actual_batch_size]
+                            longterm_pred = current_price_pred['longterm'][:actual_batch_size]
+                            price_values_pred = current_price_pred['values'][:actual_batch_size]
+                            
+                            # Compute losses for each task
+                            immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
+                            midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
+                            longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
+                            
+                            # MSE loss for price value regression
+                            price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
+                            
+                            # Combine all price prediction losses
+                            price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
+                            
+                            # Create extrema labels (same as before)
+                            extrema_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 2  # Default: neither
+                            
+                            # Identify potential bottoms (significant negative change)
+                            bottoms = (immediate_changes < -0.003)
+                            extrema_labels[bottoms] = 0
+                            
+                            # Identify potential tops (significant positive change)
+                            tops = (immediate_changes > 0.003)
+                            extrema_labels[tops] = 1
+                            
+                            # Calculate extrema prediction loss
+                            if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= actual_batch_size:
+                                current_extrema_pred = current_extrema_pred[:actual_batch_size]
+                                extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
+                                
+                                # Combined loss with all components
+                                # Primary task: Q-value learning (RL objective)
+                                # Secondary tasks: extrema detection and price prediction (supervised objectives)
+                                loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
+                                
+                                # Log loss components occasionally
+                                if random.random() < 0.01:  # Log 1% of the time
+                                    logger.info(
+                                        f"Mixed precision losses: Q-loss={q_loss.item():.4f}, "
+                                        f"Extrema-loss={extrema_loss.item():.4f}, "
+                                        f"Price-loss={price_loss.item():.4f}"
+                                    )
+                    except Exception as e:
+                        # Fallback if price extraction fails
+                        logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
+                        # Just use Q-value loss
+                        loss = q_loss
+                
+                # Backward pass with scaled gradients
+                self.scaler.scale(loss).backward()
+                
+                # Unscale gradients first so clipping applies to their true (unscaled) magnitudes
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                
+                # Update with scaler
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                
+                # Update target network if needed
+                self.update_count += 1
+                if self.update_count % self.target_update == 0:
+                    self.target_net.load_state_dict(self.policy_net.state_dict())
+                
+                # Track and decay epsilon
+                self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
+                
+                return loss.item()
+                
        except Exception as e:
            logger.error(f"Error in mixed precision training: {str(e)}")
            logger.warning("Falling back to standard precision training")
@@ -1565,6 +1579,14 @@ class DQNAgent:
        try:
            # If state is already a numpy array, return it
            if isinstance(state, np.ndarray):
+                # Check for empty array
+                if state.size == 0:
+                    logger.warning("Received empty numpy array state. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
+                
                # Check for non-numeric data and handle it
                if state.dtype == object:
                    # Convert object array to float array
@@ -1581,6 +1603,14 @@ class DQNAgent:
            
            # If state is a list or tuple, convert to array
            elif isinstance(state, (list, tuple)):
+                # Check for empty list/tuple
+                if len(state) == 0:
+                    logger.warning("Received empty list/tuple state. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
+                
                # Recursively sanitize each element
                sanitized = []
                for item in state:
@@ -1591,7 +1621,18 @@ class DQNAgent:
                        sanitized.append(sanitized_row)
                    else:
                        sanitized.append(self._extract_numeric_value(item))
-                return np.array(sanitized, dtype=np.float32)
+                
+                result = np.array(sanitized, dtype=np.float32)
+                
+                # Check if result is empty and provide fallback
+                if result.size == 0:
+                    logger.warning("Sanitized state resulted in empty array. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
+                
+                return result
            
            # If state is a dict, try to extract values
            elif isinstance(state, dict):