RL training in NN working
This commit is contained in:
parent 4eac14022c
commit 0ad9484d56
@@ -13,10 +13,9 @@ logger = logging.getLogger(__name__)

class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading signals
    Simplified version for RL training
    CNN model for trading with multiple timeframes
    """
    def __init__(self, window_size: int, num_features: int, output_size: int, timeframes: List[str]):
    def __init__(self, window_size, num_features, output_size, timeframes):
        super(CNNModelPyTorch, self).__init__()

        self.window_size = window_size
@@ -24,12 +23,33 @@ class CNNModelPyTorch(nn.Module):
        self.output_size = output_size
        self.timeframes = timeframes

        # Calculate total input features across all timeframes
        self.total_features = num_features * len(timeframes)

        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Build model
        self.build_model()
        # Convolutional layers
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        # Calculate size after convolutions
        conv_output_size = window_size * 256

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 512)
        self.fc2 = nn.Linear(512, 256)

        # Advantage and Value streams (Dueling DQN architecture)
        self.fc3 = nn.Linear(256, output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
@@ -40,36 +60,6 @@ class CNNModelPyTorch(nn.Module):
        # Move model to device
        self.to(self.device)

    def build_model(self):
        """Build the CNN architecture"""
        # First Convolutional Layer
        self.conv1 = nn.Conv1d(
            in_channels=self.num_features * len(self.timeframes),
            out_channels=32,
            kernel_size=3,
            padding=1
        )
        self.bn1 = nn.BatchNorm1d(32)

        # Second Convolutional Layer
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(64)

        # Third Convolutional Layer
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(128)

        # Calculate size after convolutions
        conv_out_size = self.window_size * 128

        # Fully connected layers
        self.fc1 = nn.Linear(conv_out_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, self.output_size)

        # Additional output for value estimation
        self.value_fc = nn.Linear(256, 1)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward pass through the network"""
        # Ensure input is on the correct device
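The hunk is truncated before the body of forward(). A minimal sketch of a dueling-DQN forward pass that is consistent with the layers defined above; the input layout (batch, window_size, total_features) and the Q = V + (A - mean(A)) combination are assumptions, not the committed code:

import torch
import torch.nn.functional as F

def forward_sketch(model, x):
    # x: (batch, window_size, total_features) -> (batch, channels, window_size)
    x = x.to(model.device).permute(0, 2, 1)
    x = F.relu(model.bn1(model.conv1(x)))
    x = F.relu(model.bn2(model.conv2(x)))
    x = F.relu(model.bn3(model.conv3(x)))        # (batch, 256, window_size)
    x = x.flatten(1)                             # (batch, window_size * 256) == conv_output_size
    x = F.relu(model.fc1(x))
    x = F.relu(model.fc2(x))
    advantage = model.fc3(x)                     # (batch, output_size)
    value = model.value_fc(x)                    # (batch, 1)
    # Standard dueling combination: Q = V + (A - mean(A))
    q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
    return q_values, value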
422  NN/train_rl.py
@@ -8,6 +8,7 @@ import os
import sys
import pandas as pd
import gym
import json

# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -28,19 +29,31 @@ logging.basicConfig(
    ]
)

class RLTradingEnvironment(TradingEnvironment):
    """Extended trading environment that reshapes state for CNN"""
    def __init__(self, data, window_size, num_features, num_timeframes, **kwargs):
        # Set attributes before parent initialization
class RLTradingEnvironment(gym.Env):
    """
    Reinforcement Learning environment for trading with technical indicators
    from multiple timeframes
    """
    def __init__(self, features_1m, features_5m, features_15m, window_size=20, trading_fee=0.001):
        super().__init__()

        # Initialize attributes before parent class
        self.window_size = window_size
        self.num_features = num_features
        self.num_timeframes = num_timeframes
        self.feature_dim = num_features * num_timeframes
        self.num_features = features_1m.shape[1] - 1  # Exclude close price
        self.num_timeframes = 3  # 1m, 5m, 15m
        self.feature_dim = self.num_features * self.num_timeframes

        # Initialize parent class
        super().__init__(data=data, **kwargs)
        # Store features from different timeframes
        self.features_1m = features_1m
        self.features_5m = features_5m
        self.features_15m = features_15m

        # Update observation space for CNN
        # Trading parameters
        self.initial_balance = 1.0
        self.trading_fee = trading_fee

        # Define action and observation spaces
        self.action_space = gym.spaces.Discrete(3)  # 0: Buy, 1: Sell, 2: Hold
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
@@ -48,145 +61,354 @@ class RLTradingEnvironment(TradingEnvironment):
            dtype=np.float32
        )

        # State variables
        self.reset()

    def reset(self):
        """Reset the environment to initial state"""
        self.balance = self.initial_balance
        self.position = 0.0  # Amount of asset held
        self.current_step = self.window_size
        self.trades = 0
        self.wins = 0
        self.losses = 0
        self.trade_history = []

        # Get initial observation
        observation = self._get_observation()
        return observation

    def _get_observation(self):
        """Get current observation reshaped for CNN"""
        # Get flattened observation from parent class
        flat_obs = super()._get_observation()
        """
        Get the current state observation.
        Combine features from multiple timeframes, reshaped for the CNN.
        """
        # Calculate indices for each timeframe
        idx_1m = self.current_step
        idx_5m = idx_1m // 5
        idx_15m = idx_1m // 15

        # Extract features (exclude close price)
        features = flat_obs[:-1]  # Remove close price
        # Extract feature windows from each timeframe
        window_1m = self.features_1m[idx_1m - self.window_size:idx_1m]

        # Calculate number of complete windows
        n_windows = len(features) // self.feature_dim
        if n_windows < self.window_size:
            # Pad with zeros if not enough data
            padding = np.zeros((self.window_size - n_windows, self.feature_dim))
            reshaped = np.vstack([
                padding,
                features[-(n_windows * self.feature_dim):].reshape(n_windows, self.feature_dim)
        # Handle 5m timeframe
        start_5m = max(0, idx_5m - self.window_size)
        window_5m = self.features_5m[start_5m:idx_5m]

        # Handle 15m timeframe
        start_15m = max(0, idx_15m - self.window_size)
        window_15m = self.features_15m[start_15m:idx_15m]

        # Pad if needed (for 5m and 15m)
        if len(window_5m) < self.window_size:
            padding = np.zeros((self.window_size - len(window_5m), window_5m.shape[1]))
            window_5m = np.vstack([padding, window_5m])

        if len(window_15m) < self.window_size:
            padding = np.zeros((self.window_size - len(window_15m), window_15m.shape[1]))
            window_15m = np.vstack([padding, window_15m])

        # Combine features from all timeframes
        combined_features = np.hstack([
            window_1m.reshape(self.window_size, -1),
            window_5m.reshape(self.window_size, -1),
            window_15m.reshape(self.window_size, -1)
        ])

        # Convert to float32 and handle any NaN values
        combined_features = np.nan_to_num(combined_features, nan=0.0).astype(np.float32)

        return combined_features
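The combined observation stacks the three per-timeframe windows side by side, aligning the slower timeframes to the current 1m step by integer division. A rough sketch of the resulting shape; the 10-features-per-timeframe figure and the random arrays are assumptions, not values from the repo:

import numpy as np

def tf_window(features, idx, window_size):
    # Mirror of the padding logic above: take the trailing window, zero-pad early steps
    start = max(0, idx - window_size)
    w = features[start:idx]
    if len(w) < window_size:
        w = np.vstack([np.zeros((window_size - len(w), w.shape[1])), w])
    return w

window_size, n_feat = 20, 10
f1 = np.random.rand(5000, n_feat)
f5 = np.random.rand(1000, n_feat)
f15 = np.random.rand(340, n_feat)

t = 100                                     # current 1m step
obs = np.hstack([
    tf_window(f1, t, window_size),
    tf_window(f5, t // 5, window_size),     # 5m rows aligned to the same point in time
    tf_window(f15, t // 15, window_size),   # 15m rows aligned to the same point in time
])
print(obs.shape)                            # (20, 30) -> (window_size, n_feat * 3)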
    def step(self, action):
        """
        Take an action in the environment and return the next state, reward, done flag, and info

        Args:
            action (int): 0 = Buy, 1 = Sell, 2 = Hold

        Returns:
            tuple: (observation, reward, done, info)
        """
        # Get current and next price
        current_price = self.features_1m[self.current_step, -1]  # Close price is last column
        next_price = self.features_1m[self.current_step + 1, -1]

        # Handle zero or negative prices
        if current_price <= 0:
            current_price = 1e-8  # Small positive number
        if next_price <= 0:
            next_price = current_price  # Use current price if next price is invalid

        price_change = (next_price - current_price) / current_price

        # Default reward is slightly negative to discourage inaction
        reward = -0.0001
        done = False

        # Execute action
        if action == 0:  # BUY
            if self.position == 0:  # Only buy if not already in position
                self.position = self.balance * (1 - self.trading_fee)
                self.balance = 0
                self.trades += 1
                reward = 0  # Neutral reward for entering position
                self.trade_entry_price = current_price

        elif action == 1:  # SELL
            if self.position > 0:  # Only sell if in position
                # Calculate position value at current price
                position_value = self.position * (1 + price_change)
                self.balance = position_value * (1 - self.trading_fee)

                # Calculate profit/loss from trade
                profit_pct = (next_price - self.trade_entry_price) / self.trade_entry_price
                reward = profit_pct * 10  # Scale reward by profit percentage

                # Update win/loss count
                if profit_pct > 0:
                    self.wins += 1
                else:
            # Take the most recent window_size windows
            start_idx = (n_windows - self.window_size) * self.feature_dim
            reshaped = features[start_idx:].reshape(self.window_size, self.feature_dim)
                    self.losses += 1

        return reshaped.astype(np.float32)
                # Record trade
                self.trade_history.append({
                    'entry_price': self.trade_entry_price,
                    'exit_price': next_price,
                    'profit_pct': profit_pct
                })

def train_rl():
                # Reset position
                self.position = 0

        # else: (action == 2 - HOLD) - no position change

        # Move to next step
        self.current_step += 1

        # Check if done
        if self.current_step >= len(self.features_1m) - 1:
            done = True

            # Apply final evaluation
            if self.position > 0:
                # Force close position at the end
                position_value = self.position * (1 + price_change)
                self.balance = position_value * (1 - self.trading_fee)
                profit_pct = (next_price - self.trade_entry_price) / self.trade_entry_price
                reward += profit_pct * 10

                # Update win/loss count
                if profit_pct > 0:
                    self.wins += 1
                else:
                    self.losses += 1

        # Get the next observation
        observation = self._get_observation()

        # Calculate metrics for info
        total_value = self.balance + self.position * next_price
        gain = (total_value - self.initial_balance) / self.initial_balance
        self.win_rate = self.wins / max(1, self.trades)

        info = {
            'balance': self.balance,
            'position': self.position,
            'total_value': total_value,
            'gain': gain,
            'trades': self.trades,
            'win_rate': self.win_rate
        }

        return observation, reward, done, info
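A worked example of the reward bookkeeping above, collapsing a buy and the following sell into adjacent steps for brevity (all numbers are illustrative):

balance, fee = 1.0, 0.001

# BUY at 100.0: the whole balance moves into the position, minus the fee
entry_price = 100.0
position, balance = balance * (1 - fee), 0.0               # position = 0.999

# SELL one step later, when the next close is 102.0
next_price = 102.0
price_change = (next_price - entry_price) / entry_price    # 0.02
balance = position * (1 + price_change) * (1 - fee)        # ~1.0180
profit_pct = (next_price - entry_price) / entry_price      # 0.02
reward = profit_pct * 10                                   # 0.2
print(round(balance, 4), reward)                           # 1.018 0.2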
def train_rl(env_class=None, num_episodes=5000, max_steps=2000, save_path="NN/models/saved/dqn_agent"):
    """
    Train the RL model using the DQN agent
    Train DQN agent for RL-based trading with extended training and monitoring
    """
    # Initialize data interface with BTC/USDT and multiple timeframes
    timeframes = ['1m', '5m', '15m']
    logger.info("Starting extended RL training for trading...")

    # Environment setup
    window_size = 20
    data_interface = DataInterface(symbol="BTC/USDT", timeframes=timeframes)
    timeframes = ["1m", "5m", "15m"]
    trading_fee = 0.001

    # Get training data
    X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data()
    if X_train is None:
        logger.error("Failed to get training data")
        return
    # Ensure save directory exists
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Calculate feature dimensions
    num_features = X_train.shape[2]  # Number of features per timeframe
    total_features = num_features * len(timeframes)  # Total features across all timeframes
    # Setup TensorBoard for monitoring
    writer = SummaryWriter(f'runs/rl_training_{datetime.now().strftime("%Y%m%d_%H%M%S")}')

    # Flatten features for environment
    n_samples = X_train.shape[0]
    flattened_features = X_train.reshape(n_samples, window_size, -1)  # Reshape to (batch, window, features)
    # Data loading
    data_interface = DataInterface(
        symbol="BTC/USDT",
        timeframes=timeframes
    )

    # Create DataFrame with features as separate columns
    feature_columns = [f'feature_{i}' for i in range(flattened_features.shape[2])]
    df = pd.DataFrame(flattened_features.reshape(n_samples, -1), columns=feature_columns * window_size)
    df['close'] = train_prices
    # Get training data for each timeframe with more data
    features_1m = data_interface.get_training_data("1m", n_candles=5000)
    features_5m = data_interface.get_training_data("5m", n_candles=2500)
    features_15m = data_interface.get_training_data("15m", n_candles=2500)

    if features_1m is None or features_5m is None or features_15m is None:
        logger.error("Failed to load training data")
        return None

    # Convert DataFrames to numpy arrays, excluding timestamp column
    features_1m = features_1m.drop('timestamp', axis=1, errors='ignore').values
    features_5m = features_5m.drop('timestamp', axis=1, errors='ignore').values
    features_15m = features_15m.drop('timestamp', axis=1, errors='ignore').values

    # Calculate number of features per timeframe
    num_features = features_1m.shape[1]  # Number of features after dropping timestamp

    # Create environment
    env = RLTradingEnvironment(
        data=df,
        features_1m=features_1m,
        features_5m=features_5m,
        features_15m=features_15m,
        window_size=window_size,
        num_features=num_features,
        num_timeframes=len(timeframes),
        initial_balance=10000,
        fee_rate=0.001,
        max_steps=1000
        trading_fee=trading_fee
    )

    # Create DQN agent
    # Create agent with adjusted parameters for longer training
    state_size = window_size
    action_size = 3
    agent = DQNAgent(
        state_size=window_size,  # First dimension of observation space
        action_size=env.action_space.n,
        state_size=state_size,
        action_size=action_size,
        window_size=window_size,
        num_features=num_features,
        timeframes=timeframes,
        learning_rate=0.001,
        gamma=0.99,
        learning_rate=0.0005,  # Reduced learning rate for stability
        gamma=0.99,  # Increased discount factor
        epsilon=1.0,
        epsilon_min=0.01,
        epsilon_decay=0.995,
        memory_size=10000,
        batch_size=32,
        target_update=10
        epsilon_decay=0.999,  # Slower epsilon decay
        memory_size=50000,  # Increased memory size
        batch_size=128  # Increased batch size
    )
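With epsilon_decay=0.999 applied once per episode (as in the training loop below), exploration falls off slowly: roughly 0.37 after 1,000 episodes, only hitting the epsilon_min floor of 0.01 around episode 4,600. A quick check of that schedule (illustrative, not part of the commit):

eps, eps_min, decay = 1.0, 0.01, 0.999
floor_episode = None
for episode in range(1, 5001):
    eps = max(eps_min, eps * decay)
    if floor_episode is None and eps == eps_min:
        floor_episode = episode
print(round(0.999 ** 1000, 4), floor_episode)   # ~0.3677 after 1000 episodes, floor reached near episode 4603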
    # Training parameters
    episodes = 1000
    max_steps = 1000
    # Variables to track best performance
    best_reward = float('-inf')
    best_model_path = 'NN/models/saved/best_rl_model.pth'
    best_episode = 0
    best_pnl = float('-inf')
    best_win_rate = 0.0

    # Create models directory if it doesn't exist
    os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
    # Training metrics
    episode_rewards = []
    episode_pnls = []
    episode_win_rates = []
    episode_trades = []

    # Check if previous best model exists and load it
    best_model_path = f"{save_path}_best"
    if os.path.exists(f"{best_model_path}_policy.pt"):
        try:
            logger.info(f"Loading previous best model from {best_model_path}")
            agent.load(best_model_path)
            metadata_path = f"{best_model_path}_metadata.json"
            if os.path.exists(metadata_path):
                with open(metadata_path, 'r') as f:
                    metadata = json.load(f)
                best_reward = metadata.get('best_reward', best_reward)
                best_episode = metadata.get('best_episode', best_episode)
                best_pnl = metadata.get('best_pnl', best_pnl)
                best_win_rate = metadata.get('best_win_rate', best_win_rate)
                logger.info(f"Loaded previous best metrics - Reward: {best_reward:.4f}, PnL: {best_pnl:.4f}, Win Rate: {best_win_rate:.4f}")
        except Exception as e:
            logger.error(f"Error loading previous best model: {e}")

    # Training loop
    for episode in range(episodes):
    try:
        for episode in range(1, num_episodes + 1):
            state = env.reset()
            total_reward = 0
            done = False
            steps = 0

        for step in range(max_steps):
            # Get action from agent
            while not done and steps < max_steps:
                action = agent.act(state)

                # Take action in environment
                next_state, reward, done, info = env.step(action)

                # Store experience in agent's memory
                agent.remember(state, action, reward, next_state, done)

                # Train agent
                if len(agent.memory) > agent.batch_size:
                    # Learn from experience
                    loss = agent.replay()
                    if loss is not None:
                        logger.debug(f"Training loss: {loss:.4f}")

                # Update state and reward
                state = next_state
                total_reward += reward
                steps += 1

            if done:
                break
            # Calculate episode metrics
            episode_rewards.append(total_reward)
            episode_pnls.append(info['gain'])
            episode_win_rates.append(info['win_rate'])
            episode_trades.append(info['trades'])

            # Update epsilon
            agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
            # Log to TensorBoard
            writer.add_scalar('Reward/episode', total_reward, episode)
            writer.add_scalar('PnL/episode', info['gain'], episode)
            writer.add_scalar('WinRate/episode', info['win_rate'], episode)
            writer.add_scalar('Trades/episode', info['trades'], episode)
            writer.add_scalar('Epsilon/episode', agent.epsilon, episode)

        # Log episode results
        logger.info(f"Episode: {episode + 1}/{episodes}")
        logger.info(f"Total Reward: {total_reward:.2f}")
        logger.info(f"Final Balance: {info['balance']:.2f}")
        logger.info(f"Max Drawdown: {info['max_drawdown']:.2%}")
        logger.info(f"Win Rate: {info['win_rate']:.2%}")
        logger.info(f"Epsilon: {agent.epsilon:.4f}")

        # Save best model
        if total_reward > best_reward:
            # Save the best model based on multiple metrics (only every 50 episodes)
            is_better = False
            if episode % 50 == 0:  # Only check for saving every 50 episodes
                if (info['gain'] > best_pnl and info['win_rate'] > 0.5) or \
                   (info['gain'] > best_pnl * 1.1) or \
                   (info['win_rate'] > best_win_rate * 1.1):
                    best_reward = total_reward
                    best_episode = episode
                    best_pnl = info['gain']
                    best_win_rate = info['win_rate']
                    agent.save(best_model_path)
                    logger.info(f"New best model saved with reward: {best_reward:.2f}")
                    is_better = True

        # Save checkpoint every 100 episodes
        if (episode + 1) % 100 == 0:
            checkpoint_path = f'NN/models/saved/rl_model_episode_{episode + 1}.pth'
            agent.save(checkpoint_path)
            logger.info(f"Checkpoint saved at episode {episode + 1}")
                    # Save metadata about the best model
                    metadata = {
                        'best_reward': best_reward,
                        'best_episode': best_episode,
                        'best_pnl': best_pnl,
                        'best_win_rate': best_win_rate,
                        'date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    }
                    with open(f"{best_model_path}_metadata.json", 'w') as f:
                        json.dump(metadata, f)

            # Log training progress
            if episode % 10 == 0:
                avg_reward = sum(episode_rewards[-10:]) / 10
                avg_pnl = sum(episode_pnls[-10:]) / 10
                avg_win_rate = sum(episode_win_rates[-10:]) / 10
                avg_trades = sum(episode_trades[-10:]) / 10

                status = "NEW BEST!" if is_better else ""
                logger.info(f"Episode {episode}/{num_episodes} {status}")
                logger.info(f"Metrics (last 10 episodes):")
                logger.info(f"  Reward: {avg_reward:.4f}")
                logger.info(f"  PnL: {avg_pnl:.4f}")
                logger.info(f"  Win Rate: {avg_win_rate:.4f}")
                logger.info(f"  Trades: {avg_trades:.2f}")
                logger.info(f"  Epsilon: {agent.epsilon:.4f}")
                logger.info(f"Best so far - PnL: {best_pnl:.4f}, Win Rate: {best_win_rate:.4f}")

    except KeyboardInterrupt:
        logger.info("Training interrupted by user. Saving best model...")

    # Close TensorBoard writer
    writer.close()

    # Final logs
    logger.info(f"Training completed. Best model from episode {best_episode}")
    logger.info(f"Best metrics:")
    logger.info(f"  Reward: {best_reward:.4f}")
    logger.info(f"  PnL: {best_pnl:.4f}")
    logger.info(f"  Win Rate: {best_win_rate:.4f}")

    # Return the agent for potential further use
    return agent

if __name__ == "__main__":
    train_rl()
@@ -303,6 +303,76 @@ class DataInterface:
        """
        return len(self.timeframes) * 5  # OHLCV features for each timeframe

    def get_features(self, timeframe, n_candles=1000):
        """
        Get feature data with technical indicators for a specific timeframe.

        Args:
            timeframe (str): Timeframe to get features for ('1m', '5m', etc.)
            n_candles (int): Number of candles to get

        Returns:
            np.ndarray: Array of feature data including technical indicators
                and the close price as the last column
        """
        # Get historical data
        df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles)

        if df is None or df.empty:
            logger.error(f"No data available for {self.symbol} {timeframe}")
            return None

        # Add technical indicators
        df = self.add_technical_indicators(df)

        # Drop NaN values that might have been introduced by indicators
        df = df.dropna()

        # Extract features (all columns except timestamp)
        features = df.drop('timestamp', axis=1).values

        logger.info(f"Prepared {len(features)} {timeframe} feature rows with {features.shape[1]} features")
        return features
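A minimal usage sketch for the new helper (the symbol, timeframes, and candle count are illustrative, not taken from the repo):

di = DataInterface(symbol="BTC/USDT", timeframes=["1m", "5m", "15m"])

features_1m = di.get_features("1m", n_candles=5000)   # np.ndarray or None
if features_1m is not None:
    # rows = candles remaining after dropna(); columns = OHLCV plus the indicators
    # added below, with the close price kept for the environment's price lookups
    print(features_1m.shape)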
    def add_technical_indicators(self, df):
        """
        Add technical indicators to the dataframe.

        Args:
            df (pd.DataFrame): DataFrame with OHLCV data

        Returns:
            pd.DataFrame: DataFrame with added technical indicators
        """
        # Make a copy to avoid modifying the original
        df_copy = df.copy()

        # Basic price indicators
        df_copy['returns'] = df_copy['close'].pct_change()
        df_copy['log_returns'] = np.log(df_copy['close'] / df_copy['close'].shift(1))

        # Moving Averages
        df_copy['sma_7'] = ta.trend.sma_indicator(df_copy['close'], window=7)
        df_copy['sma_25'] = ta.trend.sma_indicator(df_copy['close'], window=25)
        df_copy['sma_99'] = ta.trend.sma_indicator(df_copy['close'], window=99)

        # MACD
        macd = ta.trend.MACD(df_copy['close'])
        df_copy['macd'] = macd.macd()
        df_copy['macd_signal'] = macd.macd_signal()
        df_copy['macd_diff'] = macd.macd_diff()

        # RSI
        df_copy['rsi'] = ta.momentum.rsi(df_copy['close'], window=14)

        # Bollinger Bands
        bollinger = ta.volatility.BollingerBands(df_copy['close'])
        df_copy['bb_high'] = bollinger.bollinger_hband()
        df_copy['bb_low'] = bollinger.bollinger_lband()
        df_copy['bb_pct'] = bollinger.bollinger_pband()

        return df_copy
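Most of these indicators need a warm-up window before they produce values (the 99-bar SMA is the longest here), which is why get_features() drops NaN rows afterwards. A small illustration with synthetic data (not from the repo):

import numpy as np
import pandas as pd
import ta

close = pd.Series(np.linspace(100.0, 110.0, 300))
sma_99 = ta.trend.sma_indicator(close, window=99)
print(sma_99.isna().sum())   # leading NaNs from the 99-bar warm-up; dropna() removes those rows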
    def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
        """
        Robust PnL calculator that handles:
@@ -557,33 +627,33 @@ class DataInterface:
        # Calculate technical indicators
        try:
            # Add RSI (14)
            df['rsi'] = ta.rsi(df['close'], length=14)
            df['rsi'] = ta.momentum.rsi(df['close'], window=14)

            # Add MACD
            macd = ta.macd(df['close'])
            df['macd'] = macd['MACD_12_26_9']
            df['macd_signal'] = macd['MACDs_12_26_9']
            df['macd_hist'] = macd['MACDh_12_26_9']
            macd = ta.trend.MACD(df['close'])
            df['macd'] = macd.macd()
            df['macd_signal'] = macd.macd_signal()
            df['macd_hist'] = macd.macd_diff()

            # Add Bollinger Bands
            bbands = ta.bbands(df['close'], length=20)
            df['bb_upper'] = bbands['BBU_20_2.0']
            df['bb_middle'] = bbands['BBM_20_2.0']
            df['bb_lower'] = bbands['BBL_20_2.0']
            bbands = ta.volatility.BollingerBands(df['close'])
            df['bb_upper'] = bbands.bollinger_hband()
            df['bb_middle'] = bbands.bollinger_mavg()
            df['bb_lower'] = bbands.bollinger_lband()

            # Add ATR (Average True Range)
            df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
            df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=14)

            # Add moving averages
            df['sma_20'] = ta.sma(df['close'], length=20)
            df['sma_50'] = ta.sma(df['close'], length=50)
            df['ema_20'] = ta.ema(df['close'], length=20)
            df['sma_20'] = ta.trend.sma_indicator(df['close'], window=20)
            df['sma_50'] = ta.trend.sma_indicator(df['close'], window=50)
            df['ema_20'] = ta.trend.ema_indicator(df['close'], window=20)

            # Add OBV (On-Balance Volume)
            df['obv'] = ta.obv(df['close'], df['volume'])
            df['obv'] = ta.volume.on_balance_volume(df['close'], df['volume'])

            # Add momentum indicators
            df['mom'] = ta.mom(df['close'], length=10)
            df['mom'] = ta.momentum.roc(df['close'], window=10)

            # Normalize price to previous close
            df['close_norm'] = df['close'] / df['close'].shift(1) - 1
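The replaced indicator calls follow the pandas_ta flat API (ta.rsi(..., length=14), ta.macd(...)), while the new ones use the ta package's module layout (ta.momentum.rsi(..., window=14)), so the module imported as ta presumably changes with this commit. Note also that the mom column is now computed with rate-of-change, since ta has no plain momentum indicator. A tiny sanity check of the new-style call (synthetic data, illustrative only):

import pandas as pd
import ta

close = pd.Series(range(1, 101), dtype=float)
rsi = ta.momentum.rsi(close, window=14)
print(rsi.iloc[-1])   # a strictly rising series pushes RSI to 100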