train works

2025-03-31 03:20:12 +03:00
parent 8981ad0691
commit 1610d5bd49
10 changed files with 2554 additions and 406 deletions
--- a/NN/utils/data_interface.py
+++ b/NN/utils/data_interface.py
@@ -209,11 +209,20 @@ class DataInterface:
        price_changes = (next_close - curr_close) / curr_close
        
        # Define thresholds for price movement classification
-        threshold = 0.001  # 0.1% threshold
+        threshold = 0.0005  # 0.05% threshold - smaller to encourage more signals
        y = np.zeros(len(price_changes), dtype=int)
        y[price_changes > threshold] = 2  # Up
+        y[price_changes < -threshold] = 0  # Down
        y[(price_changes >= -threshold) & (price_changes <= threshold)] = 1  # Neutral
        
+        # Log the target distribution to understand our data better
+        sell_count = np.sum(y == 0)
+        hold_count = np.sum(y == 1)
+        buy_count = np.sum(y == 2)
+        total_count = len(y)
+        logger.info(f"Target distribution for {self.symbol} {self.timeframes[0]}: SELL: {sell_count} ({sell_count/total_count:.2%}), " +
+                     f"HOLD: {hold_count} ({hold_count/total_count:.2%}), BUY: {buy_count} ({buy_count/total_count:.2%})")
+        
        logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
        return X, y, timestamps[window_size:]
    
@@ -295,73 +304,107 @@ class DataInterface:

    def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
        """
-        Calculate PnL and win rates based on predictions and actual price movements.
+        Robust PnL calculator that handles:
+        - Action predictions (0=SELL, 1=HOLD, 2=BUY)
+        - Probability predictions (array of [sell_prob, hold_prob, buy_prob])
+        - Single price array or OHLC data
        
        Args:
-            predictions: Array of predicted actions (0=SELL, 1=HOLD, 2=BUY) or probabilities
-            actual_prices: Array of actual close prices
-            position_size: Position size for each trade
+            predictions: Array of predicted actions or probabilities  
+            actual_prices: Array of actual prices (can be 1D or 2D OHLC format)
+            position_size: Position size multiplier
            
        Returns:
-            tuple: (pnl, win_rate, trades) where:
-                pnl is the total profit and loss
-                win_rate is the ratio of winning trades
-                trades is a list of trade dictionaries
+            tuple: (total_pnl, win_rate, trades)
        """
-        # Ensure we have enough prices for the predictions
-        if len(actual_prices) <= 1:
-            logger.error("Not enough price data for PnL calculation")
+        # Convert inputs to numpy arrays if they aren't already
+        try:
+            predictions = np.array(predictions)
+            actual_prices = np.array(actual_prices)
+        except Exception as e:
+            logger.error(f"Error converting inputs: {str(e)}")
            return 0.0, 0.0, []
+
+        # Validate input shapes
+        if len(predictions.shape) > 2 or len(actual_prices.shape) > 2:
+            logger.error("Invalid input dimensions")
+            return 0.0, 0.0, []
+
+        # Convert OHLC data to close prices if needed
+        if len(actual_prices.shape) == 2 and actual_prices.shape[1] >= 4:
+            prices = actual_prices[:, 3]  # Use close prices
+        else:
+            prices = actual_prices
            
-        # Adjust predictions length to match available price data
-        n_prices = len(actual_prices) - 1  # We need current and next price for each prediction
-        if len(predictions) > n_prices:
-            predictions = predictions[:n_prices]
-        elif len(predictions) < n_prices:
-            n_prices = len(predictions)
-            actual_prices = actual_prices[:n_prices + 1]  # +1 to include the next price
+        # Handle case where prices is 2D with single column
+        if len(prices.shape) == 2 and prices.shape[1] == 1:
+            prices = prices.flatten()
+
+        # Convert probabilities to actions if needed
+        if len(predictions.shape) == 2 and predictions.shape[1] > 1:
+            actions = np.argmax(predictions, axis=1)
+        else:
+            actions = predictions
            
-        pnl = 0.0
-        trades = 0
-        wins = 0
-        trade_history = []
+        # Ensure we have enough prices
+        if len(prices) < 2:
+            logger.error("Not enough price data")
+            return 0.0, 0.0, []
+
+        # Trim to matching length
+        min_length = min(len(actions), len(prices)-1)
+        actions = actions[:min_length]
+        prices = prices[:min_length+1]
        
-        for i in range(len(predictions)):
-            pred = predictions[i]
-            current_price = actual_prices[i]
-            next_price = actual_prices[i + 1]
-            
-            # Calculate price change percentage
+        pnl = 0.0
+        wins = 0
+        trades = []
+        
+        for i in range(min_length):
+            current_price = prices[i]
+            next_price = prices[i+1]
+            action = actions[i]
+
+            # Skip HOLD actions
+            if action == 1:
+                continue
+                
            price_change = (next_price - current_price) / current_price
            
-            # Calculate PnL based on prediction
-            if pred == 2:  # Buy
+            if action == 2:  # BUY
                trade_pnl = price_change * position_size
-                trades += 1
-                if trade_pnl > 0:
-                    wins += 1
-                trade_history.append({
-                    'type': 'buy',
-                    'price': current_price,
-                    'pnl': trade_pnl,
-                    'timestamp': self.dataframes[self.timeframes[0]]['timestamp'].iloc[i] if self.dataframes[self.timeframes[0]] is not None else None
-                })
-            elif pred == 0:  # Sell
+                trade_type = 'BUY'
+                is_win = price_change > 0
+            elif action == 0:  # SELL
                trade_pnl = -price_change * position_size
-                trades += 1
-                if trade_pnl > 0:
-                    wins += 1
-                trade_history.append({
-                    'type': 'sell',
-                    'price': current_price,
-                    'pnl': trade_pnl,
-                    'timestamp': self.dataframes[self.timeframes[0]]['timestamp'].iloc[i] if self.dataframes[self.timeframes[0]] is not None else None
-                })
+                trade_type = 'SELL' 
+                is_win = price_change < 0
+            else:
+                continue  # Invalid action
+
+            pnl += trade_pnl
+            wins += int(is_win)
            
-            pnl += trade_pnl if pred in [0, 2] else 0
-            
-        win_rate = wins / trades if trades > 0 else 0.0
-        return pnl, win_rate, trade_history
+            # Track trade details
+            trades.append({
+                'type': trade_type,
+                'entry': current_price,
+                'exit': next_price,
+                'pnl': trade_pnl,
+                'win': is_win,
+                'duration': 1  # In number of candles
+            })
+        
+        win_rate = wins / len(trades) if trades else 0.0
+        
+        # Add timestamps to trades if available
+        if hasattr(self, 'dataframes') and self.timeframes and self.timeframes[0] in self.dataframes:
+            df = self.dataframes[self.timeframes[0]]
+            if df is not None and 'timestamp' in df.columns:
+                for i, trade in enumerate(trades[:len(df)]):
+                    trade['timestamp'] = df['timestamp'].iloc[i]
+        
+        return pnl, win_rate, trades

    def get_future_prices(self, prices, n_candles=3):
        """