data normalizations
@@ -16,6 +16,7 @@ import logging
import time
import threading
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple, Union
from dataclasses import dataclass, field
@@ -2364,12 +2365,262 @@ class TradingOrchestrator:
            logger.info("Initializing ExtremaTrainer with historical context...")
            self.extrema_trainer.initialize_context_data()

            # CRITICAL: Initialize ALL models with historical data
            self._initialize_models_with_historical_data(loaded_data)

            logger.info(f"🎯 Historical data loading complete: {total_candles} total candles loaded")
            logger.info(f"📊 Available datasets: {list(loaded_data.keys())}")

        except Exception as e:
            logger.error(f"Error in historical data loading: {e}")
    def _initialize_models_with_historical_data(self, loaded_data: Dict[str, Any]):
        """Initialize all NN models with historical data and multi-symbol support."""
        try:
            logger.info("Initializing models with historical data and multi-symbol support...")

            # Prepare multi-symbol feature matrices
            symbol_features = self._prepare_multi_symbol_features(loaded_data)

            # Initialize CNN with multi-symbol data
            if hasattr(self, 'cnn_model') and self.cnn_model:
                logger.info("Initializing CNN with multi-symbol historical features...")
                self._initialize_cnn_with_data(symbol_features)

            # Initialize DQN with multi-symbol states
            if hasattr(self, 'rl_agent') and self.rl_agent:
                logger.info("Initializing DQN with multi-symbol state vectors...")
                self._initialize_dqn_with_data(symbol_features)

            # Initialize Transformer with sequence data
            if hasattr(self, 'transformer_model') and self.transformer_model:
                logger.info("Initializing Transformer with multi-symbol sequences...")
                self._initialize_transformer_with_data(symbol_features)

            # Initialize Decision Fusion with comprehensive features
            if hasattr(self, 'decision_fusion') and self.decision_fusion:
                logger.info("Initializing Decision Fusion with multi-symbol features...")
                self._initialize_decision_with_data(symbol_features)

            logger.info("✅ All models initialized with historical multi-symbol data")

        except Exception as e:
            logger.error(f"Error initializing models with historical data: {e}")
    def _prepare_multi_symbol_features(self, loaded_data: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare normalized multi-symbol feature matrices."""
        try:
            symbol_features = {
                'ETH/USDT': {'1m': None, '1h': None, '1d': None},
                'BTC/USDT': {'1m': None}
            }

            # Process each symbol's data with symbol-specific normalization
            for data_key, df in loaded_data.items():
                if df is None or df.empty:
                    continue

                # Extract symbol and timeframe from keys like 'ETH/USDT_1m'
                if data_key.endswith('_1m'):
                    symbol = data_key.replace('_1m', '')
                    timeframe = '1m'
                elif data_key.endswith('_1h'):
                    symbol = data_key.replace('_1h', '')
                    timeframe = '1h'
                elif data_key.endswith('_1d'):
                    symbol = data_key.replace('_1d', '')
                    timeframe = '1d'
                else:
                    continue

                # Apply symbol-grouped normalization
                normalized_df = self._apply_symbol_grouped_normalization(df, symbol)

                # Guard against symbols outside the predefined map (avoids a KeyError)
                if normalized_df is not None and symbol in symbol_features:
                    symbol_features[symbol][timeframe] = normalized_df
                    logger.debug(f"Prepared normalized features for {symbol} {timeframe}")

            return symbol_features

        except Exception as e:
            logger.error(f"Error preparing multi-symbol features: {e}")
            return {}
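The key parsing above implies that loaded_data is keyed by "<symbol>_<timeframe>" strings. A minimal illustration of that expected shape (the candle values below are made up, and the same frame is reused for every key purely for brevity):

import pandas as pd

candles = pd.DataFrame({
    'open': [2990.0, 3000.0], 'high': [3005.0, 3012.0],
    'low': [2980.0, 2995.0], 'close': [3000.0, 3008.0],
    'volume': [150.0, 90.0],
})

# Keys follow the '<symbol>_<timeframe>' convention parsed above
loaded_data = {
    'ETH/USDT_1m': candles, 'ETH/USDT_1h': candles,
    'ETH/USDT_1d': candles, 'BTC/USDT_1m': candles,
}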
    def _apply_symbol_grouped_normalization(self, df: pd.DataFrame, symbol: str) -> pd.DataFrame:
        """Apply symbol-grouped normalization with consistent ranges across timeframes."""
        try:
            df_norm = df.copy()

            # Get symbol-specific price ranges for consistent normalization
            symbol_price_ranges = {
                'ETH/USDT': {'min': 1000, 'max': 5000},    # ETH price range
                'BTC/USDT': {'min': 90000, 'max': 120000}  # BTC price range
            }

            if symbol in symbol_price_ranges:
                price_range = symbol_price_ranges[symbol]
                range_size = price_range['max'] - price_range['min']

                # Normalize price columns to a [0, 1] range specific to the symbol
                price_cols = ['open', 'high', 'low', 'close']
                for col in price_cols:
                    if col in df_norm.columns:
                        df_norm[col] = (df_norm[col] - price_range['min']) / range_size
                        df_norm[col] = np.clip(df_norm[col], 0, 1)  # Ensure [0, 1] range

                # Normalize volume to [0, 1] using a log scale
                if 'volume' in df_norm.columns:
                    df_norm['volume'] = np.log1p(df_norm['volume'])
                    vol_max = df_norm['volume'].max()
                    if vol_max > 0:
                        df_norm['volume'] = df_norm['volume'] / vol_max

                logger.debug(f"Applied symbol-grouped normalization for {symbol}")

            # Fill any NaN values
            df_norm = df_norm.fillna(0)

            return df_norm

        except Exception as e:
            logger.error(f"Error in symbol-grouped normalization for {symbol}: {e}")
            return df
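As a worked trace of the fixed-range scheme above (a sketch using the hard-coded ETH range of 1000 to 5000): a close of 3000 maps to (3000 - 1000) / 4000 = 0.5, and volume is log-compressed before being divided by its own maximum:

import numpy as np
import pandas as pd

df = pd.DataFrame({'close': [3000.0], 'volume': [1200.0]})
print((df['close'] - 1000) / (5000 - 1000))  # 0.5
v = np.log1p(df['volume'])
print(v / v.max())                           # 1.0 (a single row is its own max)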
    def _initialize_cnn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize CNN with a multi-symbol feature matrix."""
        try:
            # Create combined feature matrix: [ETH_1m, ETH_1h, ETH_1d, BTC_1m]
            combined_features = []

            # ETH features (1m, 1h, 1d)
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use the last 60 candles for CNN input
                    recent_data = eth_data.tail(60)
                    features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                    combined_features.append(features.flatten())

            # BTC features (1m)
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                recent_data = btc_data.tail(60)
                features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                combined_features.append(features.flatten())

            if combined_features:
                # Concatenate all features
                full_features = np.concatenate(combined_features)
                logger.info(f"CNN initialized with {len(full_features)} multi-symbol features")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['cnn'] = full_features

        except Exception as e:
            logger.error(f"Error initializing CNN with historical data: {e}")
    def _initialize_dqn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize DQN with multi-symbol state vectors."""
        try:
            # Create a comprehensive state vector combining all symbols and timeframes
            state_components = []

            for symbol in ['ETH/USDT', 'BTC/USDT']:
                timeframes = ['1m', '1h', '1d'] if symbol == 'ETH/USDT' else ['1m']

                for timeframe in timeframes:
                    data = symbol_features.get(symbol, {}).get(timeframe)
                    if data is not None and not data.empty:
                        # Extract key features for the state
                        latest = data.iloc[-1]
                        state_features = [
                            latest['close'],   # Current price
                            latest['volume'],  # Current volume
                            data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,  # Price change
                        ]
                        state_components.extend(state_features)

            if state_components:
                # Pad or truncate to the expected DQN state size
                target_size = 100  # DQN expects a 100-dimensional state
                if len(state_components) < target_size:
                    state_components.extend([0] * (target_size - len(state_components)))
                else:
                    state_components = state_components[:target_size]

                state_vector = np.array(state_components, dtype=np.float32)
                logger.info(f"DQN initialized with {len(state_vector)} dimensional multi-symbol state")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['dqn'] = state_vector

        except Exception as e:
            logger.error(f"Error initializing DQN with historical data: {e}")
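Assuming all four datasets are present, the loop above yields 4 x 3 = 12 raw components, which are then zero-padded to the fixed 100-dimensional state. A self-contained sketch of that padding step (the feature values are hypothetical):

import numpy as np

raw = [0.52, 0.31, 0.004] * 4                     # 4 symbol/timeframe blocks x 3 features each
target_size = 100                                 # fixed DQN state size
padded = raw + [0.0] * (target_size - len(raw))   # zero-pad (12 -> 100)
state = np.array(padded[:target_size], dtype=np.float32)
assert state.shape == (100,)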
    def _initialize_transformer_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Transformer with multi-symbol sequence data."""
        try:
            # Prepare sequence data for the transformer
            sequences = []

            # ETH sequences
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use the last 150 points as a sequence
                    sequence = eth_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                    sequences.append(sequence)

            # BTC sequence
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                sequence = btc_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                sequences.append(sequence)

            if sequences:
                logger.info(f"Transformer initialized with {len(sequences)} multi-symbol sequences")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['transformer'] = sequences

        except Exception as e:
            logger.error(f"Error initializing Transformer with historical data: {e}")
    def _initialize_decision_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Decision Fusion with comprehensive multi-symbol features."""
        try:
            # Aggregate all available features for decision fusion
            all_features = {}

            for symbol in symbol_features:
                for timeframe in symbol_features[symbol]:
                    data = symbol_features[symbol][timeframe]
                    if data is not None and not data.empty:
                        key = f"{symbol}_{timeframe}"
                        all_features[key] = {
                            'latest_price': data['close'].iloc[-1],
                            'volume': data['volume'].iloc[-1],
                            'price_change': data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,
                            'volatility': data['close'].std() if len(data) > 1 else 0
                        }

            if all_features:
                logger.info(f"Decision Fusion initialized with {len(all_features)} symbol-timeframe combinations")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['decision'] = all_features

        except Exception as e:
            logger.error(f"Error initializing Decision Fusion with historical data: {e}")
    def get_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 300) -> List:
        """Get OHLCV data for a symbol with the specified timeframe and limit."""
        try:
core/reward_calculator.py (new file, 177 lines)
@@ -0,0 +1,177 @@
"""
Improved Reward Function for RL Trading Agent

This module provides a more sophisticated reward function for the RL trading
agent, incorporating realistic trading fees, penalties for excessive trading,
and rewards for successfully holding positions.
"""

import numpy as np
from datetime import datetime, timedelta
from collections import deque
import logging

logger = logging.getLogger(__name__)
class RewardCalculator:
    """Reward shaping for the RL trading agent: fee-aware P&L, frequency penalties, and risk adjustment."""

    def __init__(self, base_fee_rate=0.001, reward_scaling=10.0, risk_aversion=0.1):
        self.base_fee_rate = base_fee_rate
        self.reward_scaling = reward_scaling
        self.risk_aversion = risk_aversion
        self.trade_pnls = []
        self.returns = []
        self.trade_timestamps = []
        self.frequency_threshold = 10  # Trades per minute threshold for penalty
        self.max_frequency_penalty = 0.05
    def record_pnl(self, pnl):
        """Record P&L for risk-adjustment calculations."""
        self.trade_pnls.append(pnl)
        if len(self.trade_pnls) > 100:
            self.trade_pnls.pop(0)

    def record_trade(self, action):
        """Record a trade action for frequency-penalty calculations."""
        from time import time
        self.trade_timestamps.append(time())
        if len(self.trade_timestamps) > 100:
            self.trade_timestamps.pop(0)
    def _calculate_frequency_penalty(self):
        """Calculate a penalty for high-frequency trading."""
        if len(self.trade_timestamps) < 2:
            return 0.0
        time_span = self.trade_timestamps[-1] - self.trade_timestamps[0]
        if time_span <= 0:
            return 0.0
        trades_per_minute = (len(self.trade_timestamps) / time_span) * 60
        if trades_per_minute > self.frequency_threshold:
            return min(self.max_frequency_penalty, (trades_per_minute - self.frequency_threshold) * 0.001)
        return 0.0
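For intuition, a quick trace of the penalty arithmetic (assuming the RewardCalculator class above): 21 trades spread over 60 seconds is 21 trades per minute, 11 over the threshold of 10, so the penalty is min(0.05, 11 * 0.001) = 0.011:

import time

calc = RewardCalculator()
now = time.time()
calc.trade_timestamps = [now - 60 + i * 3 for i in range(21)]  # 21 trades over 60 s
print(calc._calculate_frequency_penalty())  # min(0.05, (21 - 10) * 0.001) ≈ 0.011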
    def _calculate_risk_adjustment(self, reward):
        """Adjust rewards based on risk (a simple Sharpe-ratio scaling)."""
        if len(self.trade_pnls) < 5:
            return reward
        pnl_array = np.array(self.trade_pnls)
        mean_return = np.mean(pnl_array)
        std_return = np.std(pnl_array)
        if std_return == 0:
            return reward
        sharpe = mean_return / std_return
        adjustment_factor = np.clip(1.0 + 0.5 * sharpe, 0.5, 2.0)
        return reward * adjustment_factor
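A numeric check of the Sharpe-style scaling (again assuming the class above): ten recorded P&Ls with mean 0.01 and standard deviation 0.02 give a ratio of 0.5, so rewards are multiplied by clip(1 + 0.5 * 0.5, 0.5, 2.0) = 1.25:

calc = RewardCalculator()
calc.trade_pnls = [0.03, -0.01] * 5          # mean 0.01, std 0.02
print(calc._calculate_risk_adjustment(1.0))  # 1.0 * 1.25 ≈ 1.25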
    def _calculate_holding_reward(self, position_held_time, price_change):
        """Calculate the reward for holding a position."""
        base_holding_reward = 0.0005 * (position_held_time / 60.0)
        if price_change > 0:
            return base_holding_reward * 2
        elif price_change < 0:
            return base_holding_reward * 0.5
        return base_holding_reward
    def calculate_basic_reward(self, pnl, confidence):
        """Calculate a basic training reward from P&L and confidence."""
        try:
            # Reward based on net P&L after fees, with confidence alignment
            base_reward = pnl
            # Stronger penalty for confident wrong decisions
            if pnl < 0 and confidence >= 0.6:
                confidence_adjustment = -confidence * 3.0
            elif pnl > 0 and confidence >= 0.6:
                confidence_adjustment = confidence * 1.0
            else:
                confidence_adjustment = 0.0
            final_reward = base_reward + confidence_adjustment
            # Reduce tanh compression so small P&L changes are not flattened
            normalized_reward = np.tanh(final_reward / 2.5)
            logger.debug(f"Basic reward calculation: P&L={pnl:.4f}, confidence={confidence:.2f}, reward={normalized_reward:.4f}")
            return float(normalized_reward)
        except Exception as e:
            logger.error(f"Error calculating basic reward: {e}")
            return 0.0
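To make the confidence shaping concrete, a worked trace (not part of the commit): a losing trade with pnl = -0.5 at confidence 0.8 is adjusted by -0.8 * 3.0 = -2.4, giving tanh(-2.9 / 2.5) ≈ -0.82, while the same loss below the 0.6 confidence cutoff stays at tanh(-0.5 / 2.5) ≈ -0.20:

calc = RewardCalculator()
print(calc.calculate_basic_reward(pnl=-0.5, confidence=0.8))  # ≈ -0.82
print(calc.calculate_basic_reward(pnl=-0.5, confidence=0.5))  # ≈ -0.20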
    def calculate_enhanced_reward(self, action, price_change, position_held_time=0,
                                  volatility=None, is_profitable=False, confidence=0.0,
                                  predicted_change=0.0, actual_change=0.0,
                                  current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate an enhanced reward for trading actions."""
        fee = self.base_fee_rate
        frequency_penalty = self._calculate_frequency_penalty()
        if action == 0:  # Buy
            reward = -fee - frequency_penalty
        elif action == 1:  # Sell
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)  # Entry and exit fees
            reward = net_profit * self.reward_scaling
            reward -= frequency_penalty
            self.record_pnl(net_profit)
        else:  # Hold
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
            else:
                reward = -0.0001

        # Bonus or penalty for directional prediction accuracy
        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        if volatility is not None:
            reward -= abs(volatility) * 100
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion
        self.record_trade(action)
        return reward
    def calculate_prediction_reward(self, symbol, predicted_direction, actual_direction,
                                    confidence, predicted_change, actual_change,
                                    current_pnl=0.0, position_duration=0.0):
        """Calculate a reward for prediction accuracy."""
        reward = 0.0
        if predicted_direction == actual_direction:
            reward += 1.0 * confidence
        else:
            reward -= 0.5
        if predicted_direction == actual_direction and abs(predicted_change) > 0.001:
            reward += abs(actual_change) * 5.0
        if predicted_direction != actual_direction and abs(predicted_change) > 0.001:
            reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        # Dynamic adjustment based on recent P&L (loss-cutting incentive)
        if hasattr(self, 'pnl_history') and symbol in self.pnl_history and self.pnl_history[symbol]:
            latest_pnl_entry = self.pnl_history[symbol][-1]
            latest_pnl_value = latest_pnl_entry.get('pnl', 0.0) if isinstance(latest_pnl_entry, dict) else 0.0
            if latest_pnl_value < 0 and position_duration > 60:
                reward -= abs(latest_pnl_value) * 0.2
            pnl_values = [entry.get('pnl', 0.0) for entry in self.pnl_history[symbol] if isinstance(entry, dict)]
            best_pnl = max(pnl_values) if pnl_values else 0.0
            if best_pnl < 0.0:
                reward -= 0.1
        return reward
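A short trace of the prediction path (a sketch; note the pnl_history branch only fires if a caller has attached that attribute, since the class never sets it itself): a correct direction call at confidence 0.7 with a 0.5% predicted and actual move earns 1.0 * 0.7 + 0.005 * 5.0 = 0.725:

calc = RewardCalculator()
r = calc.calculate_prediction_reward(
    symbol='ETH/USDT', predicted_direction=1, actual_direction=1,
    confidence=0.7, predicted_change=0.005, actual_change=0.005,
)
print(r)  # 0.7 + 0.025 = 0.725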
# Example usage:
if __name__ == "__main__":
    # Create a calculator instance
    reward_calc = RewardCalculator()

    # Example reward for a buy action
    buy_reward = reward_calc.calculate_enhanced_reward(action=0, price_change=0)
    print(f"Buy action reward: {buy_reward:.5f}")

    # Record a trade for frequency tracking
    reward_calc.record_trade(0)

    # Wait briefly and trade again to exercise the frequency penalty
    import time
    time.sleep(0.1)

    # Example reward for a sell action with profit
    sell_reward = reward_calc.calculate_enhanced_reward(action=1, price_change=0.015, position_held_time=60)
    print(f"Sell action reward (with profit): {sell_reward:.5f}")

    # Example reward for a hold action on a profitable position
    hold_reward = reward_calc.calculate_enhanced_reward(action=2, price_change=0.01, position_held_time=30, is_profitable=True)
    print(f"Hold action reward (profitable): {hold_reward:.5f}")

    # Example reward for a hold action on an unprofitable position
    hold_reward_neg = reward_calc.calculate_enhanced_reward(action=2, price_change=-0.01, position_held_time=30, is_profitable=False)
    print(f"Hold action reward (unprofitable): {hold_reward_neg:.5f}")