data normalizations

2025-09-02 18:51:49 +03:00
parent 1c013f2806
commit 6dcb82c184
4 changed files with 251 additions and 322 deletions
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -16,6 +16,7 @@ import logging
 import time
 import threading
 import numpy as np
+import pandas as pd
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional, Any, Tuple, Union
 from dataclasses import dataclass, field
@@ -2364,12 +2365,262 @@ class TradingOrchestrator:
                logger.info("Initializing ExtremaTrainer with historical context...")
                self.extrema_trainer.initialize_context_data()
            
+            # CRITICAL: Initialize ALL models with historical data
+            self._initialize_models_with_historical_data(loaded_data)
+            
            logger.info(f"🎯 Historical data loading complete: {total_candles} total candles loaded")
            logger.info(f"📊 Available datasets: {list(loaded_data.keys())}")
            
        except Exception as e:
            logger.error(f"Error in historical data loading: {e}")

+    def _initialize_models_with_historical_data(self, loaded_data: Dict[str, Any]):
+        """Initialize all NN models with historical data and multi-symbol support"""
+        try:
+            logger.info("Initializing models with historical data and multi-symbol support...")
+            
+            # Prepare multi-symbol feature matrices
+            symbol_features = self._prepare_multi_symbol_features(loaded_data)
+            
+            # Initialize CNN with multi-symbol data
+            if hasattr(self, 'cnn_model') and self.cnn_model:
+                logger.info("Initializing CNN with multi-symbol historical features...")
+                self._initialize_cnn_with_data(symbol_features)
+            
+            # Initialize DQN with multi-symbol states
+            if hasattr(self, 'rl_agent') and self.rl_agent:
+                logger.info("Initializing DQN with multi-symbol state vectors...")
+                self._initialize_dqn_with_data(symbol_features)
+            
+            # Initialize Transformer with sequence data
+            if hasattr(self, 'transformer_model') and self.transformer_model:
+                logger.info("Initializing Transformer with multi-symbol sequences...")
+                self._initialize_transformer_with_data(symbol_features)
+            
+            # Initialize Decision Fusion with comprehensive features
+            if hasattr(self, 'decision_fusion') and self.decision_fusion:
+                logger.info("Initializing Decision Fusion with multi-symbol features...")
+                self._initialize_decision_with_data(symbol_features)
+                
+            logger.info("✅ All models initialized with historical multi-symbol data")
+            
+        except Exception as e:
+            logger.error(f"Error initializing models with historical data: {e}")
+
+    def _prepare_multi_symbol_features(self, loaded_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Prepare normalized multi-symbol feature matrices"""
+        try:
+            symbol_features = {
+                'ETH/USDT': {'1m': None, '1h': None, '1d': None},
+                'BTC/USDT': {'1m': None}
+            }
+            
+            # Process each symbol's data with symbol-specific normalization
+            for data_key, df in loaded_data.items():
+                if df is None or df.empty:
+                    continue
+                    
+                # Extract symbol and timeframe
+                if '_1m' in data_key:
+                    symbol = data_key.replace('_1m', '')
+                    timeframe = '1m'
+                elif '_1h' in data_key:
+                    symbol = data_key.replace('_1h', '')
+                    timeframe = '1h'
+                elif '_1d' in data_key:
+                    symbol = data_key.replace('_1d', '')
+                    timeframe = '1d'
+                else:
+                    continue
+                
+                # Apply symbol-grouped normalization
+                normalized_df = self._apply_symbol_grouped_normalization(df, symbol)
+                
+                if normalized_df is not None:
+                    symbol_features[symbol][timeframe] = normalized_df
+                    logger.debug(f"Prepared normalized features for {symbol} {timeframe}")
+            
+            return symbol_features
+            
+        except Exception as e:
+            logger.error(f"Error preparing multi-symbol features: {e}")
+            return {}
+
+    def _apply_symbol_grouped_normalization(self, df: pd.DataFrame, symbol: str) -> pd.DataFrame:
+        """Apply symbol-grouped normalization with consistent ranges across timeframes"""
+        try:
+            df_norm = df.copy()
+            
+            # Get symbol-specific price ranges for consistent normalization
+            symbol_price_ranges = {
+                'ETH/USDT': {'min': 1000, 'max': 5000},   # ETH price range
+                'BTC/USDT': {'min': 90000, 'max': 120000}  # BTC price range
+            }
+            
+            if symbol in symbol_price_ranges:
+                price_range = symbol_price_ranges[symbol]
+                range_size = price_range['max'] - price_range['min']
+                
+                # Normalize price columns to [0, 1] range specific to symbol
+                price_cols = ['open', 'high', 'low', 'close']
+                for col in price_cols:
+                    if col in df_norm.columns:
+                        df_norm[col] = (df_norm[col] - price_range['min']) / range_size
+                        df_norm[col] = np.clip(df_norm[col], 0, 1)  # Ensure [0,1] range
+                
+                # Normalize volume to [0, 1] using log scale
+                if 'volume' in df_norm.columns:
+                    df_norm['volume'] = np.log1p(df_norm['volume'])
+                    vol_max = df_norm['volume'].max()
+                    if vol_max > 0:
+                        df_norm['volume'] = df_norm['volume'] / vol_max
+                
+                logger.debug(f"Applied symbol-grouped normalization for {symbol}")
+            
+            # Fill any NaN values
+            df_norm = df_norm.fillna(0)
+            
+            return df_norm
+            
+        except Exception as e:
+            logger.error(f"Error in symbol-grouped normalization for {symbol}: {e}")
+            return df
+
+    def _initialize_cnn_with_data(self, symbol_features: Dict[str, Any]):
+        """Initialize CNN with multi-symbol feature matrix"""
+        try:
+            # Create combined feature matrix: [ETH_1m, ETH_1h, ETH_1d, BTC_1m]
+            combined_features = []
+            
+            # ETH features (1m, 1h, 1d)
+            for timeframe in ['1m', '1h', '1d']:
+                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
+                if eth_data is not None and not eth_data.empty:
+                    # Use last 60 candles for CNN input
+                    recent_data = eth_data.tail(60)
+                    features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
+                    combined_features.append(features.flatten())
+            
+            # BTC features (1m)
+            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
+            if btc_data is not None and not btc_data.empty:
+                recent_data = btc_data.tail(60)
+                features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
+                combined_features.append(features.flatten())
+            
+            if combined_features:
+                # Concatenate all features
+                full_features = np.concatenate(combined_features)
+                logger.info(f"CNN initialized with {len(full_features)} multi-symbol features")
+                
+                # Store for model access
+                if not hasattr(self, 'model_historical_features'):
+                    self.model_historical_features = {}
+                self.model_historical_features['cnn'] = full_features
+                
+        except Exception as e:
+            logger.error(f"Error initializing CNN with historical data: {e}")
+
+    def _initialize_dqn_with_data(self, symbol_features: Dict[str, Any]):
+        """Initialize DQN with multi-symbol state vectors"""
+        try:
+            # Create comprehensive state vector combining all symbols and timeframes
+            state_components = []
+            
+            for symbol in ['ETH/USDT', 'BTC/USDT']:
+                timeframes = ['1m', '1h', '1d'] if symbol == 'ETH/USDT' else ['1m']
+                
+                for timeframe in timeframes:
+                    data = symbol_features.get(symbol, {}).get(timeframe)
+                    if data is not None and not data.empty:
+                        # Extract key features for state
+                        latest = data.iloc[-1]
+                        state_features = [
+                            latest['close'],  # Current price
+                            latest['volume'], # Current volume
+                            data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,  # Price change
+                        ]
+                        state_components.extend(state_features)
+            
+            if state_components:
+                # Pad or truncate to expected DQN state size
+                target_size = 100  # DQN expects 100-dimensional state
+                if len(state_components) < target_size:
+                    state_components.extend([0] * (target_size - len(state_components)))
+                else:
+                    state_components = state_components[:target_size]
+                
+                state_vector = np.array(state_components, dtype=np.float32)
+                logger.info(f"DQN initialized with {len(state_vector)} dimensional multi-symbol state")
+                
+                # Store for model access
+                if not hasattr(self, 'model_historical_features'):
+                    self.model_historical_features = {}
+                self.model_historical_features['dqn'] = state_vector
+                
+        except Exception as e:
+            logger.error(f"Error initializing DQN with historical data: {e}")
+
+    def _initialize_transformer_with_data(self, symbol_features: Dict[str, Any]):
+        """Initialize Transformer with multi-symbol sequence data"""
+        try:
+            # Prepare sequence data for transformer
+            sequences = []
+            
+            # ETH sequences
+            for timeframe in ['1m', '1h', '1d']:
+                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
+                if eth_data is not None and not eth_data.empty:
+                    # Use last 150 points as sequence
+                    sequence = eth_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
+                    sequences.append(sequence)
+            
+            # BTC sequence
+            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
+            if btc_data is not None and not btc_data.empty:
+                sequence = btc_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
+                sequences.append(sequence)
+            
+            if sequences:
+                logger.info(f"Transformer initialized with {len(sequences)} multi-symbol sequences")
+                
+                # Store for model access
+                if not hasattr(self, 'model_historical_features'):
+                    self.model_historical_features = {}
+                self.model_historical_features['transformer'] = sequences
+                
+        except Exception as e:
+            logger.error(f"Error initializing Transformer with historical data: {e}")
+
+    def _initialize_decision_with_data(self, symbol_features: Dict[str, Any]):
+        """Initialize Decision Fusion with comprehensive multi-symbol features"""
+        try:
+            # Aggregate all available features for decision fusion
+            all_features = {}
+            
+            for symbol in symbol_features:
+                for timeframe in symbol_features[symbol]:
+                    data = symbol_features[symbol][timeframe]
+                    if data is not None and not data.empty:
+                        key = f"{symbol}_{timeframe}"
+                        all_features[key] = {
+                            'latest_price': data['close'].iloc[-1],
+                            'volume': data['volume'].iloc[-1],
+                            'price_change': data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,
+                            'volatility': data['close'].std() if len(data) > 1 else 0
+                        }
+            
+            if all_features:
+                logger.info(f"Decision Fusion initialized with {len(all_features)} symbol-timeframe combinations")
+                
+                # Store for model access
+                if not hasattr(self, 'model_historical_features'):
+                    self.model_historical_features = {}
+                self.model_historical_features['decision'] = all_features
+                
+        except Exception as e:
+            logger.error(f"Error initializing Decision Fusion with historical data: {e}")
+
    def get_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 300) -> List:
        """Get OHLCV data for a symbol with specified timeframe and limit."""
        try: