new_2

2025-05-24 02:15:25 +03:00
parent 6e8ec97539
commit b181d11923
6 changed files with 1117 additions and 254 deletions
--- a/core/data_provider.py
+++ b/core/data_provider.py
@@ -149,41 +149,166 @@ class DataProvider:
            return None
    
    def _add_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Add technical indicators to the DataFrame"""
+        """Add comprehensive technical indicators for multi-timeframe analysis.
+
+        Works on a copy of ``df`` and reads its ``high``, ``low``, ``close`` and
+        ``volume`` columns. Frames with fewer than 50 rows are delegated to
+        ``_add_basic_indicators``; so is any frame whose processing raises.
+
+        Args:
+            df: Candle data to enrich.
+
+        Returns:
+            The copy with indicator columns appended and NaN values filled
+            (forward fill, then backward fill, then 0).
+        """
        try:
            df = df.copy()
            
-            # Moving averages
+            # Ensure we have enough data for indicators
+            if len(df) < 50:
+                logger.warning(f"Insufficient data for comprehensive indicators: {len(df)} rows")
+                return self._add_basic_indicators(df)
+            
+            # === TREND INDICATORS ===
+            # Moving averages (multiple timeframes)
+            df['sma_10'] = ta.trend.sma_indicator(df['close'], window=10)
            df['sma_20'] = ta.trend.sma_indicator(df['close'], window=20)
            df['sma_50'] = ta.trend.sma_indicator(df['close'], window=50)
            df['ema_12'] = ta.trend.ema_indicator(df['close'], window=12)
            df['ema_26'] = ta.trend.ema_indicator(df['close'], window=26)
+            df['ema_50'] = ta.trend.ema_indicator(df['close'], window=50)
            
-            # MACD
+            # MACD family
            macd = ta.trend.MACD(df['close'])
            df['macd'] = macd.macd()
            df['macd_signal'] = macd.macd_signal()
            df['macd_histogram'] = macd.macd_diff()
            
-            # RSI
-            df['rsi'] = ta.momentum.rsi(df['close'], window=14)
+            # ADX (Average Directional Index)
+            adx = ta.trend.ADXIndicator(df['high'], df['low'], df['close'])
+            df['adx'] = adx.adx()
+            df['adx_pos'] = adx.adx_pos()
+            df['adx_neg'] = adx.adx_neg()
            
+            # Parabolic SAR
+            psar = ta.trend.PSARIndicator(df['high'], df['low'], df['close'])
+            df['psar'] = psar.psar()
+            
+            # === MOMENTUM INDICATORS ===
+            # RSI (multiple periods); note the column is now 'rsi_14', the old 'rsi' column is gone
+            df['rsi_14'] = ta.momentum.rsi(df['close'], window=14)
+            df['rsi_7'] = ta.momentum.rsi(df['close'], window=7)
+            df['rsi_21'] = ta.momentum.rsi(df['close'], window=21)
+            
+            # Stochastic Oscillator
+            stoch = ta.momentum.StochasticOscillator(df['high'], df['low'], df['close'])
+            df['stoch_k'] = stoch.stoch()
+            df['stoch_d'] = stoch.stoch_signal()
+            
+            # Williams %R
+            df['williams_r'] = ta.momentum.williams_r(df['high'], df['low'], df['close'])
+            
+            # Ultimate Oscillator (instead of CCI which isn't available)
+            df['ultimate_osc'] = ta.momentum.ultimate_oscillator(df['high'], df['low'], df['close'])
+            
+            # === VOLATILITY INDICATORS ===
            # Bollinger Bands
            bollinger = ta.volatility.BollingerBands(df['close'])
            df['bb_upper'] = bollinger.bollinger_hband()
            df['bb_lower'] = bollinger.bollinger_lband()
            df['bb_middle'] = bollinger.bollinger_mavg()
+            # Band width relative to the middle band, and position of close inside the bands
+            df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_middle']
+            df['bb_percent'] = (df['close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'])
            
-            # Volume moving average (simple rolling mean since ta.volume.volume_sma doesn't exist)
-            df['volume_sma'] = df['volume'].rolling(window=20).mean()
+            # Average True Range
+            df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'])
+            
+            # Keltner Channels
+            keltner = ta.volatility.KeltnerChannel(df['high'], df['low'], df['close'])
+            df['keltner_upper'] = keltner.keltner_channel_hband()
+            df['keltner_lower'] = keltner.keltner_channel_lband()
+            df['keltner_middle'] = keltner.keltner_channel_mband()
+            
+            # === VOLUME INDICATORS ===
+            # Volume moving averages
+            df['volume_sma_10'] = df['volume'].rolling(window=10).mean()
+            df['volume_sma_20'] = df['volume'].rolling(window=20).mean()
+            df['volume_sma_50'] = df['volume'].rolling(window=50).mean()
+            
+            # On Balance Volume
+            df['obv'] = ta.volume.on_balance_volume(df['close'], df['volume'])
+            
+            # Volume Price Trend
+            df['vpt'] = ta.volume.volume_price_trend(df['close'], df['volume'])
+            
+            # Money Flow Index
+            df['mfi'] = ta.volume.money_flow_index(df['high'], df['low'], df['close'], df['volume'])
+            
+            # Accumulation/Distribution Line
+            df['ad_line'] = ta.volume.acc_dist_index(df['high'], df['low'], df['close'], df['volume'])
+            
+            # Volume Weighted Average Price (VWAP)
+            # Cumulative over every row of df (cumsum from the first row); uses close, not typical price
+            df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
+            
+            # === PRICE ACTION INDICATORS ===
+            # Price position relative to range
+            df['price_position'] = (df['close'] - df['low']) / (df['high'] - df['low'])
+            
+            # True Range (use ATR calculation for true range)
+            # NOTE(review): this makes 'true_range' an exact copy of the 'atr' column — confirm intended
+            df['true_range'] = df['atr']  # ATR is based on true range, so use it directly
+            
+            # Rate of Change
+            df['roc'] = ta.momentum.roc(df['close'], window=10)
+            
+            # === CUSTOM INDICATORS ===
+            # Trend strength (combination of multiple trend indicators)
+            # Fraction (0, 0.25, ..., 1) of the four boolean conditions below that hold
+            df['trend_strength'] = (
+                (df['close'] > df['sma_20']).astype(int) +
+                (df['sma_10'] > df['sma_20']).astype(int) +
+                (df['macd'] > df['macd_signal']).astype(int) +
+                (df['adx'] > 25).astype(int)
+            ) / 4.0
+            
+            # Momentum composite
+            # NOTE(review): _normalize_features treats stoch_k as 0-100 and williams_r as
+            # -100 to 0; with those ranges neither +50 term is individually 0-1 — confirm intent.
+            df['momentum_composite'] = (
+                (df['rsi_14'] / 100) +
+                ((df['stoch_k'] + 50) / 100) +  # stoch_k shifted by 50, then scaled by 1/100
+                ((df['williams_r'] + 50) / 100)  # williams_r shifted by 50, then scaled by 1/100
+            ) / 3.0
+            
+            # Volatility regime
+            # Percentile rank of the latest atr/close ratio within its trailing 20-row window
+            df['volatility_regime'] = (df['atr'] / df['close']).rolling(window=20).rank(pct=True)
+            
+            # === FILL NaN VALUES ===
+            # Forward fill first, then backward fill, then zero fill
+            # NOTE(review): only NaN is handled here; presumably +/-inf from a zero
+            # denominator above would pass through unchanged — verify.
+            df = df.ffill().bfill().fillna(0)
+            
+            logger.debug(f"Added {len([col for col in df.columns if col not in ['timestamp', 'open', 'high', 'low', 'close', 'volume']])} technical indicators")
+            return df
+            
+        except Exception as e:
+            logger.error(f"Error adding comprehensive technical indicators: {e}")
+            # Fallback to basic indicators
+            return self._add_basic_indicators(df)
+    
+    def _add_basic_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Add basic indicators for small datasets.
+
+        Works on a copy of ``df``. Each optional indicator is only added when
+        the frame has at least as many rows as the check guarding it (20, 14
+        and 10 rows); ``price_position`` is always added.
+
+        Args:
+            df: Candle data; ``close``, ``high`` and ``low`` are always read,
+                ``volume`` only when there are at least 10 rows.
+
+        Returns:
+            The enriched copy with NaN values filled (forward fill, backward
+            fill, then 0). If an exception is raised the error is logged and
+            ``df`` is returned as it was bound at that point.
+        """
+        try:
+            df = df.copy()
+            
+            # Basic moving averages
+            if len(df) >= 20:
+                df['sma_20'] = ta.trend.sma_indicator(df['close'], window=20)
+                df['ema_12'] = ta.trend.ema_indicator(df['close'], window=12)
+            
+            # Basic RSI
+            if len(df) >= 14:
+                df['rsi_14'] = ta.momentum.rsi(df['close'], window=14)
+            
+            # Basic volume indicators
+            if len(df) >= 10:
+                df['volume_sma_10'] = df['volume'].rolling(window=10).mean()
+            
+            # Basic price action
+            df['price_position'] = (df['close'] - df['low']) / (df['high'] - df['low'])
+            # NaN here presumably comes from bars where high == low (0/0) — verify
+            df['price_position'] = df['price_position'].fillna(0.5)  # Default to middle
            
            # Fill NaN values
-            df = df.bfill().fillna(0)
+            df = df.ffill().bfill().fillna(0)
            
            return df
            
        except Exception as e:
-            logger.error(f"Error adding technical indicators: {e}")
+            logger.error(f"Error adding basic indicators: {e}")
            return df
    
    def _load_from_cache(self, symbol: str, timeframe: str) -> Optional[pd.DataFrame]:
@@ -381,37 +506,255 @@ class DataProvider:
    
    def get_feature_matrix(self, symbol: str, timeframes: List[str] = None, 
                          window_size: int = 20) -> Optional[np.ndarray]:
-        """Get feature matrix for multiple timeframes"""
+        """
+        Get comprehensive feature matrix for multiple timeframes with technical indicators
+        
+        Args:
+            symbol: Symbol passed to ``get_latest_candles``.
+            timeframes: Timeframes to include; defaults to ``self.timeframes``.
+            window_size: Number of most recent rows kept per timeframe.
+        
+        Returns:
+            np.ndarray: Shape (n_timeframes, window_size, n_features)
+                       Each timeframe becomes a separate channel for CNN
+            Features are the columns common to every loaded timeframe, ordered
+            by sorted name. Timeframes with missing or too-short data are
+            skipped, so the first axis can be shorter than ``len(timeframes)``.
+            ``None`` is returned when no common features exist, no channel
+            could be built, channel shapes differ, or an exception is raised.
+        """
        try:
            if timeframes is None:
                timeframes = self.timeframes
            
-            features = []
+            feature_channels = []
+            common_feature_names = None
            
+            # First pass: determine common features across all timeframes
+            # timeframe_features maps tf -> (candle frame, selected feature names)
+            timeframe_features = {}
            for tf in timeframes:
-                df = self.get_latest_candles(symbol, tf, limit=window_size + 50)
+                logger.debug(f"Processing timeframe {tf} for {symbol}")
+                df = self.get_latest_candles(symbol, tf, limit=window_size + 100)
                
-                if df is not None and len(df) >= window_size:
-                    # Select feature columns
-                    feature_cols = ['open', 'high', 'low', 'close', 'volume']
-                    if 'sma_20' in df.columns:
-                        feature_cols.extend(['sma_20', 'rsi', 'macd'])
-                    
-                    # Get the latest window
-                    tf_features = df[feature_cols].tail(window_size).values
-                    features.append(tf_features)
+                # Unlike the previous version, a short timeframe is skipped rather than aborting
+                if df is None or len(df) < window_size:
+                    logger.warning(f"Insufficient data for {symbol} {tf}: {len(df) if df is not None else 0} rows")
+                    continue
+                
+                # Get feature columns
+                basic_cols = ['open', 'high', 'low', 'close', 'volume']
+                indicator_cols = [col for col in df.columns 
+                                if col not in basic_cols + ['timestamp'] and not col.startswith('unnamed')]
+                
+                selected_features = self._select_cnn_features(df, basic_cols, indicator_cols)
+                timeframe_features[tf] = (df, selected_features)
+                
+                # Running intersection of the feature names seen so far
+                if common_feature_names is None:
+                    common_feature_names = set(selected_features)
                else:
-                    logger.warning(f"Insufficient data for {symbol} {tf}")
-                    return None
+                    common_feature_names = common_feature_names.intersection(set(selected_features))
            
-            if features:
-                # Stack features from all timeframes
-                return np.stack(features, axis=0)  # Shape: (n_timeframes, window_size, n_features)
+            # Covers both "no timeframe loaded" (None) and "empty intersection" (empty set)
+            if not common_feature_names:
+                logger.error(f"No common features found across timeframes for {symbol}")
+                return None
+            
+            # Convert to sorted list for consistent ordering
+            common_feature_names = sorted(list(common_feature_names))
+            logger.info(f"Using {len(common_feature_names)} common features: {common_feature_names}")
+            
+            # Second pass: create feature channels with common features
+            for tf in timeframes:
+                if tf not in timeframe_features:
+                    continue
+                    
+                df, _ = timeframe_features[tf]
+                
+                # Use only common features
+                try:
+                    tf_features = self._normalize_features(df[common_feature_names].tail(window_size))
+                    
+                    if tf_features is not None and len(tf_features) == window_size:
+                        feature_channels.append(tf_features.values)
+                        logger.debug(f"Added {len(common_feature_names)} features for {tf}")
+                    else:
+                        logger.warning(f"Feature normalization failed for {tf}")
+                except Exception as e:
+                    logger.error(f"Error processing features for {tf}: {e}")
+                    continue
+            
+            if not feature_channels:
+                logger.error(f"No valid feature channels created for {symbol}")
+                return None
+            
+            # Verify all channels have the same shape
+            shapes = [channel.shape for channel in feature_channels]
+            if len(set(shapes)) > 1:
+                logger.error(f"Shape mismatch in feature channels: {shapes}")
+                return None
+            
+            # Stack all timeframe channels
+            feature_matrix = np.stack(feature_channels, axis=0)
+            
+            logger.info(f"Created feature matrix for {symbol}: {feature_matrix.shape} "
+                       f"({len(feature_channels)} timeframes, {window_size} steps, {len(common_feature_names)} features)")
+            
+            return feature_matrix
+            
+        except Exception as e:
+            logger.error(f"Error creating feature matrix for {symbol}: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    
+    def _select_cnn_features(self, df: pd.DataFrame, basic_cols: List[str], indicator_cols: List[str]) -> List[str]:
+        """Pick the feature columns that are fed to the CNN.
+
+        The result is ordered as: ``basic_cols`` first, then every known
+        priority indicator that occurs in ``indicator_cols`` (in priority
+        order), then up to 10 further names from ``indicator_cols``. Names
+        that are not columns of ``df`` are dropped at the end.
+
+        Args:
+            df: Frame whose columns the result is validated against.
+            basic_cols: Names that are always requested (the caller passes OHLCV).
+            indicator_cols: Candidate indicator column names.
+
+        Returns:
+            The selected column names. On any exception the error is logged
+            and ``basic_cols`` is returned as the fallback.
+        """
+        try:
+            trend = ['sma_10', 'sma_20', 'sma_50', 'ema_12', 'ema_26', 'ema_50',
+                     'macd', 'macd_signal', 'macd_histogram',
+                     'adx', 'adx_pos', 'adx_neg', 'psar']
+            momentum = ['rsi_14', 'rsi_7', 'rsi_21',
+                        'stoch_k', 'stoch_d', 'williams_r', 'ultimate_osc']
+            volatility = ['bb_upper', 'bb_lower', 'bb_middle', 'bb_width', 'bb_percent',
+                          'atr', 'keltner_upper', 'keltner_lower', 'keltner_middle']
+            volume = ['volume_sma_10', 'volume_sma_20', 'obv', 'vpt', 'mfi', 'ad_line', 'vwap']
+            price_action = ['price_position', 'true_range', 'roc']
+            composites = ['trend_strength', 'momentum_composite', 'volatility_regime']
+
+            # Priority order: trend, momentum, volatility, volume, price action, composites
+            ranked_names = [*trend, *momentum, *volatility, *volume, *price_action, *composites]
+
+            # Basic columns always lead the selection
+            chosen = list(basic_cols)
+            chosen += [name for name in ranked_names if name in indicator_cols]
+
+            # Top up with at most 10 indicators outside the priority list
+            leftovers = [name for name in indicator_cols if name not in chosen]
+            chosen += leftovers[:10]
+
+            # Keep only names that are real columns of the frame
+            present = [name for name in chosen if name in df.columns]
+
+            logger.debug(f"Selected {len(present)} features from {len(df.columns)} available columns")
+            return present
+
+        except Exception as e:
+            logger.error(f"Error selecting CNN features: {e}")
+            return basic_cols  # Fallback to basic OHLCV
+    
+    def _normalize_features(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
+        """Normalize features for CNN training.
+
+        The scaling applied to a column depends on its name:
+
+        * price-level columns (OHLC, moving averages, bands, PSAR, VWAP) are
+          divided by the latest close;
+        * ``volume`` is divided by the latest value of its rolling mean;
+        * RSI and stochastic columns are divided by 100;
+        * ``williams_r`` is shifted by 100 and divided by 100;
+        * MACD columns are divided by the latest ATR, or by the latest close
+          when no positive ATR is available;
+        * columns expected to be 0-1 already are clipped to [0, 1];
+        * ``atr`` / ``true_range`` are divided by the latest close;
+        * everything else is z-scored with the latest rolling mean/std.
+
+        The latest close and latest ATR are read from the untouched input
+        ``df`` before any column is rescaled, so the result does not depend
+        on the order of the columns.
+
+        Args:
+            df: Window of feature columns to normalize; it is not modified.
+
+        Returns:
+            A normalized copy with +/-inf and NaN replaced by 0. If an
+            exception is raised the error is logged and the input ``df`` is
+            returned unchanged (best-effort fallback).
+        """
+        try:
+            df_norm = df.copy()
+
+            price_cols = ('open', 'high', 'low', 'close', 'sma_10', 'sma_20', 'sma_50',
+                          'ema_12', 'ema_26', 'ema_50', 'bb_upper', 'bb_lower', 'bb_middle',
+                          'keltner_upper', 'keltner_lower', 'keltner_middle', 'psar', 'vwap')
+            percent_cols = ('rsi_14', 'rsi_7', 'rsi_21', 'stoch_k', 'stoch_d')
+            macd_cols = ('macd', 'macd_signal', 'macd_histogram')
+            bounded_cols = ('bb_width', 'bb_percent', 'price_position', 'trend_strength',
+                            'momentum_composite', 'volatility_regime')
+            range_cols = ('atr', 'true_range')
+
+            # Snapshot the reference values from the raw input. Reading them from
+            # df_norm inside the loop would pick up already-rescaled values: once
+            # 'close' has been divided by itself its last value is 1.0, and
+            # every later price column would be divided by 1.0.
+            has_close = 'close' in df.columns
+            last_close = df['close'].iloc[-1] if has_close else None
+            has_atr = 'atr' in df.columns
+            last_atr = df['atr'].iloc[-1] if has_atr else None
+            window = min(20, len(df_norm))
+
+            for col in df_norm.columns:
+                if col in price_cols:
+                    # Price-based indicators: normalize by the latest close
+                    if has_close and last_close > 0:
+                        df_norm[col] = df_norm[col] / last_close
+
+                elif col == 'volume':
+                    # Volume: normalize by its own rolling mean
+                    volume_mean = df_norm[col].rolling(window=window).mean().iloc[-1]
+                    if volume_mean > 0:
+                        df_norm[col] = df_norm[col] / volume_mean
+
+                elif col in percent_cols:
+                    # RSI / Stochastic: already 0-100, normalize to 0-1
+                    df_norm[col] = df_norm[col] / 100.0
+
+                elif col == 'williams_r':
+                    # Williams %R: -100 to 0, normalize to 0-1
+                    df_norm[col] = (df_norm[col] + 100) / 100.0
+
+                elif col in macd_cols:
+                    # MACD: normalize by the latest raw ATR, else by the latest close
+                    if has_atr and last_atr > 0:
+                        df_norm[col] = df_norm[col] / last_atr
+                    elif has_close:
+                        df_norm[col] = df_norm[col] / last_close
+
+                elif col in bounded_cols:
+                    # Already normalized indicators: ensure 0-1 range
+                    df_norm[col] = np.clip(df_norm[col], 0, 1)
+
+                elif col in range_cols:
+                    # Volatility indicators: normalize by the latest close
+                    if has_close:
+                        df_norm[col] = df_norm[col] / last_close
+
+                else:
+                    # Other indicators: z-score against the latest rolling window
+                    rolling = df_norm[col].rolling(window=window)
+                    col_mean = rolling.mean().iloc[-1]
+                    col_std = rolling.std().iloc[-1]
+                    if col_std > 0:
+                        df_norm[col] = (df_norm[col] - col_mean) / col_std
+                    else:
+                        df_norm[col] = 0
+
+            # Replace inf/-inf (e.g. from a zero divisor) with 0
+            df_norm = df_norm.replace([np.inf, -np.inf], 0)
+
+            # Fill any remaining NaN values
+            df_norm = df_norm.fillna(0)
+
+            return df_norm
+
+        except Exception as e:
+            logger.error(f"Error normalizing features: {e}")
+            return df
+    
+    def get_multi_symbol_feature_matrix(self, symbols: List[str] = None, 
+                                      timeframes: List[str] = None, 
+                                      window_size: int = 20) -> Optional[np.ndarray]:
+        """
+        Get feature matrix for multiple symbols and timeframes
+        
+        Args:
+            symbols: Symbols to include; defaults to ``self.symbols``.
+            timeframes: Timeframes to include; defaults to ``self.timeframes``.
+            window_size: Forwarded to ``get_feature_matrix`` for every symbol.
+        
+        Returns:
+            np.ndarray: Shape (n_symbols, n_timeframes, window_size, n_features)
+            Symbols whose matrix could not be built are skipped with a warning,
+            so the first axis can be shorter than ``len(symbols)`` and is not
+            index-aligned with it. ``None`` is returned when no symbol produced
+            a matrix or when an exception (including a failed stack) is raised.
+        """
+        try:
+            if symbols is None:
+                symbols = self.symbols
+            if timeframes is None:
+                timeframes = self.timeframes
+            
+            symbol_matrices = []
+            
+            for symbol in symbols:
+                symbol_matrix = self.get_feature_matrix(symbol, timeframes, window_size)
+                if symbol_matrix is not None:
+                    symbol_matrices.append(symbol_matrix)
+                else:
+                    logger.warning(f"Could not create feature matrix for {symbol}")
+            
+            if symbol_matrices:
+                # Stack all symbol matrices
+                # NOTE(review): presumably requires identical per-symbol shapes; a
+                # mismatch would land in the except branch below — verify.
+                multi_symbol_matrix = np.stack(symbol_matrices, axis=0)
+                logger.info(f"Created multi-symbol feature matrix: {multi_symbol_matrix.shape}")
+                return multi_symbol_matrix
            
            return None
            
        except Exception as e:
-            logger.error(f"Error creating feature matrix for {symbol}: {e}")
+            logger.error(f"Error creating multi-symbol feature matrix: {e}")
            return None
    
    def health_check(self) -> Dict[str, Any]: