new_2
This commit is contained in:
@ -149,41 +149,166 @@ class DataProvider:
|
||||
return None
|
||||
|
||||
def _add_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
    """Add comprehensive technical indicators for multi-timeframe analysis.

    Works on a copy of ``df``; the caller's frame is never modified. Frames
    with fewer than 50 rows are delegated to ``_add_basic_indicators``
    because the longest look-back used here is 50 bars.

    Args:
        df: Candle data with ``high``, ``low``, ``close`` and ``volume``
            columns.

    Returns:
        A new DataFrame holding the input columns plus the indicator
        columns. Infinite values are treated as missing; missing values are
        forward-filled, then back-filled, then zero-filled. If any indicator
        computation raises, the result of ``_add_basic_indicators`` on the
        untouched input is returned instead.
    """
    try:
        # Ensure we have enough data for indicators
        if len(df) < 50:
            logger.warning(f"Insufficient data for comprehensive indicators: {len(df)} rows")
            return self._add_basic_indicators(df)

        # Keep `df` pointing at the pristine input so the fallback below
        # never sees a half-populated frame.
        out = df.copy()
        high, low, close, volume = out['high'], out['low'], out['close'], out['volume']

        # === TREND INDICATORS ===
        # Moving averages (multiple timeframes)
        out['sma_10'] = ta.trend.sma_indicator(close, window=10)
        out['sma_20'] = ta.trend.sma_indicator(close, window=20)
        out['sma_50'] = ta.trend.sma_indicator(close, window=50)
        out['ema_12'] = ta.trend.ema_indicator(close, window=12)
        out['ema_26'] = ta.trend.ema_indicator(close, window=26)
        out['ema_50'] = ta.trend.ema_indicator(close, window=50)

        # MACD family
        macd = ta.trend.MACD(close)
        out['macd'] = macd.macd()
        out['macd_signal'] = macd.macd_signal()
        out['macd_histogram'] = macd.macd_diff()

        # ADX (Average Directional Index)
        adx = ta.trend.ADXIndicator(high, low, close)
        out['adx'] = adx.adx()
        out['adx_pos'] = adx.adx_pos()
        out['adx_neg'] = adx.adx_neg()

        # Parabolic SAR
        psar = ta.trend.PSARIndicator(high, low, close)
        out['psar'] = psar.psar()

        # === MOMENTUM INDICATORS ===
        # RSI (multiple periods)
        out['rsi_14'] = ta.momentum.rsi(close, window=14)
        out['rsi_7'] = ta.momentum.rsi(close, window=7)
        out['rsi_21'] = ta.momentum.rsi(close, window=21)

        # Stochastic Oscillator
        stoch = ta.momentum.StochasticOscillator(high, low, close)
        out['stoch_k'] = stoch.stoch()
        out['stoch_d'] = stoch.stoch_signal()

        # Williams %R
        out['williams_r'] = ta.momentum.williams_r(high, low, close)

        # Ultimate Oscillator (instead of CCI which isn't available)
        out['ultimate_osc'] = ta.momentum.ultimate_oscillator(high, low, close)

        # === VOLATILITY INDICATORS ===
        # Bollinger Bands
        bollinger = ta.volatility.BollingerBands(close)
        out['bb_upper'] = bollinger.bollinger_hband()
        out['bb_lower'] = bollinger.bollinger_lband()
        out['bb_middle'] = bollinger.bollinger_mavg()
        out['bb_width'] = (out['bb_upper'] - out['bb_lower']) / out['bb_middle']
        out['bb_percent'] = (close - out['bb_lower']) / (out['bb_upper'] - out['bb_lower'])

        # Average True Range
        out['atr'] = ta.volatility.average_true_range(high, low, close)

        # Keltner Channels
        keltner = ta.volatility.KeltnerChannel(high, low, close)
        out['keltner_upper'] = keltner.keltner_channel_hband()
        out['keltner_lower'] = keltner.keltner_channel_lband()
        out['keltner_middle'] = keltner.keltner_channel_mband()

        # === VOLUME INDICATORS ===
        # Volume moving averages
        out['volume_sma_10'] = volume.rolling(window=10).mean()
        out['volume_sma_20'] = volume.rolling(window=20).mean()
        out['volume_sma_50'] = volume.rolling(window=50).mean()

        # On Balance Volume
        out['obv'] = ta.volume.on_balance_volume(close, volume)

        # Volume Price Trend
        out['vpt'] = ta.volume.volume_price_trend(close, volume)

        # Money Flow Index
        out['mfi'] = ta.volume.money_flow_index(high, low, close, volume)

        # Accumulation/Distribution Line
        out['ad_line'] = ta.volume.acc_dist_index(high, low, close, volume)

        # Volume Weighted Average Price (VWAP), cumulative from the first
        # row of this frame, so its value depends on how much history was
        # passed in.
        out['vwap'] = (close * volume).cumsum() / volume.cumsum()

        # === PRICE ACTION INDICATORS ===
        # Price position relative to range. A zero-range candle has no
        # defined position, so it defaults to the middle (0.5), matching
        # _add_basic_indicators.
        candle_range = (high - low).replace(0, np.nan)
        out['price_position'] = ((close - low) / candle_range).fillna(0.5)

        # True Range: the per-bar range including the gap from the previous
        # close. (Previously this column was just a copy of `atr`.)
        prev_close = close.shift(1)
        out['true_range'] = pd.concat(
            [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
            axis=1,
        ).max(axis=1)

        # Rate of Change
        out['roc'] = ta.momentum.roc(close, window=10)

        # === CUSTOM INDICATORS ===
        # Trend strength (combination of multiple trend indicators)
        out['trend_strength'] = (
            (close > out['sma_20']).astype(int) +
            (out['sma_10'] > out['sma_20']).astype(int) +
            (out['macd'] > out['macd_signal']).astype(int) +
            (out['adx'] > 25).astype(int)
        ) / 4.0

        # Momentum composite: each term is mapped onto 0..1 before averaging.
        # RSI and %K live on 0..100; Williams %R lives on -100..0.
        out['momentum_composite'] = (
            (out['rsi_14'] / 100.0) +
            (out['stoch_k'] / 100.0) +
            ((out['williams_r'] + 100.0) / 100.0)
        ) / 3.0

        # Volatility regime
        out['volatility_regime'] = (out['atr'] / close).rolling(window=20).rank(pct=True)

        # === FILL NaN VALUES ===
        # Divisions by zero above (flat bands, zero cumulative volume) yield
        # +/-inf, which ffill/bfill/fillna would leave in place; turn them
        # into NaN first. Only numeric columns are touched.
        numeric_cols = out.select_dtypes(include=[np.number]).columns
        out[numeric_cols] = out[numeric_cols].replace([np.inf, -np.inf], np.nan)

        # Forward fill first, then backward fill, then zero fill
        out = out.ffill().bfill().fillna(0)

        logger.debug(f"Added {len([col for col in out.columns if col not in ['timestamp', 'open', 'high', 'low', 'close', 'volume']])} technical indicators")
        return out

    except Exception as e:
        logger.error(f"Error adding comprehensive technical indicators: {e}")
        # Fallback to basic indicators
        return self._add_basic_indicators(df)
|
||||
|
||||
def _add_basic_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
    """Add basic indicators for small datasets.

    Each indicator is added only when the frame has at least as many rows
    as the row threshold checked for it. The caller's frame is not modified.

    Args:
        df: Candle data with ``high``, ``low``, ``close`` and ``volume``
            columns.

    Returns:
        A copy of ``df`` with the indicator columns that could be computed.
        Infinite values are treated as missing; missing values are
        forward-filled, back-filled, then zero-filled. On error the frame
        built so far is returned (best effort), or ``df`` itself if even the
        copy failed.
    """
    out = df
    try:
        out = df.copy()
        n_rows = len(out)

        # Basic moving averages
        if n_rows >= 20:
            out['sma_20'] = ta.trend.sma_indicator(out['close'], window=20)
            out['ema_12'] = ta.trend.ema_indicator(out['close'], window=12)

        # Basic RSI
        if n_rows >= 14:
            out['rsi_14'] = ta.momentum.rsi(out['close'], window=14)

        # Basic volume indicators
        if n_rows >= 10:
            out['volume_sma_10'] = out['volume'].rolling(window=10).mean()

        # Basic price action. A zero-range candle has no defined position
        # (0/0 or x/0), so it defaults to the middle instead of NaN/inf.
        candle_range = (out['high'] - out['low']).replace(0, np.nan)
        out['price_position'] = ((out['close'] - out['low']) / candle_range).fillna(0.5)

        # Fill NaN values. Infinities are converted first because
        # ffill/bfill/fillna do not treat them as missing.
        numeric_cols = out.select_dtypes(include=[np.number]).columns
        out[numeric_cols] = out[numeric_cols].replace([np.inf, -np.inf], np.nan)
        out = out.ffill().bfill().fillna(0)

        return out

    except Exception as e:
        logger.error(f"Error adding basic indicators: {e}")
        return out
|
||||
|
||||
def _load_from_cache(self, symbol: str, timeframe: str) -> Optional[pd.DataFrame]:
|
||||
@ -381,37 +506,255 @@ class DataProvider:
|
||||
|
||||
def get_feature_matrix(self, symbol: str, timeframes: List[str] = None,
                       window_size: int = 20) -> Optional[np.ndarray]:
    """
    Get comprehensive feature matrix for multiple timeframes with technical indicators

    Timeframes with fewer than ``window_size`` candles are skipped, so the
    first dimension can be smaller than ``len(timeframes)``. Only features
    available in every usable timeframe are kept, in alphabetical order.

    Args:
        symbol: Symbol passed to ``self.get_latest_candles``.
        timeframes: Timeframes to include; defaults to ``self.timeframes``.
        window_size: Number of most recent rows taken per timeframe.

    Returns:
        np.ndarray: Shape (n_usable_timeframes, window_size, n_features)
                   Each timeframe becomes a separate channel for CNN.
        None if no timeframe has enough data, the timeframes share no
        feature, no channel could be built, or an error occurs.
    """
    try:
        if timeframes is None:
            timeframes = self.timeframes

        basic_cols = ['open', 'high', 'low', 'close', 'volume']
        excluded_cols = set(basic_cols) | {'timestamp'}

        # First pass: determine common features across all timeframes
        timeframe_features = {}
        common_feature_names = None

        for tf in timeframes:
            logger.debug(f"Processing timeframe {tf} for {symbol}")
            df = self.get_latest_candles(symbol, tf, limit=window_size + 100)

            if df is None or len(df) < window_size:
                logger.warning(f"Insufficient data for {symbol} {tf}: {len(df) if df is not None else 0} rows")
                continue

            # Get feature columns. The prefix test is case-insensitive
            # because pandas names stray index columns 'Unnamed: 0'.
            indicator_cols = [
                col for col in df.columns
                if col not in excluded_cols
                and not str(col).lower().startswith('unnamed')
            ]

            selected_features = self._select_cnn_features(df, basic_cols, indicator_cols)
            timeframe_features[tf] = (df, selected_features)

            if common_feature_names is None:
                common_feature_names = set(selected_features)
            else:
                common_feature_names &= set(selected_features)

        if not timeframe_features:
            logger.error(f"No timeframe with at least {window_size} candles for {symbol}")
            return None

        if not common_feature_names:
            logger.error(f"No common features found across timeframes for {symbol}")
            return None

        # Convert to sorted list for consistent ordering
        common_feature_names = sorted(common_feature_names)
        logger.info(f"Using {len(common_feature_names)} common features: {common_feature_names}")

        # Second pass: create feature channels with common features
        feature_channels = []
        for tf in timeframes:
            if tf not in timeframe_features:
                continue

            df, _ = timeframe_features[tf]

            # Use only common features
            try:
                tf_features = self._normalize_features(df[common_feature_names].tail(window_size))

                if tf_features is not None and len(tf_features) == window_size:
                    feature_channels.append(tf_features.values)
                    logger.debug(f"Added {len(common_feature_names)} features for {tf}")
                else:
                    logger.warning(f"Feature normalization failed for {tf}")
            except Exception as e:
                logger.error(f"Error processing features for {tf}: {e}")
                continue

        if not feature_channels:
            logger.error(f"No valid feature channels created for {symbol}")
            return None

        # Verify all channels have the same shape
        shapes = [channel.shape for channel in feature_channels]
        if len(set(shapes)) > 1:
            logger.error(f"Shape mismatch in feature channels: {shapes}")
            return None

        # Stack all timeframe channels
        feature_matrix = np.stack(feature_channels, axis=0)

        logger.info(f"Created feature matrix for {symbol}: {feature_matrix.shape} "
                    f"({len(feature_channels)} timeframes, {window_size} steps, {len(common_feature_names)} features)")

        return feature_matrix

    except Exception as e:
        # logger.exception attaches the active traceback to the record.
        logger.exception(f"Error creating feature matrix for {symbol}: {e}")
        return None
|
||||
|
||||
def _select_cnn_features(self, df: pd.DataFrame, basic_cols: List[str], indicator_cols: List[str],
                         max_extra_indicators: int = 10) -> List[str]:
    """Select the most important features for CNN training.

    Selection order: the basic OHLCV columns, then every priority indicator
    present in ``indicator_cols`` (in priority order), then up to
    ``max_extra_indicators`` further indicators in their given order.

    Args:
        df: Frame the features will be read from.
        basic_cols: Columns that are always requested (OHLCV).
        indicator_cols: Candidate indicator column names.
        max_extra_indicators: Cap on non-priority indicators, to limit
            dimensionality. Defaults to 10.

    Returns:
        Column names that exist in ``df``. Non-priority indicators are only
        included when their dtype is numeric, since every selected column is
        later normalised arithmetically. On error, a copy of ``basic_cols``.
    """
    try:
        # Priority indicators (most informative for CNNs)
        priority_indicators = [
            # Trend indicators
            'sma_10', 'sma_20', 'sma_50', 'ema_12', 'ema_26', 'ema_50',
            'macd', 'macd_signal', 'macd_histogram',
            'adx', 'adx_pos', 'adx_neg', 'psar',

            # Momentum indicators
            'rsi_14', 'rsi_7', 'rsi_21',
            'stoch_k', 'stoch_d', 'williams_r', 'ultimate_osc',

            # Volatility indicators
            'bb_upper', 'bb_lower', 'bb_middle', 'bb_width', 'bb_percent',
            'atr', 'keltner_upper', 'keltner_lower', 'keltner_middle',

            # Volume indicators
            'volume_sma_10', 'volume_sma_20', 'obv', 'vpt', 'mfi', 'ad_line', 'vwap',

            # Price action
            'price_position', 'true_range', 'roc',

            # Custom composites
            'trend_strength', 'momentum_composite', 'volatility_regime',
        ]

        available = set(df.columns)
        candidates = set(indicator_cols)

        # Always include basic OHLCV (normalized)
        selected = list(basic_cols)

        # Add available priority indicators
        selected.extend(name for name in priority_indicators if name in candidates)

        # Add any other technical indicators not in priority list (limit to
        # avoid curse of dimensionality). Missing and non-numeric columns
        # are dropped before the limit is applied so they cannot use up
        # slots or reach the normaliser.
        already_selected = set(selected)
        extras = [
            col for col in indicator_cols
            if col not in already_selected
            and col in available
            and pd.api.types.is_numeric_dtype(df[col])
        ]
        selected.extend(extras[:max(max_extra_indicators, 0)])

        # Verify all selected features exist in dataframe
        final_selected = [col for col in selected if col in available]

        logger.debug(f"Selected {len(final_selected)} features from {len(df.columns)} available columns")
        return final_selected

    except Exception as e:
        logger.error(f"Error selecting CNN features: {e}")
        return list(basic_cols)  # Fallback to basic OHLCV
|
||||
|
||||
def _normalize_features(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Normalize features for CNN training.

    All reference values (latest close, latest ATR, rolling statistics) are
    read from the untouched input, never from the frame being written, so
    the result does not depend on column order.

    Strategy per column:
      * price-level columns: divided by the latest close (if it is > 0);
      * ``volume``: divided by its mean over the last ``min(20, len(df))`` rows;
      * RSI / stochastic: divided by 100;
      * ``williams_r``: shifted by 100, then divided by 100;
      * MACD family: divided by the latest ATR if positive, else by the
        latest close;
      * columns treated as already in 0..1: clipped to [0, 1];
      * ``atr`` / ``true_range``: divided by the latest close;
      * anything else: z-score against the last ``min(20, len(df))`` rows.

    Args:
        df: Feature window to normalise; not modified.

    Returns:
        A normalised copy with inf and NaN replaced by 0, or None if
        normalisation fails, so callers can detect the failure instead of
        silently receiving raw values.
    """
    price_cols = {
        'open', 'high', 'low', 'close', 'sma_10', 'sma_20', 'sma_50',
        'ema_12', 'ema_26', 'ema_50', 'bb_upper', 'bb_lower', 'bb_middle',
        'keltner_upper', 'keltner_lower', 'keltner_middle', 'psar', 'vwap',
    }
    percent_cols = {'rsi_14', 'rsi_7', 'rsi_21', 'stoch_k', 'stoch_d'}
    macd_cols = {'macd', 'macd_signal', 'macd_histogram'}
    unit_cols = {
        'bb_width', 'bb_percent', 'price_position', 'trend_strength',
        'momentum_composite', 'volatility_regime',
    }
    range_cols = {'atr', 'true_range'}

    try:
        if df.empty:
            return df.copy()

        df_norm = df.copy()
        window = min(20, len(df))

        # Snapshot the reference values once, from the raw input.
        has_close = 'close' in df.columns
        last_close = df['close'].iloc[-1] if has_close else None
        last_atr = df['atr'].iloc[-1] if 'atr' in df.columns else None

        for col in df.columns:
            raw = df[col]

            if col in price_cols:
                # Price-based indicators: normalize by latest close price
                if has_close and last_close > 0:
                    df_norm[col] = raw / last_close

            elif col == 'volume':
                # Volume: normalize by its own rolling mean
                volume_mean = raw.rolling(window=window).mean().iloc[-1]
                if volume_mean > 0:
                    df_norm[col] = raw / volume_mean

            elif col in percent_cols:
                # RSI / Stochastic: already 0-100, normalize to 0-1
                df_norm[col] = raw / 100.0

            elif col == 'williams_r':
                # Williams %R: -100 to 0, normalize to 0-1
                df_norm[col] = (raw + 100) / 100.0

            elif col in macd_cols:
                # MACD: normalize by ATR or close price
                if last_atr is not None and last_atr > 0:
                    df_norm[col] = raw / last_atr
                elif has_close:
                    df_norm[col] = raw / last_close

            elif col in unit_cols:
                # Already normalized indicators: ensure 0-1 range
                df_norm[col] = np.clip(raw, 0, 1)

            elif col in range_cols:
                # Volatility indicators: normalize by close price
                if has_close:
                    df_norm[col] = raw / last_close

            else:
                # Other indicators: z-score normalization
                rolling = raw.rolling(window=window)
                col_mean = rolling.mean().iloc[-1]
                col_std = rolling.std().iloc[-1]
                if col_std > 0:
                    df_norm[col] = (raw - col_mean) / col_std
                else:
                    df_norm[col] = 0

        # Replace inf/-inf with 0
        df_norm = df_norm.replace([np.inf, -np.inf], 0)

        # Fill any remaining NaN values
        df_norm = df_norm.fillna(0)

        return df_norm

    except Exception as e:
        logger.error(f"Error normalizing features: {e}")
        return None
|
||||
|
||||
def get_multi_symbol_feature_matrix(self, symbols: List[str] = None,
                                    timeframes: List[str] = None,
                                    window_size: int = 20) -> Optional[np.ndarray]:
    """
    Get feature matrix for multiple symbols and timeframes

    Symbols for which ``get_feature_matrix`` returns None are skipped.
    Per-symbol matrices can differ in shape (fewer usable timeframes or a
    different common feature set); only those matching the most frequent
    shape are stacked, so one odd symbol no longer discards the whole batch.
    Ties are resolved in favour of the earliest symbol's shape.

    Args:
        symbols: Symbols to include; defaults to ``self.symbols``.
        timeframes: Timeframes to include; defaults to ``self.timeframes``.
        window_size: Rows per timeframe, forwarded to ``get_feature_matrix``.

    Returns:
        np.ndarray: Shape (n_symbols, n_timeframes, window_size, n_features),
        where n_symbols counts only the symbols actually stacked; None if no
        matrix could be built or an error occurs.
    """
    try:
        if symbols is None:
            symbols = self.symbols
        if timeframes is None:
            timeframes = self.timeframes

        built = []
        for symbol in symbols:
            symbol_matrix = self.get_feature_matrix(symbol, timeframes, window_size)
            if symbol_matrix is None:
                logger.warning(f"Could not create feature matrix for {symbol}")
                continue
            built.append((symbol, symbol_matrix))

        if not built:
            return None

        # np.stack needs identical shapes; keep the majority shape.
        shapes = [matrix.shape for _, matrix in built]
        reference_shape = max(shapes, key=shapes.count)

        symbol_matrices = []
        for symbol, matrix in built:
            if matrix.shape == reference_shape:
                symbol_matrices.append(matrix)
            else:
                logger.warning(f"Skipping {symbol}: feature matrix shape {matrix.shape} "
                               f"does not match {reference_shape}")

        # Stack all symbol matrices
        multi_symbol_matrix = np.stack(symbol_matrices, axis=0)
        logger.info(f"Created multi-symbol feature matrix: {multi_symbol_matrix.shape}")
        return multi_symbol_matrix

    except Exception as e:
        logger.error(f"Error creating multi-symbol feature matrix: {e}")
        return None
|
||||
|
||||
def health_check(self) -> Dict[str, Any]:
|
||||
|
Reference in New Issue
Block a user