new data flow (trading works)

Dobromir Popov 2025-05-29 15:10:44 +03:00
parent 3f4e9b9774
commit 3e697acf08
3 changed files with 605 additions and 68 deletions

.gitignore (+1)

@@ -36,3 +36,4 @@ models/trading_agent_best_pnl.pt
 NN/models/saved/hybrid_stats_20250409_022901.json
 *__pycache__*
 *.png
+closed_trades_history.json


@@ -349,15 +349,15 @@ class DataProvider:
             df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
             df = df.sort_values('timestamp').reset_index(drop=True)

             logger.info(f"MEXC: Fetched {len(df)} candles for {symbol} {timeframe}")
             return df

         except Exception as e:
             logger.error(f"MEXC: Error fetching data: {e}")
             return None

     def _fetch_from_binance(self, symbol: str, timeframe: str, limit: int) -> Optional[pd.DataFrame]:
-        """Fetch data from Binance API (primary data source)"""
+        """Fetch data from Binance API (primary data source) with HTTP 451 error handling"""
         try:
             # Convert symbol format
             binance_symbol = symbol.replace('/', '').upper()
@@ -369,7 +369,7 @@ class DataProvider:
             }
             binance_timeframe = timeframe_map.get(timeframe, '1h')

-            # API request
+            # API request with timeout and better headers
             url = "https://api.binance.com/api/v3/klines"
             params = {
                 'symbol': binance_symbol,
@@ -377,7 +377,19 @@ class DataProvider:
                 'limit': limit
             }
-            response = requests.get(url, params=params)
+
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                'Accept': 'application/json',
+                'Connection': 'keep-alive'
+            }
+
+            response = requests.get(url, params=params, headers=headers, timeout=10)
+
+            # Handle HTTP 451 (Unavailable For Legal Reasons) specifically
+            if response.status_code == 451:
+                logger.warning(f"Binance API returned 451 (blocked) for {symbol} {timeframe} - using fallback")
+                return self._get_fallback_data(symbol, timeframe, limit)
+
             response.raise_for_status()
             data = response.json()
@@ -402,9 +414,40 @@ class DataProvider:
             return df

         except Exception as e:
-            logger.error(f"Error fetching from Binance API: {e}")
-            return None
+            if "451" in str(e) or "Client Error" in str(e):
+                logger.warning(f"Binance API access blocked (451) for {symbol} {timeframe} - using fallback")
+                return self._get_fallback_data(symbol, timeframe, limit)
+            else:
+                logger.error(f"Error fetching from Binance API: {e}")
+                return self._get_fallback_data(symbol, timeframe, limit)
+
+    def _get_fallback_data(self, symbol: str, timeframe: str, limit: int) -> Optional[pd.DataFrame]:
+        """Get fallback data when Binance API is unavailable - REAL DATA ONLY"""
+        try:
+            logger.info(f"FALLBACK: Attempting to get real cached data for {symbol} {timeframe}")
+
+            # ONLY try cached data
+            cached_data = self._load_from_cache(symbol, timeframe)
+            if cached_data is not None and not cached_data.empty:
+                # Limit to requested amount
+                limited_data = cached_data.tail(limit) if len(cached_data) > limit else cached_data
+                logger.info(f"FALLBACK: Using cached real data for {symbol} {timeframe}: {len(limited_data)} bars")
+                return limited_data
+
+            # Try MEXC as secondary real data source
+            mexc_data = self._fetch_from_mexc(symbol, timeframe, limit)
+            if mexc_data is not None and not mexc_data.empty:
+                logger.info(f"FALLBACK: Using MEXC real data for {symbol} {timeframe}: {len(mexc_data)} bars")
+                return mexc_data
+
+            # NO SYNTHETIC DATA - Return None if no real data available
+            logger.warning(f"FALLBACK: No real data available for {symbol} {timeframe} - waiting for real data")
+            return None
+
+        except Exception as e:
+            logger.error(f"Error getting fallback data: {e}")
+            return None

     def _add_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
         """Add comprehensive technical indicators for multi-timeframe analysis"""
         try:
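The fallback chain above prefers cached real data, then MEXC, and never synthesizes candles. A minimal sketch of how a caller might exercise it; the module path, constructor signature, and default arguments here are assumptions, not confirmed by this diff:

    # sketch: exercising the Binance -> cache -> MEXC fallback chain
    from core.data_provider import DataProvider  # assumed module path

    provider = DataProvider()  # assumed no-arg construction
    df = provider.get_historical_data(symbol='ETH/USDT', timeframe='1m', limit=100)
    if df is None:
        # No real data from any source: the provider returns None
        # rather than fabricating synthetic candles.
        print("waiting for real data")
    else:
        print(f"got {len(df)} bars, last close={df['close'].iloc[-1]}")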


@@ -99,6 +99,96 @@ except ImportError:

 logger = logging.getLogger(__name__)

+class AdaptiveThresholdLearner:
+    """Learn optimal confidence thresholds based on real trade outcomes"""
+
+    def __init__(self, initial_threshold: float = 0.30):
+        self.base_threshold = initial_threshold
+        self.current_threshold = initial_threshold
+        self.trade_outcomes = deque(maxlen=100)
+        self.threshold_history = deque(maxlen=50)
+        self.learning_rate = 0.02
+        self.min_threshold = 0.20
+        self.max_threshold = 0.70
+
+        logger.info(f"[ADAPTIVE] Initialized with starting threshold: {initial_threshold:.2%}")
+
+    def record_trade_outcome(self, confidence: float, pnl: float, threshold_used: float):
+        """Record a trade outcome to learn from"""
+        try:
+            outcome = {
+                'confidence': confidence,
+                'pnl': pnl,
+                'profitable': pnl > 0,
+                'threshold_used': threshold_used,
+                'timestamp': datetime.now()
+            }
+            self.trade_outcomes.append(outcome)
+
+            # Learn from outcomes
+            if len(self.trade_outcomes) >= 10:
+                self._update_threshold()
+        except Exception as e:
+            logger.error(f"Error recording trade outcome: {e}")
+
+    def _update_threshold(self):
+        """Update threshold based on recent trade statistics"""
+        try:
+            recent_trades = list(self.trade_outcomes)[-20:]
+            if len(recent_trades) < 10:
+                return
+
+            profitable_count = sum(1 for t in recent_trades if t['profitable'])
+            win_rate = profitable_count / len(recent_trades)
+            avg_pnl = sum(t['pnl'] for t in recent_trades) / len(recent_trades)
+
+            # Adaptive adjustment logic
+            if win_rate > 0.60 and avg_pnl > 0.20:
+                adjustment = -self.learning_rate * 1.5  # Lower threshold for more trades
+            elif win_rate < 0.40 or avg_pnl < -0.30:
+                adjustment = self.learning_rate * 2.0  # Raise threshold to be more selective
+            else:
+                adjustment = 0  # No change
+
+            old_threshold = self.current_threshold
+            self.current_threshold = max(self.min_threshold,
+                                         min(self.max_threshold,
+                                             self.current_threshold + adjustment))
+
+            if abs(self.current_threshold - old_threshold) > 0.005:
+                logger.info(f"[ADAPTIVE] Threshold: {old_threshold:.2%} -> {self.current_threshold:.2%} (WR: {win_rate:.1%}, PnL: ${avg_pnl:.2f})")
+        except Exception as e:
+            logger.error(f"Error updating adaptive threshold: {e}")
+
+    def get_current_threshold(self) -> float:
+        return self.current_threshold
+
+    def get_learning_stats(self) -> Dict[str, Any]:
+        """Get learning statistics"""
+        try:
+            if not self.trade_outcomes:
+                return {'status': 'No trades recorded yet'}
+
+            recent_trades = list(self.trade_outcomes)[-20:]
+            profitable_count = sum(1 for t in recent_trades if t['profitable'])
+            win_rate = profitable_count / len(recent_trades) if recent_trades else 0
+            avg_pnl = sum(t['pnl'] for t in recent_trades) / len(recent_trades) if recent_trades else 0
+
+            return {
+                'current_threshold': self.current_threshold,
+                'base_threshold': self.base_threshold,
+                'total_trades': len(self.trade_outcomes),
+                'recent_win_rate': win_rate,
+                'recent_avg_pnl': avg_pnl,
+                'threshold_changes': len(self.threshold_history),
+                'learning_active': len(self.trade_outcomes) >= 10
+            }
+        except Exception as e:
+            return {'error': str(e)}
+
 class TradingDashboard:
     """Modern trading dashboard with real-time updates and enhanced RL training integration"""
@@ -156,11 +246,15 @@ class TradingDashboard:
         # Load existing closed trades from file
         self._load_closed_trades_from_file()

-        # Signal execution settings for scalping
-        self.min_confidence_threshold = 0.65  # Only execute trades above this confidence
-        self.signal_cooldown = 5  # Minimum seconds between signals
+        # Signal execution settings for scalping - REMOVED FREQUENCY LIMITS
+        self.min_confidence_threshold = 0.30  # Start lower to allow learning
+        self.signal_cooldown = 0  # REMOVED: Model decides when to act, no artificial delays
         self.last_signal_time = 0

+        # Adaptive threshold learning - starts low and learns optimal thresholds
+        self.adaptive_learner = AdaptiveThresholdLearner(initial_threshold=0.30)
+        logger.info("[ADAPTIVE] Adaptive threshold learning enabled - will adjust based on trade outcomes")
+
         # Real-time tick data infrastructure
         self.tick_cache = deque(maxlen=54000)  # 15 minutes * 60 seconds * 60 ticks/second = 54000 ticks
         self.one_second_bars = deque(maxlen=900)  # 15 minutes of 1-second bars
@@ -745,39 +839,36 @@ class TradingDashboard:
                 logger.warning(f"[CHART_ERROR] Error getting chart data: {e}")
                 chart_data = None

-            # Generate demo trading signals for dashboard display
+            # Generate trading signals based on model decisions - NO FREQUENCY LIMITS
             try:
                 if current_price and chart_data is not None and not chart_data.empty and len(chart_data) >= 5:
-                    current_time = time.time()
-
-                    # Generate signals more frequently for demo (every 5 updates = 5 seconds)
-                    if n_intervals % 5 == 0 and (current_time - self.last_signal_time) >= self.signal_cooldown:
-                        signal = self._generate_trading_signal(symbol, current_price, chart_data)
-                        if signal:
-                            self.last_signal_time = current_time
-
-                            # Add to signals list (all signals, regardless of execution)
-                            signal['signal_type'] = 'GENERATED'
-                            self.recent_signals.append(signal.copy())
-                            if len(self.recent_signals) > 100:  # Keep last 100 signals
-                                self.recent_signals = self.recent_signals[-100:]
-
-                            # Determine if we should execute this signal based on confidence
-                            should_execute = signal['confidence'] >= self.min_confidence_threshold
-
-                            if should_execute:
-                                signal['signal_type'] = 'EXECUTED'
-                                signal['reason'] = f"HIGH CONFIDENCE: {signal['reason']}"
-                                logger.debug(f"[EXECUTE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%})")
-                                self._process_trading_decision(signal)
-                            else:
-                                signal['signal_type'] = 'IGNORED'
-                                signal['reason'] = f"LOW CONFIDENCE: {signal['reason']}"
-                                logger.debug(f"[IGNORE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%})")
-
-                            # Add to recent decisions for display but don't execute trade
-                            self.recent_decisions.append(signal)
-                            if len(self.recent_decisions) > 500:  # Keep last 500 decisions
-                                self.recent_decisions = self.recent_decisions[-500:]
+                    # Model decides when to act - check every update for signals
+                    signal = self._generate_trading_signal(symbol, current_price, chart_data)
+                    if signal:
+                        # Add to signals list (all signals, regardless of execution)
+                        signal['signal_type'] = 'GENERATED'
+                        self.recent_signals.append(signal.copy())
+                        if len(self.recent_signals) > 100:  # Keep last 100 signals
+                            self.recent_signals = self.recent_signals[-100:]
+
+                        # Use adaptive threshold instead of fixed threshold
+                        current_threshold = self.adaptive_learner.get_current_threshold()
+                        should_execute = signal['confidence'] >= current_threshold
+
+                        if should_execute:
+                            signal['signal_type'] = 'EXECUTED'
+                            signal['threshold_used'] = current_threshold  # Track threshold for learning
+                            signal['reason'] = f"ADAPTIVE EXECUTE (≥{current_threshold:.2%}): {signal['reason']}"
+                            logger.debug(f"[EXECUTE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%} ≥ {current_threshold:.1%})")
+                            self._process_trading_decision(signal)
+                        else:
+                            signal['signal_type'] = 'IGNORED'
+                            signal['reason'] = f"ADAPTIVE IGNORE (<{current_threshold:.2%}): {signal['reason']}"
+                            logger.debug(f"[IGNORE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%} < {current_threshold:.1%})")
+
+                        # Add to recent decisions for display but don't execute trade
+                        self.recent_decisions.append(signal)
+                        if len(self.recent_decisions) > 500:  # Keep last 500 decisions
+                            self.recent_decisions = self.recent_decisions[-500:]
                 else:
                     # Fallback: Add a simple monitoring update
                     if n_intervals % 10 == 0 and current_price:  # Every 10 seconds
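The gating above touches only a handful of signal fields. A minimal dict that would flow through it, inferred from this hunk alone (the real _generate_trading_signal may attach more keys):

    # sketch: minimal signal dict consumed by the adaptive gating above
    signal = {
        'action': 'BUY',         # or 'SELL'
        'price': 3050.25,        # price at signal time
        'confidence': 0.42,      # model confidence in [0, 1]
        'reason': 'momentum breakout',
    }
    # The callback then adds 'signal_type' ('GENERATED'/'EXECUTED'/'IGNORED')
    # and, on execution, 'threshold_used' so the adaptive learner can
    # attribute the eventual PnL to the threshold that allowed the trade.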
@@ -1834,6 +1925,15 @@ class TradingDashboard:
                     # Trigger RL training on this closed trade
                     self._trigger_rl_training_on_closed_trade(closed_trade)

+                    # Record outcome for adaptive threshold learning
+                    if 'confidence' in decision and 'threshold_used' in decision:
+                        self.adaptive_learner.record_trade_outcome(
+                            confidence=decision['confidence'],
+                            pnl=net_pnl,
+                            threshold_used=decision['threshold_used']
+                        )
+                        logger.debug(f"[ADAPTIVE] Recorded SHORT close outcome: PnL=${net_pnl:.2f}")
+
                     logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")

                     # Clear position before opening new one
@@ -1916,6 +2016,15 @@ class TradingDashboard:
                     # Trigger RL training on this closed trade
                     self._trigger_rl_training_on_closed_trade(closed_trade)

+                    # Record outcome for adaptive threshold learning
+                    if 'confidence' in decision and 'threshold_used' in decision:
+                        self.adaptive_learner.record_trade_outcome(
+                            confidence=decision['confidence'],
+                            pnl=net_pnl,
+                            threshold_used=decision['threshold_used']
+                        )
+                        logger.debug(f"[ADAPTIVE] Recorded SHORT close outcome: PnL=${net_pnl:.2f}")
+
                     logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")

                     # Clear position before opening new one
@@ -2979,6 +3088,63 @@ class TradingDashboard:
                 ], className="mb-3 p-2 border border-secondary rounded")
             )

+            # Adaptive Threshold Learning Statistics
+            try:
+                adaptive_stats = self.adaptive_learner.get_learning_stats()
+                if adaptive_stats and 'error' not in adaptive_stats:
+                    current_threshold = adaptive_stats.get('current_threshold', 0.3)
+                    base_threshold = adaptive_stats.get('base_threshold', 0.3)
+                    total_trades = adaptive_stats.get('total_trades', 0)
+                    recent_win_rate = adaptive_stats.get('recent_win_rate', 0)
+                    recent_avg_pnl = adaptive_stats.get('recent_avg_pnl', 0)
+                    learning_active = adaptive_stats.get('learning_active', False)
+
+                    training_items.append(
+                        html.Div([
+                            html.H6([
+                                html.I(className="fas fa-graduation-cap me-2 text-warning"),
+                                "Adaptive Threshold Learning"
+                            ], className="mb-2"),
+                            html.Div([
+                                html.Small([
+                                    html.Strong("Current Threshold: "),
+                                    html.Span(f"{current_threshold:.1%}", className="text-warning fw-bold")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Base Threshold: "),
+                                    html.Span(f"{base_threshold:.1%}", className="text-muted")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Learning Status: "),
+                                    html.Span("ACTIVE" if learning_active else "COLLECTING DATA",
+                                              className="text-success" if learning_active else "text-info")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Trades Analyzed: "),
+                                    html.Span(f"{total_trades}", className="text-info")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Recent Win Rate: "),
+                                    html.Span(f"{recent_win_rate:.1%}",
+                                              className="text-success" if recent_win_rate > 0.5 else "text-danger")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Recent Avg P&L: "),
+                                    html.Span(f"${recent_avg_pnl:.2f}",
+                                              className="text-success" if recent_avg_pnl > 0 else "text-danger")
+                                ], className="d-block")
+                            ])
+                        ], className="mb-3 p-2 border border-warning rounded")
+                    )
+            except Exception as e:
+                logger.warning(f"Error calculating adaptive threshold: {e}")
+                training_items.append(
+                    html.Div([
+                        html.P("Adaptive threshold learning error", className="text-danger"),
+                        html.Small(f"Error: {str(e)}", className="text-muted")
+                    ], className="mb-3 p-2 border border-danger rounded")
+                )
+
             # Real-time Training Events Log
             training_items.append(
                 html.Div([
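The panel above renders whatever get_learning_stats() returns; a representative payload, with illustrative values, looks like:

    # sketch: stats dict consumed by the adaptive-threshold panel
    adaptive_stats = {
        'current_threshold': 0.34,
        'base_threshold': 0.30,
        'total_trades': 27,
        'recent_win_rate': 0.48,
        'recent_avg_pnl': -0.12,
        'threshold_changes': 3,
        'learning_active': True,  # True once >= 10 outcomes recorded
    }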
@@ -3861,28 +4027,63 @@ class TradingDashboard:
             return False

     def _calculate_rl_reward(self, closed_trade):
-        """Calculate reward for RL training based on trade performance"""
+        """Calculate enhanced reward for RL training with proper penalties for losing trades"""
         try:
             net_pnl = closed_trade.get('net_pnl', 0)
             duration = closed_trade.get('duration', timedelta(0))
             duration_hours = max(duration.total_seconds() / 3600, 0.01)  # Avoid division by zero
+            fees = closed_trade.get('fees', 0)
+            side = closed_trade.get('side', 'LONG')

-            # Base reward is normalized PnL
-            base_reward = net_pnl / 10.0  # Normalize to reasonable range
+            # Enhanced reward calculation with stronger penalties for losses
+            base_reward = net_pnl / 5.0  # Increase sensitivity (was /10.0)

-            # Time efficiency bonus/penalty
-            # Reward faster profitable trades, penalize slow losses
+            # Fee penalty - trading costs should be considered
+            fee_penalty = fees / 2.0  # Penalize high fee trades
+
+            # Time efficiency factor - more nuanced
             if net_pnl > 0:
-                # Profitable trades: bonus for speed
-                time_factor = min(2.0, 1.0 / duration_hours)  # Max 2x bonus for very fast trades
-                reward = base_reward * time_factor
+                # Profitable trades: reward speed, but not too much
+                if duration_hours < 0.1:  # < 6 minutes
+                    time_bonus = 0.5  # Fast profit bonus
+                elif duration_hours < 1.0:  # < 1 hour
+                    time_bonus = 0.2  # Moderate speed bonus
+                else:
+                    time_bonus = 0.0  # No bonus for slow profits
+                reward = base_reward + time_bonus - fee_penalty
             else:
-                # Losing trades: penalty increases with time
-                time_penalty = min(2.0, duration_hours / 24.0)  # Max 2x penalty for very slow trades
-                reward = base_reward * (1 + time_penalty)
+                # Losing trades: STRONG penalties that increase with time and size
+                loss_magnitude_penalty = abs(net_pnl) / 3.0  # Stronger loss penalty
+
+                # Time penalty for holding losing positions
+                if duration_hours > 4.0:  # Holding losses too long
+                    time_penalty = 2.0  # Severe penalty
+                elif duration_hours > 1.0:  # Moderate holding time
+                    time_penalty = 1.0  # Moderate penalty
+                else:
+                    time_penalty = 0.5  # Small penalty for quick losses
+
+                # Total penalty for losing trades
+                reward = base_reward - loss_magnitude_penalty - time_penalty - fee_penalty

-            # Clip reward to reasonable range
-            reward = max(-5.0, min(5.0, reward))
+            # Risk-adjusted rewards based on position side and market conditions
+            if side == 'SHORT' and net_pnl > 0:
+                # Bonus for successful shorts (harder to time)
+                reward += 0.3
+            elif side == 'LONG' and net_pnl < 0 and duration_hours > 2.0:
+                # Extra penalty for holding losing longs too long
+                reward -= 0.5
+
+            # Clip reward to reasonable range but allow stronger penalties
+            reward = max(-10.0, min(8.0, reward))  # Expanded range for better learning
+
+            # Log detailed reward breakdown for analysis
+            if abs(net_pnl) > 0.5:  # Log significant trades
+                logger.info(f"[RL_REWARD] Trade #{closed_trade.get('trade_id')}: "
+                            f"PnL=${net_pnl:.2f}, Fees=${fees:.3f}, "
+                            f"Duration={duration_hours:.2f}h, Side={side}, "
+                            f"Final_Reward={reward:.3f}")

             return reward
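A worked example of the new reward arithmetic; the trade values are chosen for illustration, but the formula is exactly the one above:

    # sketch: reward for a losing LONG held 2.5 hours
    net_pnl, fees, duration_hours, side = -1.50, 0.10, 2.5, 'LONG'
    base_reward = net_pnl / 5.0                  # -0.30
    fee_penalty = fees / 2.0                     #  0.05
    loss_magnitude_penalty = abs(net_pnl) / 3.0  #  0.50
    time_penalty = 1.0                           # 1h < 2.5h <= 4h bracket
    reward = base_reward - loss_magnitude_penalty - time_penalty - fee_penalty  # -1.85
    reward -= 0.5                                # losing LONG held > 2h
    reward = max(-10.0, min(8.0, reward))        # -2.35, within the clip range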
@@ -3982,55 +4183,143 @@ class TradingDashboard:
             return None

     def _prepare_rl_state(self, market_data, training_episode):
-        """Prepare state representation for RL training"""
+        """Prepare enhanced state representation for RL training with comprehensive market context"""
         try:
             # Calculate technical indicators
             df = market_data.copy()

-            # Price features
+            # Basic price features
             df['returns'] = df['close'].pct_change()
+            df['log_returns'] = np.log(df['close'] / df['close'].shift(1))
             df['price_ma_5'] = df['close'].rolling(5).mean()
             df['price_ma_20'] = df['close'].rolling(20).mean()
+            df['price_ma_50'] = df['close'].rolling(50).mean()

-            # Volatility
+            # Volatility and risk metrics
             df['volatility'] = df['returns'].rolling(10).std()
+            df['volatility_ma'] = df['volatility'].rolling(5).mean()
+            df['max_drawdown'] = (df['close'] / df['close'].cummax() - 1).rolling(20).min()

-            # RSI
+            # Momentum indicators
             df['rsi'] = self._calculate_rsi(df['close'])
+            df['rsi_ma'] = df['rsi'].rolling(5).mean()
+            df['momentum'] = df['close'] / df['close'].shift(10) - 1  # 10-period momentum

-            # Volume features
+            # Volume analysis
             df['volume_ma'] = df['volume'].rolling(10).mean()
             df['volume_ratio'] = df['volume'] / df['volume_ma']
+            df['volume_trend'] = df['volume_ma'] / df['volume_ma'].shift(5) - 1
+
+            # Market structure
+            df['higher_highs'] = (df['high'] > df['high'].shift(1)).rolling(5).sum() / 5
+            df['lower_lows'] = (df['low'] < df['low'].shift(1)).rolling(5).sum() / 5
+            df['trend_strength'] = df['higher_highs'] - df['lower_lows']
+
+            # Support/Resistance levels (simplified)
+            df['distance_to_high'] = (df['high'].rolling(20).max() - df['close']) / df['close']
+            df['distance_to_low'] = (df['close'] - df['low'].rolling(20).min()) / df['close']
+
+            # Time-based features
+            df['hour'] = df.index.hour if hasattr(df.index, 'hour') else 12  # Default to noon
+            df['is_market_hours'] = ((df['hour'] >= 9) & (df['hour'] <= 16)).astype(float)

             # Drop NaN values
             df = df.dropna()

             if df.empty:
+                logger.warning("Empty dataframe after technical indicators calculation")
                 return None

-            # Take the last row as the state (most recent before trade)
+            # Enhanced state features (normalized)
             state_features = [
+                # Price momentum and trend
                 df['returns'].iloc[-1],
-                df['price_ma_5'].iloc[-1] / df['close'].iloc[-1] - 1,  # Normalized MA ratio
-                df['price_ma_20'].iloc[-1] / df['close'].iloc[-1] - 1,
+                df['log_returns'].iloc[-1],
+                (df['price_ma_5'].iloc[-1] / df['close'].iloc[-1] - 1),
+                (df['price_ma_20'].iloc[-1] / df['close'].iloc[-1] - 1),
+                (df['price_ma_50'].iloc[-1] / df['close'].iloc[-1] - 1),
+                df['momentum'].iloc[-1],
+                df['trend_strength'].iloc[-1],
+
+                # Volatility and risk
                 df['volatility'].iloc[-1],
+                df['volatility_ma'].iloc[-1],
+                df['max_drawdown'].iloc[-1],
+
+                # Momentum indicators
                 df['rsi'].iloc[-1] / 100.0,  # Normalize RSI to 0-1
-                df['volume_ratio'].iloc[-1]
+                df['rsi_ma'].iloc[-1] / 100.0,
+
+                # Volume analysis
+                df['volume_ratio'].iloc[-1],
+                df['volume_trend'].iloc[-1],
+
+                # Market structure
+                df['distance_to_high'].iloc[-1],
+                df['distance_to_low'].iloc[-1],
+
+                # Time features
+                df['hour'].iloc[-1] / 24.0,  # Normalize hour to 0-1
+                df['is_market_hours'].iloc[-1],
             ]

-            # Add trade-specific features
+            # Add Williams pivot points features (250 features)
+            try:
+                pivot_features = self._get_williams_pivot_features(df)
+                if pivot_features:
+                    state_features.extend(pivot_features)
+                else:
+                    state_features.extend([0.0] * 250)  # Default if calculation fails
+            except Exception as e:
+                logger.warning(f"Error calculating Williams pivot points: {e}")
+                state_features.extend([0.0] * 250)  # Default features
+
+            # Add multi-timeframe OHLCV features (300 features)
+            try:
+                multi_tf_features = self._get_multi_timeframe_features(training_episode.get('symbol', 'ETH/USDT'))
+                if multi_tf_features:
+                    state_features.extend(multi_tf_features)
+                else:
+                    state_features.extend([0.0] * 300)  # Default if calculation fails
+            except Exception as e:
+                logger.warning(f"Error calculating multi-timeframe features: {e}")
+                state_features.extend([0.0] * 300)  # Default features
+
+            # Add trade-specific context
             entry_price = training_episode['entry_price']
             current_price = df['close'].iloc[-1]

-            state_features.extend([
-                (current_price - entry_price) / entry_price,  # Price change since entry
+            trade_features = [
+                (current_price - entry_price) / entry_price,  # Unrealized P&L
                 training_episode['duration_seconds'] / 3600.0,  # Duration in hours
-            ])
+                1.0 if training_episode['side'] == 'LONG' else 0.0,  # Position side
+                min(training_episode['duration_seconds'] / 14400.0, 1.0),  # Time pressure (0-4h normalized)
+            ]
+            state_features.extend(trade_features)
+
+            # Add recent volatility context (last 3 periods)
+            if len(df) >= 3:
+                recent_volatility = [
+                    df['volatility'].iloc[-3],
+                    df['volatility'].iloc[-2],
+                    df['volatility'].iloc[-1]
+                ]
+                state_features.extend(recent_volatility)
+            else:
+                state_features.extend([0.0, 0.0, 0.0])
+
+            # Ensure all features are valid numbers
+            state_features = [float(x) if pd.notna(x) and np.isfinite(x) else 0.0 for x in state_features]
+
+            logger.debug(f"[RL_STATE] Prepared {len(state_features)} features for trade #{training_episode.get('trade_id')} (including Williams pivot points and multi-timeframe)")

             return np.array(state_features, dtype=np.float32)

         except Exception as e:
-            logger.warning(f"Error preparing RL state: {e}")
+            logger.warning(f"Error preparing enhanced RL state: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
             return None

     def _send_rl_training_step(self, state, action, reward, training_episode):
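Counting the groups in the enhanced state gives a fixed dimensionality: 18 base market features, 250 Williams pivot features, 300 multi-timeframe features, 4 trade-context features, and 3 recent-volatility values. A pure-arithmetic sanity check, using no project code:

    # sketch: expected RL state dimensionality
    base, williams, multi_tf, trade_ctx, recent_vol = 18, 250, 300, 4, 3
    expected_dim = base + williams + multi_tf + trade_ctx + recent_vol
    assert expected_dim == 575
    # Any RL network consuming this state would declare input_dim = 575;
    # the zero-padding fallback branches above keep the length fixed even
    # when a feature group fails to compute.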
@@ -4154,6 +4443,210 @@ class TradingDashboard:
         except Exception as e:
             logger.error(f"Error stopping streaming: {e}")

+    def _get_williams_pivot_features(self, df: pd.DataFrame) -> Optional[List[float]]:
+        """Calculate Williams Market Structure pivot points features"""
+        try:
+            # Import Williams Market Structure
+            try:
+                from training.williams_market_structure import WilliamsMarketStructure
+            except ImportError:
+                logger.warning("Williams Market Structure not available")
+                return None
+
+            # Convert DataFrame to numpy array for Williams calculation
+            if len(df) < 50:
+                return None
+
+            ohlcv_array = np.array([
+                [df.index[i].timestamp() if hasattr(df.index[i], 'timestamp') else time.time(),
+                 df['open'].iloc[i], df['high'].iloc[i], df['low'].iloc[i],
+                 df['close'].iloc[i], df['volume'].iloc[i]]
+                for i in range(len(df))
+            ])
+
+            # Calculate Williams pivot points
+            williams = WilliamsMarketStructure()
+            structure_levels = williams.calculate_recursive_pivot_points(ohlcv_array)
+
+            # Extract features (250 features total)
+            pivot_features = williams.extract_features_for_rl(structure_levels)
+
+            logger.debug(f"[PIVOT] Calculated {len(pivot_features)} Williams pivot features")
+            return pivot_features
+
+        except Exception as e:
+            logger.warning(f"Error calculating Williams pivot features: {e}")
+            return None
+
+    def _get_multi_timeframe_features(self, symbol: str) -> Optional[List[float]]:
+        """Get multi-timeframe OHLCV features for comprehensive market context"""
+        try:
+            features = []
+            timeframes = ['1m', '5m', '15m', '1h', '4h', '1d']  # 6 timeframes
+
+            for timeframe in timeframes:
+                try:
+                    # Get data for this timeframe
+                    df = self.data_provider.get_historical_data(
+                        symbol=symbol,
+                        timeframe=timeframe,
+                        limit=50,  # Last 50 bars
+                        refresh=True
+                    )
+
+                    if df is not None and not df.empty and len(df) >= 10:
+                        # Calculate normalized features for this timeframe
+                        tf_features = self._extract_timeframe_features(df, timeframe)
+                        features.extend(tf_features)
+                    else:
+                        # Fill with zeros if no data
+                        features.extend([0.0] * 50)  # 50 features per timeframe
+
+                except Exception as e:
+                    logger.debug(f"Error getting {timeframe} data: {e}")
+                    features.extend([0.0] * 50)  # 50 features per timeframe
+
+            # Total: 6 timeframes * 50 features = 300 features
+            return features[:300]
+
+        except Exception as e:
+            logger.warning(f"Error calculating multi-timeframe features: {e}")
+            return None
+
+    def _extract_timeframe_features(self, df: pd.DataFrame, timeframe: str) -> List[float]:
+        """Extract normalized features from a single timeframe"""
+        try:
+            features = []
+
+            # Price action features (10 features)
+            if len(df) >= 10:
+                close_prices = df['close'].tail(10).values
+
+                # Price momentum and trends
+                features.extend([
+                    (close_prices[-1] - close_prices[0]) / close_prices[0],  # Total change
+                    (close_prices[-1] - close_prices[-2]) / close_prices[-2],  # Last change
+                    (close_prices[-1] - close_prices[-5]) / close_prices[-5],  # 5-period change
+                    np.std(close_prices) / np.mean(close_prices),  # Normalized volatility
+                    (np.max(close_prices) - np.min(close_prices)) / np.mean(close_prices),  # Range
+                ])
+
+                # Trend direction indicators
+                higher_highs = sum(1 for i in range(1, len(close_prices)) if close_prices[i] > close_prices[i-1])
+                features.extend([
+                    higher_highs / (len(close_prices) - 1),  # % higher highs
+                    (len(close_prices) - 1 - higher_highs) / (len(close_prices) - 1),  # % lower highs
+                ])
+
+                # Price position in range
+                current_price = close_prices[-1]
+                price_min = np.min(close_prices)
+                price_max = np.max(close_prices)
+                price_range = price_max - price_min
+                if price_range > 0:
+                    features.extend([
+                        (current_price - price_min) / price_range,  # Position in range (0-1)
+                        (price_max - current_price) / price_range,  # Distance from high
+                        (current_price - price_min) / price_range,  # Distance from low
+                    ])
+                else:
+                    features.extend([0.5, 0.5, 0.5])
+            else:
+                features.extend([0.0] * 10)
+
+            # Volume features (10 features)
+            if 'volume' in df.columns and len(df) >= 10:
+                volumes = df['volume'].tail(10).values
+                features.extend([
+                    volumes[-1] / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Current vs avg
+                    np.std(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 0.0,  # Volume volatility
+                    (volumes[-1] - volumes[-2]) / volumes[-2] if volumes[-2] > 0 else 0.0,  # Volume change
+                    np.max(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Max spike
+                    np.min(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Min ratio
+                ])
+
+                # Volume trend
+                volume_trend = np.polyfit(range(len(volumes)), volumes, 1)[0]
+                features.append(volume_trend / np.mean(volumes) if np.mean(volumes) > 0 else 0.0)
+
+                # Pad remaining volume features
+                features.extend([0.0] * 4)
+            else:
+                features.extend([0.0] * 10)
+
+            # Technical indicators (20 features)
+            try:
+                # RSI
+                rsi = self._calculate_rsi(df['close'])
+                features.append(rsi.iloc[-1] / 100.0 if not rsi.empty else 0.5)
+
+                # Moving averages
+                if len(df) >= 20:
+                    sma_20 = df['close'].rolling(20).mean()
+                    features.append((df['close'].iloc[-1] - sma_20.iloc[-1]) / sma_20.iloc[-1])
+                else:
+                    features.append(0.0)
+
+                if len(df) >= 50:
+                    sma_50 = df['close'].rolling(50).mean()
+                    features.append((df['close'].iloc[-1] - sma_50.iloc[-1]) / sma_50.iloc[-1])
+                else:
+                    features.append(0.0)
+
+                # MACD approximation
+                if len(df) >= 26:
+                    ema_12 = df['close'].ewm(span=12).mean()
+                    ema_26 = df['close'].ewm(span=26).mean()
+                    macd = ema_12 - ema_26
+                    features.append(macd.iloc[-1] / df['close'].iloc[-1])
+                else:
+                    features.append(0.0)
+
+                # Bollinger Bands approximation
+                if len(df) >= 20:
+                    bb_middle = df['close'].rolling(20).mean()
+                    bb_std = df['close'].rolling(20).std()
+                    bb_upper = bb_middle + (bb_std * 2)
+                    bb_lower = bb_middle - (bb_std * 2)
+                    current_price = df['close'].iloc[-1]
+                    features.extend([
+                        (current_price - bb_lower.iloc[-1]) / (bb_upper.iloc[-1] - bb_lower.iloc[-1]) if bb_upper.iloc[-1] != bb_lower.iloc[-1] else 0.5,
+                        (bb_upper.iloc[-1] - current_price) / (bb_upper.iloc[-1] - bb_lower.iloc[-1]) if bb_upper.iloc[-1] != bb_lower.iloc[-1] else 0.5,
+                    ])
+                else:
+                    features.extend([0.5, 0.5])
+
+                # Pad remaining technical features
+                features.extend([0.0] * 14)
+
+            except Exception as e:
+                logger.debug(f"Error calculating technical indicators for {timeframe}: {e}")
+                features.extend([0.0] * 20)
+
+            # Timeframe-specific features (10 features)
+            timeframe_weights = {
+                '1m': [1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+                '5m': [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
+                '15m': [0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
+                '1h': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
+                '4h': [0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
+                '1d': [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
+            }
+
+            # Add timeframe encoding
+            features.extend(timeframe_weights.get(timeframe, [0.0] * 6))
+            features.extend([0.0] * 4)  # Pad to 10 features
+
+            # Ensure exactly 50 features per timeframe
+            return features[:50]
+
+        except Exception as e:
+            logger.warning(f"Error extracting features for {timeframe}: {e}")
+            return [0.0] * 50
+
 def create_dashboard(data_provider: DataProvider = None, orchestrator: TradingOrchestrator = None, trading_executor: TradingExecutor = None) -> TradingDashboard:
     """Factory function to create a trading dashboard"""