new data flow (trading works)

parent 3f4e9b9774
commit 3e697acf08

.gitignore (vendored) | 1
@@ -36,3 +36,4 @@ models/trading_agent_best_pnl.pt
 NN/models/saved/hybrid_stats_20250409_022901.json
 *__pycache__*
 *.png
+closed_trades_history.json
@@ -349,15 +349,15 @@ class DataProvider:
             df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
             df = df.sort_values('timestamp').reset_index(drop=True)
             
-            logger.info(f"✅ MEXC: Fetched {len(df)} candles for {symbol} {timeframe}")
+            logger.info(f"MEXC: Fetched {len(df)} candles for {symbol} {timeframe}")
             return df
             
         except Exception as e:
-            logger.error(f"❌ MEXC: Error fetching data: {e}")
+            logger.error(f"MEXC: Error fetching data: {e}")
             return None
     
     def _fetch_from_binance(self, symbol: str, timeframe: str, limit: int) -> Optional[pd.DataFrame]:
-        """Fetch data from Binance API (primary data source)"""
+        """Fetch data from Binance API (primary data source) with HTTP 451 error handling"""
         try:
             # Convert symbol format
             binance_symbol = symbol.replace('/', '').upper()
@@ -369,7 +369,7 @@ class DataProvider:
             }
             binance_timeframe = timeframe_map.get(timeframe, '1h')
             
-            # API request
+            # API request with timeout and better headers
             url = "https://api.binance.com/api/v3/klines"
             params = {
                 'symbol': binance_symbol,
@@ -377,7 +377,19 @@ class DataProvider:
                 'limit': limit
             }
             
-            response = requests.get(url, params=params)
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                'Accept': 'application/json',
+                'Connection': 'keep-alive'
+            }
+            
+            response = requests.get(url, params=params, headers=headers, timeout=10)
+            
+            # Handle HTTP 451 (Unavailable For Legal Reasons) specifically
+            if response.status_code == 451:
+                logger.warning(f"Binance API returned 451 (blocked) for {symbol} {timeframe} - using fallback")
+                return self._get_fallback_data(symbol, timeframe, limit)
+            
             response.raise_for_status()
             
             data = response.json()
@@ -402,7 +414,38 @@ class DataProvider:
             return df
             
         except Exception as e:
-            logger.error(f"Error fetching from Binance API: {e}")
-            return None
+            if "451" in str(e) or "Client Error" in str(e):
+                logger.warning(f"Binance API access blocked (451) for {symbol} {timeframe} - using fallback")
+                return self._get_fallback_data(symbol, timeframe, limit)
+            else:
+                logger.error(f"Error fetching from Binance API: {e}")
+                return self._get_fallback_data(symbol, timeframe, limit)
+    
+    def _get_fallback_data(self, symbol: str, timeframe: str, limit: int) -> Optional[pd.DataFrame]:
+        """Get fallback data when Binance API is unavailable - REAL DATA ONLY"""
+        try:
+            logger.info(f"FALLBACK: Attempting to get real cached data for {symbol} {timeframe}")
+            
+            # ONLY try cached data
+            cached_data = self._load_from_cache(symbol, timeframe)
+            if cached_data is not None and not cached_data.empty:
+                # Limit to requested amount
+                limited_data = cached_data.tail(limit) if len(cached_data) > limit else cached_data
+                logger.info(f"FALLBACK: Using cached real data for {symbol} {timeframe}: {len(limited_data)} bars")
+                return limited_data
+            
+            # Try MEXC as secondary real data source
+            mexc_data = self._fetch_from_mexc(symbol, timeframe, limit)
+            if mexc_data is not None and not mexc_data.empty:
+                logger.info(f"FALLBACK: Using MEXC real data for {symbol} {timeframe}: {len(mexc_data)} bars")
+                return mexc_data
+            
+            # NO SYNTHETIC DATA - Return None if no real data available
+            logger.warning(f"FALLBACK: No real data available for {symbol} {timeframe} - waiting for real data")
+            return None
+            
+        except Exception as e:
+            logger.error(f"Error getting fallback data: {e}")
+            return None
+    
     def _add_technical_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
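Note: the hunk above replaces a hard failure (return None) with a strict real-data fallback chain: local cache first, then MEXC, then give up rather than synthesize candles. A minimal standalone sketch of that ordering, assuming only pandas and the standard library (first_real_data is a hypothetical helper, not part of the commit):

from typing import Callable, List, Optional
import pandas as pd

def first_real_data(sources: List[Callable[[], Optional[pd.DataFrame]]]) -> Optional[pd.DataFrame]:
    """Return the first non-empty DataFrame from an ordered list of real-data sources."""
    for fetch in sources:
        try:
            df = fetch()
        except Exception:
            continue  # a failing source must not block the next one
        if df is not None and not df.empty:
            return df
    return None  # no synthetic data: callers wait for real data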
web/dashboard.py | 579
@@ -99,6 +99,96 @@ except ImportError:
 
 logger = logging.getLogger(__name__)
 
+class AdaptiveThresholdLearner:
+    """Learn optimal confidence thresholds based on real trade outcomes"""
+    
+    def __init__(self, initial_threshold: float = 0.30):
+        self.base_threshold = initial_threshold
+        self.current_threshold = initial_threshold
+        self.trade_outcomes = deque(maxlen=100)
+        self.threshold_history = deque(maxlen=50)
+        self.learning_rate = 0.02
+        self.min_threshold = 0.20
+        self.max_threshold = 0.70
+        
+        logger.info(f"[ADAPTIVE] Initialized with starting threshold: {initial_threshold:.2%}")
+    
+    def record_trade_outcome(self, confidence: float, pnl: float, threshold_used: float):
+        """Record a trade outcome to learn from"""
+        try:
+            outcome = {
+                'confidence': confidence,
+                'pnl': pnl,
+                'profitable': pnl > 0,
+                'threshold_used': threshold_used,
+                'timestamp': datetime.now()
+            }
+            
+            self.trade_outcomes.append(outcome)
+            
+            # Learn from outcomes
+            if len(self.trade_outcomes) >= 10:
+                self._update_threshold()
+            
+        except Exception as e:
+            logger.error(f"Error recording trade outcome: {e}")
+    
+    def _update_threshold(self):
+        """Update threshold based on recent trade statistics"""
+        try:
+            recent_trades = list(self.trade_outcomes)[-20:]
+            if len(recent_trades) < 10:
+                return
+            
+            profitable_count = sum(1 for t in recent_trades if t['profitable'])
+            win_rate = profitable_count / len(recent_trades)
+            avg_pnl = sum(t['pnl'] for t in recent_trades) / len(recent_trades)
+            
+            # Adaptive adjustment logic
+            if win_rate > 0.60 and avg_pnl > 0.20:
+                adjustment = -self.learning_rate * 1.5  # Lower threshold for more trades
+            elif win_rate < 0.40 or avg_pnl < -0.30:
+                adjustment = self.learning_rate * 2.0  # Raise threshold to be more selective
+            else:
+                adjustment = 0  # No change
+            
+            old_threshold = self.current_threshold
+            self.current_threshold = max(self.min_threshold,
+                                         min(self.max_threshold,
+                                             self.current_threshold + adjustment))
+            
+            if abs(self.current_threshold - old_threshold) > 0.005:
+                logger.info(f"[ADAPTIVE] Threshold: {old_threshold:.2%} -> {self.current_threshold:.2%} (WR: {win_rate:.1%}, PnL: ${avg_pnl:.2f})")
+            
+        except Exception as e:
+            logger.error(f"Error updating adaptive threshold: {e}")
+    
+    def get_current_threshold(self) -> float:
+        return self.current_threshold
+    
+    def get_learning_stats(self) -> Dict[str, Any]:
+        """Get learning statistics"""
+        try:
+            if not self.trade_outcomes:
+                return {'status': 'No trades recorded yet'}
+            
+            recent_trades = list(self.trade_outcomes)[-20:]
+            profitable_count = sum(1 for t in recent_trades if t['profitable'])
+            win_rate = profitable_count / len(recent_trades) if recent_trades else 0
+            avg_pnl = sum(t['pnl'] for t in recent_trades) / len(recent_trades) if recent_trades else 0
+            
+            return {
+                'current_threshold': self.current_threshold,
+                'base_threshold': self.base_threshold,
+                'total_trades': len(self.trade_outcomes),
+                'recent_win_rate': win_rate,
+                'recent_avg_pnl': avg_pnl,
+                'threshold_changes': len(self.threshold_history),
+                'learning_active': len(self.trade_outcomes) >= 10
+            }
+        except Exception as e:
+            return {'error': str(e)}
+
 class TradingDashboard:
     """Modern trading dashboard with real-time updates and enhanced RL training integration"""
 
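A quick usage sketch of the learner added above (the confidence and PnL numbers are illustrative; the class relies on its module's deque, datetime, and logger imports):

learner = AdaptiveThresholdLearner(initial_threshold=0.30)

# Feed closed-trade outcomes as they arrive; adaptation starts after 10 trades.
for confidence, pnl in [(0.45, 1.20), (0.38, -0.40), (0.52, 0.85)]:
    learner.record_trade_outcome(confidence, pnl, learner.get_current_threshold())

# Gate new signals with the learned threshold.
if 0.41 >= learner.get_current_threshold():
    print("execute signal")
print(learner.get_learning_stats())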
@@ -156,11 +246,15 @@ class TradingDashboard:
         # Load existing closed trades from file
         self._load_closed_trades_from_file()
         
-        # Signal execution settings for scalping
-        self.min_confidence_threshold = 0.65  # Only execute trades above this confidence
-        self.signal_cooldown = 5  # Minimum seconds between signals
+        # Signal execution settings for scalping - REMOVED FREQUENCY LIMITS
+        self.min_confidence_threshold = 0.30  # Start lower to allow learning
+        self.signal_cooldown = 0  # REMOVED: Model decides when to act, no artificial delays
         self.last_signal_time = 0
         
+        # Adaptive threshold learning - starts low and learns optimal thresholds
+        self.adaptive_learner = AdaptiveThresholdLearner(initial_threshold=0.30)
+        logger.info("[ADAPTIVE] Adaptive threshold learning enabled - will adjust based on trade outcomes")
+        
         # Real-time tick data infrastructure
         self.tick_cache = deque(maxlen=54000)  # 15 minutes * 60 seconds * 60 ticks/second = 54000 ticks
         self.one_second_bars = deque(maxlen=900)  # 15 minutes of 1-second bars
@@ -745,35 +839,32 @@ class TradingDashboard:
             logger.warning(f"[CHART_ERROR] Error getting chart data: {e}")
             chart_data = None
         
-        # Generate demo trading signals for dashboard display
+        # Generate trading signals based on model decisions - NO FREQUENCY LIMITS
         try:
             if current_price and chart_data is not None and not chart_data.empty and len(chart_data) >= 5:
                 current_time = time.time()
                 
-                # Generate signals more frequently for demo (every 5 updates = 5 seconds)
-                if n_intervals % 5 == 0 and (current_time - self.last_signal_time) >= self.signal_cooldown:
+                # Model decides when to act - check every update for signals
                 signal = self._generate_trading_signal(symbol, current_price, chart_data)
                 if signal:
                     self.last_signal_time = current_time
                     
                     # Add to signals list (all signals, regardless of execution)
                     signal['signal_type'] = 'GENERATED'
                     self.recent_signals.append(signal.copy())
                     if len(self.recent_signals) > 100:  # Keep last 100 signals
                         self.recent_signals = self.recent_signals[-100:]
                     
-                    # Determine if we should execute this signal based on confidence
-                    should_execute = signal['confidence'] >= self.min_confidence_threshold
+                    # Use adaptive threshold instead of fixed threshold
+                    current_threshold = self.adaptive_learner.get_current_threshold()
+                    should_execute = signal['confidence'] >= current_threshold
                     
                     if should_execute:
                         signal['signal_type'] = 'EXECUTED'
-                        signal['reason'] = f"HIGH CONFIDENCE: {signal['reason']}"
-                        logger.debug(f"[EXECUTE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%})")
+                        signal['threshold_used'] = current_threshold  # Track threshold for learning
+                        signal['reason'] = f"ADAPTIVE EXECUTE (≥{current_threshold:.2%}): {signal['reason']}"
+                        logger.debug(f"[EXECUTE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%} ≥ {current_threshold:.1%})")
                         self._process_trading_decision(signal)
                     else:
                         signal['signal_type'] = 'IGNORED'
-                        signal['reason'] = f"LOW CONFIDENCE: {signal['reason']}"
-                        logger.debug(f"[IGNORE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%})")
+                        signal['reason'] = f"ADAPTIVE IGNORE (<{current_threshold:.2%}): {signal['reason']}"
+                        logger.debug(f"[IGNORE] {signal['action']} signal @ ${signal['price']:.2f} (confidence: {signal['confidence']:.1%} < {current_threshold:.1%})")
                         # Add to recent decisions for display but don't execute trade
                         self.recent_decisions.append(signal)
                         if len(self.recent_decisions) > 500:  # Keep last 500 decisions
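The net effect of this hunk: every dashboard refresh may yield a signal, and the adaptive threshold becomes the only execution gate; below-threshold signals are still recorded so the learner keeps receiving data. A condensed sketch of the gating flow (gate_signal is a hypothetical helper; the signal dict keys follow the diff):

def gate_signal(signal: dict, learner: AdaptiveThresholdLearner) -> str:
    """Classify a generated signal against the current adaptive threshold."""
    threshold = learner.get_current_threshold()
    if signal['confidence'] >= threshold:
        signal['threshold_used'] = threshold  # stored so the trade outcome can train the learner
        return 'EXECUTED'
    return 'IGNORED'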
@@ -1834,6 +1925,15 @@ class TradingDashboard:
                 # Trigger RL training on this closed trade
                 self._trigger_rl_training_on_closed_trade(closed_trade)
                 
+                # Record outcome for adaptive threshold learning
+                if 'confidence' in decision and 'threshold_used' in decision:
+                    self.adaptive_learner.record_trade_outcome(
+                        confidence=decision['confidence'],
+                        pnl=net_pnl,
+                        threshold_used=decision['threshold_used']
+                    )
+                    logger.debug(f"[ADAPTIVE] Recorded SHORT close outcome: PnL=${net_pnl:.2f}")
+                
                 logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")
                 
                 # Clear position before opening new one
@@ -1916,6 +2016,15 @@ class TradingDashboard:
                 # Trigger RL training on this closed trade
                 self._trigger_rl_training_on_closed_trade(closed_trade)
                 
+                # Record outcome for adaptive threshold learning
+                if 'confidence' in decision and 'threshold_used' in decision:
+                    self.adaptive_learner.record_trade_outcome(
+                        confidence=decision['confidence'],
+                        pnl=net_pnl,
+                        threshold_used=decision['threshold_used']
+                    )
+                    logger.debug(f"[ADAPTIVE] Recorded SHORT close outcome: PnL=${net_pnl:.2f}")
+                
                 logger.info(f"[TRADE] CLOSED SHORT: {size} @ ${exit_price:.2f} | PnL: ${net_pnl:.2f} | OPENING LONG")
                 
                 # Clear position before opening new one
@@ -2979,6 +3088,63 @@ class TradingDashboard:
                 ], className="mb-3 p-2 border border-secondary rounded")
             )
             
+            # Adaptive Threshold Learning Statistics
+            try:
+                adaptive_stats = self.adaptive_learner.get_learning_stats()
+                if adaptive_stats and 'error' not in adaptive_stats:
+                    current_threshold = adaptive_stats.get('current_threshold', 0.3)
+                    base_threshold = adaptive_stats.get('base_threshold', 0.3)
+                    total_trades = adaptive_stats.get('total_trades', 0)
+                    recent_win_rate = adaptive_stats.get('recent_win_rate', 0)
+                    recent_avg_pnl = adaptive_stats.get('recent_avg_pnl', 0)
+                    learning_active = adaptive_stats.get('learning_active', False)
+                    
+                    training_items.append(
+                        html.Div([
+                            html.H6([
+                                html.I(className="fas fa-graduation-cap me-2 text-warning"),
+                                "Adaptive Threshold Learning"
+                            ], className="mb-2"),
+                            html.Div([
+                                html.Small([
+                                    html.Strong("Current Threshold: "),
+                                    html.Span(f"{current_threshold:.1%}", className="text-warning fw-bold")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Base Threshold: "),
+                                    html.Span(f"{base_threshold:.1%}", className="text-muted")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Learning Status: "),
+                                    html.Span("ACTIVE" if learning_active else "COLLECTING DATA",
+                                              className="text-success" if learning_active else "text-info")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Trades Analyzed: "),
+                                    html.Span(f"{total_trades}", className="text-info")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Recent Win Rate: "),
+                                    html.Span(f"{recent_win_rate:.1%}",
+                                              className="text-success" if recent_win_rate > 0.5 else "text-danger")
+                                ], className="d-block"),
+                                html.Small([
+                                    html.Strong("Recent Avg P&L: "),
+                                    html.Span(f"${recent_avg_pnl:.2f}",
+                                              className="text-success" if recent_avg_pnl > 0 else "text-danger")
+                                ], className="d-block")
+                            ])
+                        ], className="mb-3 p-2 border border-warning rounded")
+                    )
+            except Exception as e:
+                logger.warning(f"Error calculating adaptive threshold: {e}")
+                training_items.append(
+                    html.Div([
+                        html.P("Adaptive threshold learning error", className="text-danger"),
+                        html.Small(f"Error: {str(e)}", className="text-muted")
+                    ], className="mb-3 p-2 border border-danger rounded")
+                )
+            
             # Real-time Training Events Log
             training_items.append(
                 html.Div([
@@ -3861,28 +4027,63 @@ class TradingDashboard:
             return False
     
     def _calculate_rl_reward(self, closed_trade):
-        """Calculate reward for RL training based on trade performance"""
+        """Calculate enhanced reward for RL training with proper penalties for losing trades"""
         try:
             net_pnl = closed_trade.get('net_pnl', 0)
             duration = closed_trade.get('duration', timedelta(0))
             duration_hours = max(duration.total_seconds() / 3600, 0.01)  # Avoid division by zero
+            fees = closed_trade.get('fees', 0)
+            side = closed_trade.get('side', 'LONG')
             
-            # Base reward is normalized PnL
-            base_reward = net_pnl / 10.0  # Normalize to reasonable range
+            # Enhanced reward calculation with stronger penalties for losses
+            base_reward = net_pnl / 5.0  # Increase sensitivity (was /10.0)
             
-            # Time efficiency bonus/penalty
-            # Reward faster profitable trades, penalize slow losses
+            # Fee penalty - trading costs should be considered
+            fee_penalty = fees / 2.0  # Penalize high fee trades
+            
+            # Time efficiency factor - more nuanced
             if net_pnl > 0:
-                # Profitable trades: bonus for speed
-                time_factor = min(2.0, 1.0 / duration_hours)  # Max 2x bonus for very fast trades
-                reward = base_reward * time_factor
+                # Profitable trades: reward speed, but not too much
+                if duration_hours < 0.1:  # < 6 minutes
+                    time_bonus = 0.5  # Fast profit bonus
+                elif duration_hours < 1.0:  # < 1 hour
+                    time_bonus = 0.2  # Moderate speed bonus
+                else:
+                    time_bonus = 0.0  # No bonus for slow profits
+                reward = base_reward + time_bonus - fee_penalty
             else:
-                # Losing trades: penalty increases with time
-                time_penalty = min(2.0, duration_hours / 24.0)  # Max 2x penalty for very slow trades
-                reward = base_reward * (1 + time_penalty)
-            
-            # Clip reward to reasonable range
-            reward = max(-5.0, min(5.0, reward))
+                # Losing trades: STRONG penalties that increase with time and size
+                loss_magnitude_penalty = abs(net_pnl) / 3.0  # Stronger loss penalty
+                
+                # Time penalty for holding losing positions
+                if duration_hours > 4.0:  # Holding losses too long
+                    time_penalty = 2.0  # Severe penalty
+                elif duration_hours > 1.0:  # Moderate holding time
+                    time_penalty = 1.0  # Moderate penalty
+                else:
+                    time_penalty = 0.5  # Small penalty for quick losses
+                
+                # Total penalty for losing trades
+                reward = base_reward - loss_magnitude_penalty - time_penalty - fee_penalty
+            
+            # Risk-adjusted rewards based on position side and market conditions
+            if side == 'SHORT' and net_pnl > 0:
+                # Bonus for successful shorts (harder to time)
+                reward += 0.3
+            elif side == 'LONG' and net_pnl < 0 and duration_hours > 2.0:
+                # Extra penalty for holding losing longs too long
+                reward -= 0.5
+            
+            # Clip reward to reasonable range but allow stronger penalties
+            reward = max(-10.0, min(8.0, reward))  # Expanded range for better learning
+            
+            # Log detailed reward breakdown for analysis
+            if abs(net_pnl) > 0.5:  # Log significant trades
+                logger.info(f"[RL_REWARD] Trade #{closed_trade.get('trade_id')}: "
+                            f"PnL=${net_pnl:.2f}, Fees=${fees:.3f}, "
+                            f"Duration={duration_hours:.2f}h, Side={side}, "
+                            f"Final_Reward={reward:.3f}")
             
             return reward
             
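A worked example of the new reward shaping (numbers illustrative): a LONG trade that loses $1.50 over 2 hours with $0.10 in fees gets base_reward = -1.50/5 = -0.30, loss_magnitude_penalty = 1.50/3 = 0.50, time_penalty = 1.0 (duration between 1h and 4h), fee_penalty = 0.10/2 = 0.05, and no side adjustment (2.0h is not > 2.0), so reward = -0.30 - 0.50 - 1.0 - 0.05 = -1.85. A +$1.00 profit taken in 5 minutes gets 1.00/5 + 0.5 - 0.05 = 0.65, plus 0.3 more if it was a SHORT. Both land well inside the widened [-10.0, 8.0] clip.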
@@ -3982,55 +4183,143 @@ class TradingDashboard:
             return None
     
     def _prepare_rl_state(self, market_data, training_episode):
-        """Prepare state representation for RL training"""
+        """Prepare enhanced state representation for RL training with comprehensive market context"""
         try:
             # Calculate technical indicators
             df = market_data.copy()
             
-            # Price features
+            # Basic price features
             df['returns'] = df['close'].pct_change()
+            df['log_returns'] = np.log(df['close'] / df['close'].shift(1))
             df['price_ma_5'] = df['close'].rolling(5).mean()
             df['price_ma_20'] = df['close'].rolling(20).mean()
+            df['price_ma_50'] = df['close'].rolling(50).mean()
             
-            # Volatility
+            # Volatility and risk metrics
             df['volatility'] = df['returns'].rolling(10).std()
+            df['volatility_ma'] = df['volatility'].rolling(5).mean()
+            df['max_drawdown'] = (df['close'] / df['close'].cummax() - 1).rolling(20).min()
             
-            # RSI
+            # Momentum indicators
             df['rsi'] = self._calculate_rsi(df['close'])
+            df['rsi_ma'] = df['rsi'].rolling(5).mean()
+            df['momentum'] = df['close'] / df['close'].shift(10) - 1  # 10-period momentum
             
-            # Volume features
+            # Volume analysis
             df['volume_ma'] = df['volume'].rolling(10).mean()
             df['volume_ratio'] = df['volume'] / df['volume_ma']
+            df['volume_trend'] = df['volume_ma'] / df['volume_ma'].shift(5) - 1
+            
+            # Market structure
+            df['higher_highs'] = (df['high'] > df['high'].shift(1)).rolling(5).sum() / 5
+            df['lower_lows'] = (df['low'] < df['low'].shift(1)).rolling(5).sum() / 5
+            df['trend_strength'] = df['higher_highs'] - df['lower_lows']
+            
+            # Support/Resistance levels (simplified)
+            df['distance_to_high'] = (df['high'].rolling(20).max() - df['close']) / df['close']
+            df['distance_to_low'] = (df['close'] - df['low'].rolling(20).min()) / df['close']
+            
+            # Time-based features
+            df['hour'] = df.index.hour if hasattr(df.index, 'hour') else 12  # Default to noon
+            df['is_market_hours'] = ((df['hour'] >= 9) & (df['hour'] <= 16)).astype(float)
             
             # Drop NaN values
             df = df.dropna()
             
             if df.empty:
                 logger.warning("Empty dataframe after technical indicators calculation")
                 return None
             
-            # Take the last row as the state (most recent before trade)
+            # Enhanced state features (normalized)
             state_features = [
+                # Price momentum and trend
                 df['returns'].iloc[-1],
-                df['price_ma_5'].iloc[-1] / df['close'].iloc[-1] - 1,  # Normalized MA ratio
-                df['price_ma_20'].iloc[-1] / df['close'].iloc[-1] - 1,
+                df['log_returns'].iloc[-1],
+                (df['price_ma_5'].iloc[-1] / df['close'].iloc[-1] - 1),
+                (df['price_ma_20'].iloc[-1] / df['close'].iloc[-1] - 1),
+                (df['price_ma_50'].iloc[-1] / df['close'].iloc[-1] - 1),
+                df['momentum'].iloc[-1],
+                df['trend_strength'].iloc[-1],
+                
+                # Volatility and risk
                 df['volatility'].iloc[-1],
+                df['volatility_ma'].iloc[-1],
+                df['max_drawdown'].iloc[-1],
+                
+                # Momentum indicators
                 df['rsi'].iloc[-1] / 100.0,  # Normalize RSI to 0-1
-                df['volume_ratio'].iloc[-1]
+                df['rsi_ma'].iloc[-1] / 100.0,
+                
+                # Volume analysis
+                df['volume_ratio'].iloc[-1],
+                df['volume_trend'].iloc[-1],
+                
+                # Market structure
+                df['distance_to_high'].iloc[-1],
+                df['distance_to_low'].iloc[-1],
+                
+                # Time features
+                df['hour'].iloc[-1] / 24.0,  # Normalize hour to 0-1
+                df['is_market_hours'].iloc[-1],
             ]
             
-            # Add trade-specific features
+            # Add Williams pivot points features (250 features)
+            try:
+                pivot_features = self._get_williams_pivot_features(df)
+                if pivot_features:
+                    state_features.extend(pivot_features)
+                else:
+                    state_features.extend([0.0] * 250)  # Default if calculation fails
+            except Exception as e:
+                logger.warning(f"Error calculating Williams pivot points: {e}")
+                state_features.extend([0.0] * 250)  # Default features
+            
+            # Add multi-timeframe OHLCV features (300 features)
+            try:
+                multi_tf_features = self._get_multi_timeframe_features(training_episode.get('symbol', 'ETH/USDT'))
+                if multi_tf_features:
+                    state_features.extend(multi_tf_features)
+                else:
+                    state_features.extend([0.0] * 300)  # Default if calculation fails
+            except Exception as e:
+                logger.warning(f"Error calculating multi-timeframe features: {e}")
+                state_features.extend([0.0] * 300)  # Default features
+            
+            # Add trade-specific context
             entry_price = training_episode['entry_price']
             current_price = df['close'].iloc[-1]
             
-            state_features.extend([
-                (current_price - entry_price) / entry_price,  # Price change since entry
-                training_episode['duration_seconds'] / 3600.0,  # Duration in hours
-            ])
+            trade_features = [
+                (current_price - entry_price) / entry_price,  # Unrealized P&L
+                training_episode['duration_seconds'] / 3600.0,  # Duration in hours
+                1.0 if training_episode['side'] == 'LONG' else 0.0,  # Position side
+                min(training_episode['duration_seconds'] / 14400.0, 1.0),  # Time pressure (0-4h normalized)
+            ]
+            
+            state_features.extend(trade_features)
+            
+            # Add recent volatility context (last 3 periods)
+            if len(df) >= 3:
+                recent_volatility = [
+                    df['volatility'].iloc[-3],
+                    df['volatility'].iloc[-2],
+                    df['volatility'].iloc[-1]
+                ]
+                state_features.extend(recent_volatility)
+            else:
+                state_features.extend([0.0, 0.0, 0.0])
             
             # Ensure all features are valid numbers
             state_features = [float(x) if pd.notna(x) and np.isfinite(x) else 0.0 for x in state_features]
             
+            logger.debug(f"[RL_STATE] Prepared {len(state_features)} features for trade #{training_episode.get('trade_id')} (including Williams pivot points and multi-timeframe)")
+            
             return np.array(state_features, dtype=np.float32)
             
         except Exception as e:
-            logger.warning(f"Error preparing RL state: {e}")
+            logger.warning(f"Error preparing enhanced RL state: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
             return None
     
     def _send_rl_training_step(self, state, action, reward, training_episode):
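Tallying the blocks assembled above: 18 base market features, 250 Williams pivot features, 300 multi-timeframe features, 4 trade-context features, and 3 recent-volatility values give a 575-float state (the base count is read off the list literal in this hunk, so treat it as an estimate). A small sanity-check sketch under that assumption:

# Hypothetical size check mirroring _prepare_rl_state's layout
BASE, WILLIAMS, MULTI_TF, TRADE, RECENT_VOL = 18, 250, 300, 4, 3
STATE_SIZE = BASE + WILLIAMS + MULTI_TF + TRADE + RECENT_VOL  # 575

def state_is_well_formed(state) -> bool:
    """Reject states whose length drifts from the expected layout."""
    return state is not None and len(state) == STATE_SIZE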
@@ -4154,6 +4443,210 @@ class TradingDashboard:
         except Exception as e:
             logger.error(f"Error stopping streaming: {e}")
     
+    def _get_williams_pivot_features(self, df: pd.DataFrame) -> Optional[List[float]]:
+        """Calculate Williams Market Structure pivot points features"""
+        try:
+            # Import Williams Market Structure
+            try:
+                from training.williams_market_structure import WilliamsMarketStructure
+            except ImportError:
+                logger.warning("Williams Market Structure not available")
+                return None
+            
+            # Convert DataFrame to numpy array for Williams calculation
+            if len(df) < 50:
+                return None
+            
+            ohlcv_array = np.array([
+                [df.index[i].timestamp() if hasattr(df.index[i], 'timestamp') else time.time(),
+                 df['open'].iloc[i], df['high'].iloc[i], df['low'].iloc[i],
+                 df['close'].iloc[i], df['volume'].iloc[i]]
+                for i in range(len(df))
+            ])
+            
+            # Calculate Williams pivot points
+            williams = WilliamsMarketStructure()
+            structure_levels = williams.calculate_recursive_pivot_points(ohlcv_array)
+            
+            # Extract features (250 features total)
+            pivot_features = williams.extract_features_for_rl(structure_levels)
+            
+            logger.debug(f"[PIVOT] Calculated {len(pivot_features)} Williams pivot features")
+            return pivot_features
+            
+        except Exception as e:
+            logger.warning(f"Error calculating Williams pivot features: {e}")
+            return None
+    
+    def _get_multi_timeframe_features(self, symbol: str) -> Optional[List[float]]:
+        """Get multi-timeframe OHLCV features for comprehensive market context"""
+        try:
+            features = []
+            timeframes = ['1m', '5m', '15m', '1h', '4h', '1d']  # 6 timeframes
+            
+            for timeframe in timeframes:
+                try:
+                    # Get data for this timeframe
+                    df = self.data_provider.get_historical_data(
+                        symbol=symbol,
+                        timeframe=timeframe,
+                        limit=50,  # Last 50 bars
+                        refresh=True
+                    )
+                    
+                    if df is not None and not df.empty and len(df) >= 10:
+                        # Calculate normalized features for this timeframe
+                        tf_features = self._extract_timeframe_features(df, timeframe)
+                        features.extend(tf_features)
+                    else:
+                        # Fill with zeros if no data
+                        features.extend([0.0] * 50)  # 50 features per timeframe
+                    
+                except Exception as e:
+                    logger.debug(f"Error getting {timeframe} data: {e}")
+                    features.extend([0.0] * 50)  # 50 features per timeframe
+            
+            # Total: 6 timeframes * 50 features = 300 features
+            return features[:300]
+            
+        except Exception as e:
+            logger.warning(f"Error calculating multi-timeframe features: {e}")
+            return None
+    
+    def _extract_timeframe_features(self, df: pd.DataFrame, timeframe: str) -> List[float]:
+        """Extract normalized features from a single timeframe"""
+        try:
+            features = []
+            
+            # Price action features (10 features)
+            if len(df) >= 10:
+                close_prices = df['close'].tail(10).values
+                
+                # Price momentum and trends
+                features.extend([
+                    (close_prices[-1] - close_prices[0]) / close_prices[0],  # Total change
+                    (close_prices[-1] - close_prices[-2]) / close_prices[-2],  # Last change
+                    (close_prices[-1] - close_prices[-5]) / close_prices[-5],  # 5-period change
+                    np.std(close_prices) / np.mean(close_prices),  # Normalized volatility
+                    (np.max(close_prices) - np.min(close_prices)) / np.mean(close_prices),  # Range
+                ])
+                
+                # Trend direction indicators
+                higher_highs = sum(1 for i in range(1, len(close_prices)) if close_prices[i] > close_prices[i-1])
+                features.extend([
+                    higher_highs / (len(close_prices) - 1),  # % higher highs
+                    (len(close_prices) - 1 - higher_highs) / (len(close_prices) - 1),  # % lower highs
+                ])
+                
+                # Price position in range
+                current_price = close_prices[-1]
+                price_min = np.min(close_prices)
+                price_max = np.max(close_prices)
+                price_range = price_max - price_min
+                
+                if price_range > 0:
+                    features.extend([
+                        (current_price - price_min) / price_range,  # Position in range (0-1)
+                        (price_max - current_price) / price_range,  # Distance from high
+                        (current_price - price_min) / price_range,  # Distance from low
+                    ])
+                else:
+                    features.extend([0.5, 0.5, 0.5])
+            else:
+                features.extend([0.0] * 10)
+            
+            # Volume features (10 features)
+            if 'volume' in df.columns and len(df) >= 10:
+                volumes = df['volume'].tail(10).values
+                
+                features.extend([
+                    volumes[-1] / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Current vs avg
+                    np.std(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 0.0,  # Volume volatility
+                    (volumes[-1] - volumes[-2]) / volumes[-2] if volumes[-2] > 0 else 0.0,  # Volume change
+                    np.max(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Max spike
+                    np.min(volumes) / np.mean(volumes) if np.mean(volumes) > 0 else 1.0,  # Min ratio
+                ])
+                
+                # Volume trend
+                volume_trend = np.polyfit(range(len(volumes)), volumes, 1)[0]
+                features.append(volume_trend / np.mean(volumes) if np.mean(volumes) > 0 else 0.0)
+                
+                # Pad remaining volume features
+                features.extend([0.0] * 4)
+            else:
+                features.extend([0.0] * 10)
+            
+            # Technical indicators (20 features)
+            try:
+                # RSI
+                rsi = self._calculate_rsi(df['close'])
+                features.append(rsi.iloc[-1] / 100.0 if not rsi.empty else 0.5)
+                
+                # Moving averages
+                if len(df) >= 20:
+                    sma_20 = df['close'].rolling(20).mean()
+                    features.append((df['close'].iloc[-1] - sma_20.iloc[-1]) / sma_20.iloc[-1])
+                else:
+                    features.append(0.0)
+                
+                if len(df) >= 50:
+                    sma_50 = df['close'].rolling(50).mean()
+                    features.append((df['close'].iloc[-1] - sma_50.iloc[-1]) / sma_50.iloc[-1])
+                else:
+                    features.append(0.0)
+                
+                # MACD approximation
+                if len(df) >= 26:
+                    ema_12 = df['close'].ewm(span=12).mean()
+                    ema_26 = df['close'].ewm(span=26).mean()
+                    macd = ema_12 - ema_26
+                    features.append(macd.iloc[-1] / df['close'].iloc[-1])
+                else:
+                    features.append(0.0)
+                
+                # Bollinger Bands approximation
+                if len(df) >= 20:
+                    bb_middle = df['close'].rolling(20).mean()
+                    bb_std = df['close'].rolling(20).std()
+                    bb_upper = bb_middle + (bb_std * 2)
+                    bb_lower = bb_middle - (bb_std * 2)
+                    
+                    current_price = df['close'].iloc[-1]
+                    features.extend([
+                        (current_price - bb_lower.iloc[-1]) / (bb_upper.iloc[-1] - bb_lower.iloc[-1]) if bb_upper.iloc[-1] != bb_lower.iloc[-1] else 0.5,
+                        (bb_upper.iloc[-1] - current_price) / (bb_upper.iloc[-1] - bb_lower.iloc[-1]) if bb_upper.iloc[-1] != bb_lower.iloc[-1] else 0.5,
+                    ])
+                else:
+                    features.extend([0.5, 0.5])
+                
+                # Pad remaining technical features
+                features.extend([0.0] * 14)
+                
+            except Exception as e:
+                logger.debug(f"Error calculating technical indicators for {timeframe}: {e}")
+                features.extend([0.0] * 20)
+            
+            # Timeframe-specific features (10 features)
+            timeframe_weights = {
+                '1m': [1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+                '5m': [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
+                '15m': [0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
+                '1h': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
+                '4h': [0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
+                '1d': [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
+            }
+            
+            # Add timeframe encoding
+            features.extend(timeframe_weights.get(timeframe, [0.0] * 6))
+            features.extend([0.0] * 4)  # Pad to 10 features
+            
+            # Ensure exactly 50 features per timeframe
+            return features[:50]
+            
+        except Exception as e:
+            logger.warning(f"Error extracting features for {timeframe}: {e}")
+            return [0.0] * 50
+
 
 def create_dashboard(data_provider: DataProvider = None, orchestrator: TradingOrchestrator = None, trading_executor: TradingExecutor = None) -> TradingDashboard:
     """Factory function to create a trading dashboard"""
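_extract_timeframe_features keeps a fixed 50-float budget per timeframe — 10 price-action, 10 volume, 20 technical, and 10 timeframe-encoding slots, zero-padded and truncated with features[:50] — so six timeframes supply the 300 floats _get_multi_timeframe_features returns. A sketch of that padding contract (pad_to_budget is a hypothetical helper, not part of the commit):

from typing import List

FEATURES_PER_TF = 50
TIMEFRAMES = ['1m', '5m', '15m', '1h', '4h', '1d']

def pad_to_budget(features: List[float], budget: int = FEATURES_PER_TF) -> List[float]:
    """Zero-pad or truncate a feature list to the fixed per-timeframe budget."""
    return (features + [0.0] * budget)[:budget]

assert len(pad_to_budget([0.1, 0.2])) == FEATURES_PER_TF
assert len(TIMEFRAMES) * FEATURES_PER_TF == 300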