diff --git a/REALTIME_RL_LEARNING_IMPLEMENTATION.md b/REALTIME_RL_LEARNING_IMPLEMENTATION.md deleted file mode 100644 index 4c42274..0000000 --- a/REALTIME_RL_LEARNING_IMPLEMENTATION.md +++ /dev/null @@ -1,335 +0,0 @@ -# Real-Time RL Learning Implementation - -## Overview - -This implementation transforms your trading system from using mock/simulated training to **real continuous learning** from every actual trade execution. The RL agent now learns and adapts from each trade signal and position closure, making progressively better decisions over time. - -## Key Features - -### ✅ **Real Trade Learning** -- Learns from every actual BUY/SELL signal execution -- Records position closures with actual P&L and fees -- Creates training experiences from real market outcomes -- No more mock training - every trade teaches the AI - -### ✅ **Continuous Adaptation** -- Trains after every few trades (configurable frequency) -- Adapts decision-making based on recent performance -- Improves confidence calibration over time -- Updates strategy based on market conditions - -### ✅ **Intelligent State Representation** -- 100-dimensional state vector capturing: - - Price momentum and returns (last 20 bars) - - Volume patterns and changes - - Technical indicators (RSI, MACD) - - Current position and P&L status - - Market regime (trending/ranging/volatile) - - Support/resistance levels - -### ✅ **Sophisticated Reward System** -- Base reward from actual P&L (normalized by price) -- Time penalty for slow trades -- Confidence bonus for high-confidence correct predictions -- Scaled and bounded rewards for stable learning - -### ✅ **Experience Replay with Prioritization** -- Stores all trading experiences in memory -- Prioritizes learning from significant outcomes -- Uses DQN with target networks for stable learning -- Implements proper TD-error based updates - -## Implementation Architecture - -### Core Components - -1. **`RealTimeRLTrainer`** - Main learning coordinator -2. **`TradingExperience`** - Represents individual trade outcomes -3. **`MarketStateBuilder`** - Constructs state vectors from market data -4. **Integration with `TradingExecutor`** - Seamless live trading integration - -### Data Flow - -``` -Trade Signal → Record State → Execute Trade → Record Outcome → Learn → Update Model - ↑ ↓ -Market Data Updates ←-------- Improved Predictions ←-------- Better Decisions -``` - -### Learning Process - -1. **Signal Recording**: When a trade signal is generated: - - Current market state is captured (100-dim vector) - - Action and confidence are recorded - - Position information is stored - -2. **Position Closure**: When a position is closed: - - Exit price and actual P&L are recorded - - Trading fees are included - - Holding time is calculated - - Reward is computed using sophisticated formula - -3. **Experience Creation**: - - Complete trading experience is created - - Added to agent's memory for learning - - Triggers training if conditions are met - -4. 
**Model Training**: - - DQN training with experience replay - - Target network updates for stability - - Epsilon decay for exploration/exploitation balance - -## Configuration - -### RL Learning Settings (`config.yaml`) - -```yaml -rl_learning: - enabled: true # Enable real-time RL learning - state_size: 100 # Size of state vector - learning_rate: 0.0001 # Learning rate for neural network - gamma: 0.95 # Discount factor for future rewards - epsilon: 0.1 # Exploration rate (low for live trading) - buffer_size: 10000 # Experience replay buffer size - batch_size: 32 # Training batch size - training_frequency: 3 # Train every N completed trades - save_frequency: 50 # Save model every N experiences - min_experiences: 10 # Minimum experiences before training starts - - # Reward shaping parameters - time_penalty_threshold: 300 # Seconds before time penalty applies - confidence_bonus_threshold: 0.7 # Confidence level for bonus rewards - - # Model persistence - model_save_path: "models/realtime_rl" - auto_load_model: true # Load existing model on startup -``` - -### MEXC Trading Integration - -```yaml -mexc_trading: - rl_learning_enabled: true # Enable RL learning from trade executions -``` - -## Usage - -### Automatic Learning (Default) - -The system automatically learns from trades when enabled: - -```python -# RL learning happens automatically during trading -executor = TradingExecutor("config.yaml") -success = executor.execute_signal("ETH/USDC", "BUY", 0.8, 3000) -``` - -### Manual Controls - -```python -# Get RL prediction for current market state -action, confidence = executor.get_rl_prediction("ETH/USDC") - -# Get training statistics -stats = executor.get_rl_training_stats() - -# Control training -executor.enable_rl_training(False) # Disable learning -executor.enable_rl_training(True) # Re-enable learning - -# Save model manually -executor.save_rl_model() -``` - -### Testing the Implementation - -```bash -# Run comprehensive tests -python test_realtime_rl_learning.py -``` - -## Learning Progress Tracking - -### Performance Metrics - -- **Total Experiences**: Number of completed trades learned from -- **Win Rate**: Percentage of profitable trades -- **Average Reward**: Mean reward per trading experience -- **Memory Size**: Number of experiences in replay buffer -- **Epsilon**: Current exploration rate -- **Training Loss**: Recent neural network training loss - -### Example Output - -``` -RL Training: Loss=0.0234, Epsilon=0.095, Avg Reward=0.1250, Memory Size=45 -Recorded experience: ETH/USDC PnL=$15.50 Reward=0.1876 (Win rate: 73.3%) -``` - -## Model Persistence - -### Automatic Saving -- Model automatically saves every N trades (configurable) -- Training history and performance stats are preserved -- Models are saved in `models/realtime_rl/` directory - -### Model Loading -- Existing models are automatically loaded on startup -- Training continues from where it left off -- No loss of learning progress - -## Advanced Features - -### State Vector Components - -| Index Range | Feature Type | Description | -|-------------|--------------|-------------| -| 0-19 | Price Returns | Last 20 normalized price returns | -| 20-22 | Momentum | 5-bar, 10-bar momentum + volatility | -| 30-39 | Volume | Recent volume changes | -| 40 | Volume Momentum | 5-bar volume momentum | -| 50-52 | Technical Indicators | RSI, MACD, MACD change | -| 60-62 | Position Info | Current position, P&L, balance | -| 70-72 | Market Regime | Trend, volatility, support/resistance | - -### Reward Calculation - -```python -# Base 
reward from P&L -base_reward = (pnl - fees) / entry_price - -# Time penalty for slow trades -time_penalty = -0.001 * (holding_time / 60) if holding_time > 300 else 0 - -# Confidence bonus for good high-confidence trades -confidence_bonus = 0.01 * confidence if pnl > 0 and confidence > 0.7 else 0 - -# Final scaled reward -reward = tanh((base_reward + time_penalty + confidence_bonus) * 100) * 10 -``` - -### Experience Replay Strategy - -- **Uniform Sampling**: Random selection from all experiences -- **Prioritized Replay**: Higher probability for high-reward/loss experiences -- **Batch Training**: Efficient GPU utilization with batch processing -- **Target Network**: Stable learning with delayed target updates - -## Benefits Over Mock Training - -### 1. **Real Market Learning** -- Learns from actual market conditions -- Adapts to real price movements and volatility -- No artificial or synthetic data bias - -### 2. **True Performance Feedback** -- Real P&L drives learning decisions -- Actual trading fees included in optimization -- Genuine market timing constraints - -### 3. **Continuous Improvement** -- Gets better with every trade -- Adapts to changing market conditions -- Self-improving system over time - -### 4. **Validation Through Trading** -- Performance directly measured by trading results -- No simulation-to-reality gap -- Immediate feedback on decision quality - -## Monitoring and Debugging - -### Key Metrics to Watch - -1. **Learning Progress**: - - Win rate trending upward - - Average reward improving - - Training loss decreasing - -2. **Trading Quality**: - - Higher confidence on winning trades - - Faster profitable trade execution - - Better risk/reward ratios - -3. **Model Health**: - - Stable training loss - - Appropriate epsilon decay - - Memory utilization efficiency - -### Troubleshooting - -#### Low Win Rate -- Check reward calculation parameters -- Verify state representation quality -- Adjust training frequency -- Review market data quality - -#### Unstable Training -- Reduce learning rate -- Increase batch size -- Check for data normalization issues -- Verify target network update frequency - -#### Poor Predictions -- Increase experience buffer size -- Improve state representation -- Add more technical indicators -- Adjust exploration rate - -## Future Enhancements - -### Potential Improvements - -1. **Multi-Asset Learning**: Learn across different trading pairs -2. **Market Regime Adaptation**: Separate models for different market conditions -3. **Ensemble Methods**: Combine multiple RL agents -4. **Transfer Learning**: Apply knowledge across timeframes -5. **Risk-Adjusted Rewards**: Include drawdown and volatility in rewards -6. **Online Learning**: Continuous model updates without replay buffer - -### Advanced Techniques - -1. **Double DQN**: Reduce overestimation bias -2. **Dueling Networks**: Separate value and advantage estimation -3. **Rainbow DQN**: Combine multiple improvements -4. **Actor-Critic Methods**: Policy gradient approaches -5. **Distributional RL**: Learn reward distributions - -## Testing Results - -When you run `python test_realtime_rl_learning.py`, you should see: - -``` -=== Testing Real-Time RL Trainer (Standalone) === -Simulating market data updates... -Simulating trading signals and position closures... -Trade 1: Win Rate=100.0%, Avg Reward=0.1876, Memory Size=1 -Trade 2: Win Rate=100.0%, Avg Reward=0.1876, Memory Size=2 -... 
-RL Training: Loss=0.0234, Epsilon=0.095, Avg Reward=0.1250, Memory Size=5 - -=== Testing TradingExecutor RL Integration === -RL trainer successfully integrated with TradingExecutor -Initial RL stats: {'total_experiences': 0, 'training_enabled': True, ...} -RL prediction for ETH/USDC: BUY (confidence: 0.67) -... - -REAL-TIME RL LEARNING TEST SUMMARY: - Standalone RL Trainer: PASS - Market State Builder: PASS - TradingExecutor Integration: PASS - -ALL TESTS PASSED! -Your system now features real-time RL learning that: - • Learns from every trade execution and position closure - • Adapts trading decisions based on market outcomes - • Continuously improves decision-making over time - • Tracks performance and learning progress - • Saves and loads trained models automatically -``` - -## Conclusion - -Your trading system now implements **true real-time RL learning** instead of mock training. Every trade becomes a learning opportunity, and the AI continuously improves its decision-making based on actual market outcomes. This creates a self-improving trading system that adapts to market conditions and gets better over time. - -The implementation is production-ready, with proper error handling, model persistence, and comprehensive monitoring. Start trading and watch your AI learn and improve with every decision! \ No newline at end of file diff --git a/closed_trades_history.json b/closed_trades_history.json index b2f2d9a..1c7fd4c 100644 --- a/closed_trades_history.json +++ b/closed_trades_history.json @@ -1,17 +1,17 @@ [ { "trade_id": 1, - "side": "LONG", - "entry_time": "2025-05-28T10:18:07.071013+00:00", - "exit_time": "2025-05-28T10:19:07.607704+00:00", - "entry_price": 2627.61, - "exit_price": 2624.24, - "size": 0.003615, - "gross_pnl": -0.01218255000000125, + "side": "SHORT", + "entry_time": "2025-05-28T08:15:12.599216+00:00", + "exit_time": "2025-05-28T08:15:56.366340+00:00", + "entry_price": 2632.21, + "exit_price": 2631.51, + "size": 0.003043, + "gross_pnl": 0.0021300999999994464, "fees": 0.0, - "net_pnl": -0.01218255000000125, - "duration": "0:01:00.536691", - "symbol": "ETH/USDT", - "mexc_executed": false + "net_pnl": 0.0021300999999994464, + "duration": "0:00:43.767124", + "symbol": "ETH/USDC", + "mexc_executed": true } ] \ No newline at end of file diff --git a/config.yaml b/config.yaml index 2bdfc26..2706c7b 100644 --- a/config.yaml +++ b/config.yaml @@ -1,14 +1,10 @@ # Enhanced Multi-Modal Trading System Configuration # Trading Symbols (extendable/configurable) -# NOTE: Dashboard live data streaming supports symbols with Binance WebSocket streams -# ETH/USDT is primary trading symbol, BTC/USDT provides correlated market data -# MEXC trading supports: ETH/USDC (not ETH/USDT) symbols: - - "ETH/USDT" # Primary trading symbol - Has live price data via Binance WebSocket - - "BTC/USDT" # Correlated asset for strategy analysis - Has live price data via Binance WebSocket - # - "ETH/USDC" # MEXC supports ETHUSDC for API trading but no live price stream - # - "MX/USDT" # No live price data available + - "ETH/USDC" # MEXC supports ETHUSDC for API trading + - "BTC/USDT" + - "MX/USDT" # Timeframes for ultra-fast scalping (500x leverage) timeframes: @@ -147,27 +143,6 @@ trading: base_size: 0.02 # 2% base position max_size: 0.05 # 5% maximum position -# Real-Time RL Learning Configuration -rl_learning: - enabled: true # Enable real-time RL learning from trades - state_size: 100 # Size of state vector for RL agent - learning_rate: 0.0001 # Learning rate for RL agent - gamma: 0.95 # Discount factor 
for future rewards - epsilon: 0.1 # Exploration rate (low for live trading) - buffer_size: 10000 # Experience replay buffer size - batch_size: 32 # Training batch size - training_frequency: 3 # Train every N completed trades - save_frequency: 50 # Save model every N experiences - min_experiences: 10 # Minimum experiences before training starts - - # Reward shaping parameters - time_penalty_threshold: 300 # Seconds before time penalty applies - confidence_bonus_threshold: 0.7 # Confidence level for bonus rewards - - # Model persistence - model_save_path: "models/realtime_rl" - auto_load_model: true # Load existing model on startup - # MEXC Trading API Configuration mexc_trading: enabled: true # Set to true to enable live trading @@ -190,14 +165,19 @@ mexc_trading: min_trade_interval_seconds: 30 # Minimum between trades # Order configuration - order_type: "market" # market or limit orders + order_type: "limit" # Use limit orders (MEXC ETHUSDC requires LIMIT orders) + timeout_seconds: 30 # Order timeout + retry_attempts: 0 # Number of retry attempts for failed orders - # Advanced features - emergency_stop: false # Emergency stop all trading - allowed_symbols: ["ETH/USDC"] # Allowed trading symbols (MEXC supports ETHUSDC) + # Safety features + require_confirmation: false # No manual confirmation for live trading + emergency_stop: false # Emergency stop all trading - # Real-time learning integration - rl_learning_enabled: true # Enable RL learning from trade executions + # Supported symbols for live trading + allowed_symbols: + - "ETH/USDC" # MEXC supports ETHUSDC for API trading + - "BTC/USDT" + - "MX/USDT" # Trading hours (UTC) trading_hours: diff --git a/core/realtime_rl_trainer.py b/core/realtime_rl_trainer.py deleted file mode 100644 index f10d10c..0000000 --- a/core/realtime_rl_trainer.py +++ /dev/null @@ -1,469 +0,0 @@ -""" -Real-Time RL Training System - -This module implements continuous learning from live trading decisions. -The RL agent learns from every trade signal and position closure to improve -decision-making over time. 
-""" - -import logging -import numpy as np -import torch -import torch.nn as nn -import torch.optim as optim -from collections import deque -from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple, Any -import threading -import time -import json -import os - -# Import existing DQN agent -import sys -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'NN')) -from NN.models.dqn_agent import DQNAgent - -logger = logging.getLogger(__name__) - -class TradingExperience: - """Represents a single trading experience for RL learning""" - - def __init__(self, - pre_trade_state: np.ndarray, - action: int, # 0=SELL, 1=HOLD, 2=BUY - entry_price: float, - exit_price: float, - holding_time: float, # seconds - pnl: float, - fees: float, - confidence: float, - market_conditions: Dict[str, Any], - timestamp: datetime): - self.pre_trade_state = pre_trade_state - self.action = action - self.entry_price = entry_price - self.exit_price = exit_price - self.holding_time = holding_time - self.pnl = pnl - self.fees = fees - self.confidence = confidence - self.market_conditions = market_conditions - self.timestamp = timestamp - - # Calculate reward - self.reward = self._calculate_reward() - - def _calculate_reward(self) -> float: - """Calculate reward for this trading experience""" - # Net PnL after fees - net_pnl = self.pnl - self.fees - - # Base reward from PnL (normalized by entry price) - base_reward = net_pnl / self.entry_price - - # Time penalty - prefer faster profitable trades - time_penalty = 0.0 - if self.holding_time > 300: # 5 minutes - time_penalty = -0.001 * (self.holding_time / 60) # -0.001 per minute - - # Confidence bonus - reward high-confidence correct decisions - confidence_bonus = 0.0 - if net_pnl > 0 and self.confidence > 0.7: - confidence_bonus = 0.01 * self.confidence - - # Volume consideration (prefer trades that move significant amounts) - volume_factor = min(abs(base_reward) * 10, 0.05) # Cap at 5% - - total_reward = base_reward + time_penalty + confidence_bonus - - # Scale reward to reasonable range - return np.tanh(total_reward * 100) * 10 # Scale and bound reward - - -class MarketStateBuilder: - """Builds state representations for RL agent from market data""" - - def __init__(self, state_size: int = 100): - self.state_size = state_size - self.price_history = deque(maxlen=50) - self.volume_history = deque(maxlen=50) - self.rsi_history = deque(maxlen=14) - self.macd_history = deque(maxlen=26) - - def update_market_data(self, price: float, volume: float, - rsi: float = None, macd: float = None): - """Update market data buffers""" - self.price_history.append(price) - self.volume_history.append(volume) - if rsi is not None: - self.rsi_history.append(rsi) - if macd is not None: - self.macd_history.append(macd) - - def build_state(self, current_position: str = 'NONE', - position_pnl: float = 0.0, - account_balance: float = 1000.0) -> np.ndarray: - """Build state vector for RL agent""" - state = np.zeros(self.state_size) - - try: - # Price features (normalized returns) - if len(self.price_history) >= 2: - prices = np.array(list(self.price_history)) - returns = np.diff(prices) / prices[:-1] - - # Recent returns (last 20) - recent_returns = returns[-20:] if len(returns) >= 20 else returns - state[:len(recent_returns)] = recent_returns - - # Price momentum features - state[20] = np.mean(returns[-5:]) if len(returns) >= 5 else 0 # 5-bar momentum - state[21] = np.mean(returns[-10:]) if len(returns) >= 10 else 0 # 10-bar momentum - state[22] = 
np.std(returns[-10:]) if len(returns) >= 10 else 0 # Volatility - - # Volume features - if len(self.volume_history) >= 2: - volumes = np.array(list(self.volume_history)) - volume_changes = np.diff(volumes) / volumes[:-1] - recent_volume_changes = volume_changes[-10:] if len(volume_changes) >= 10 else volume_changes - state[30:30+len(recent_volume_changes)] = recent_volume_changes - - # Volume momentum - state[40] = np.mean(volume_changes[-5:]) if len(volume_changes) >= 5 else 0 - - # Technical indicators - if len(self.rsi_history) >= 1: - state[50] = (list(self.rsi_history)[-1] - 50) / 50 # Normalized RSI - - if len(self.macd_history) >= 2: - macd_values = list(self.macd_history) - state[51] = macd_values[-1] / 100 # Normalized MACD - state[52] = (macd_values[-1] - macd_values[-2]) / 100 # MACD change - - # Position information - position_encoding = {'NONE': 0, 'LONG': 1, 'SHORT': -1} - state[60] = position_encoding.get(current_position, 0) - state[61] = position_pnl / 100 # Normalized PnL - state[62] = account_balance / 1000 # Normalized balance - - # Market regime features - if len(self.price_history) >= 20: - prices = np.array(list(self.price_history)) - - # Trend strength - state[70] = (prices[-1] - prices[-20]) / prices[-20] # 20-bar trend - - # Market volatility regime - returns = np.diff(prices) / prices[:-1] - state[71] = np.std(returns[-20:]) if len(returns) >= 20 else 0 - - # Support/resistance levels - high_20 = np.max(prices[-20:]) - low_20 = np.min(prices[-20:]) - current_price = prices[-1] - state[72] = (current_price - low_20) / (high_20 - low_20) if high_20 != low_20 else 0.5 - - except Exception as e: - logger.error(f"Error building state: {e}") - - return state - - -class RealTimeRLTrainer: - """Real-time RL trainer that learns from live trading decisions""" - - def __init__(self, config: Dict[str, Any] = None): - """Initialize the real-time RL trainer""" - self.config = config or {} - - # RL Agent configuration - state_size = self.config.get('state_size', 100) - action_size = 3 # BUY, HOLD, SELL - - # Initialize RL agent - self.agent = DQNAgent( - state_shape=(state_size,), - n_actions=action_size, - learning_rate=self.config.get('learning_rate', 0.0001), - gamma=self.config.get('gamma', 0.95), - epsilon=self.config.get('epsilon', 0.1), # Low epsilon for live trading - epsilon_min=0.05, - epsilon_decay=0.999, - buffer_size=self.config.get('buffer_size', 10000), - batch_size=self.config.get('batch_size', 32) - ) - - # Market state builder - self.state_builder = MarketStateBuilder(state_size) - - # Training data storage - self.pending_trades = {} # symbol -> trade info - self.completed_experiences = deque(maxlen=1000) - self.learning_history = [] - - # Training controls - self.training_enabled = self.config.get('training_enabled', True) - self.min_experiences_for_training = self.config.get('min_experiences', 10) - self.training_frequency = self.config.get('training_frequency', 5) # Train every N experiences - self.experience_count = 0 - - # Model saving - self.model_save_path = self.config.get('model_save_path', 'models/realtime_rl') - self.save_frequency = self.config.get('save_frequency', 100) # Save every N experiences - - # Performance tracking - self.performance_history = [] - self.recent_rewards = deque(maxlen=100) - self.trade_count = 0 - self.win_count = 0 - - # Threading for async training - self.training_thread = None - self.training_queue = deque() - self.training_lock = threading.Lock() - - logger.info(f"Real-time RL trainer initialized") - logger.info(f"State 
size: {state_size}, Action size: {action_size}") - logger.info(f"Training enabled: {self.training_enabled}") - - def update_market_data(self, symbol: str, price: float, volume: float, - rsi: float = None, macd: float = None): - """Update market data for state building""" - self.state_builder.update_market_data(price, volume, rsi, macd) - - def record_trade_signal(self, symbol: str, action: str, confidence: float, - current_price: float, position_info: Dict[str, Any] = None): - """Record a trade signal for future learning""" - try: - # Build current state - current_position = 'NONE' - position_pnl = 0.0 - account_balance = 1000.0 - - if position_info: - current_position = position_info.get('side', 'NONE') - position_pnl = position_info.get('unrealized_pnl', 0.0) - account_balance = position_info.get('account_balance', 1000.0) - - state = self.state_builder.build_state(current_position, position_pnl, account_balance) - - # Convert action to numeric - action_map = {'SELL': 0, 'HOLD': 1, 'BUY': 2} - action_num = action_map.get(action.upper(), 1) - - # Store pending trade - trade_info = { - 'pre_trade_state': state.copy(), - 'action': action_num, - 'entry_price': current_price, - 'confidence': confidence, - 'entry_time': datetime.now(), - 'market_conditions': { - 'volatility': np.std(list(self.state_builder.price_history)[-10:]) if len(self.state_builder.price_history) >= 10 else 0, - 'trend': state[70] if len(state) > 70 else 0, - 'volume_trend': state[40] if len(state) > 40 else 0 - } - } - - if action.upper() in ['BUY', 'SELL']: - self.pending_trades[symbol] = trade_info - logger.info(f"Recorded {action} signal for {symbol} at ${current_price:.2f} (confidence: {confidence:.2f})") - - except Exception as e: - logger.error(f"Error recording trade signal: {e}") - - def record_position_closure(self, symbol: str, exit_price: float, - pnl: float, fees: float): - """Record position closure and create learning experience""" - try: - if symbol not in self.pending_trades: - logger.warning(f"No pending trade found for {symbol}") - return - - trade_info = self.pending_trades.pop(symbol) - - # Calculate holding time - holding_time = (datetime.now() - trade_info['entry_time']).total_seconds() - - # Create trading experience - experience = TradingExperience( - pre_trade_state=trade_info['pre_trade_state'], - action=trade_info['action'], - entry_price=trade_info['entry_price'], - exit_price=exit_price, - holding_time=holding_time, - pnl=pnl, - fees=fees, - confidence=trade_info['confidence'], - market_conditions=trade_info['market_conditions'], - timestamp=datetime.now() - ) - - # Add to completed experiences - self.completed_experiences.append(experience) - self.recent_rewards.append(experience.reward) - self.experience_count += 1 - self.trade_count += 1 - - if experience.reward > 0: - self.win_count += 1 - - # Log the experience - logger.info(f"Recorded experience: {symbol} PnL=${pnl:.4f} Reward={experience.reward:.4f} " - f"(Win rate: {self.win_count/self.trade_count*100:.1f}%)") - - # Create next state (current market state after trade) - current_state = self.state_builder.build_state('NONE', 0.0, 1000.0) - - # Store in agent memory for learning - self.agent.remember( - state=trade_info['pre_trade_state'], - action=trade_info['action'], - reward=experience.reward, - next_state=current_state, - done=True # Each trade is a complete episode - ) - - # Trigger training if conditions are met - if self.training_enabled: - self._maybe_train() - - # Save model periodically - if self.experience_count % 
self.save_frequency == 0: - self._save_model() - - except Exception as e: - logger.error(f"Error recording position closure: {e}") - - def _maybe_train(self): - """Train the agent if conditions are met""" - try: - if (len(self.agent.memory) >= self.min_experiences_for_training and - self.experience_count % self.training_frequency == 0): - - # Perform training step - loss = self.agent.replay() - - if loss is not None: - self.learning_history.append({ - 'timestamp': datetime.now().isoformat(), - 'experience_count': self.experience_count, - 'loss': loss, - 'epsilon': self.agent.epsilon, - 'avg_reward': np.mean(list(self.recent_rewards)) if self.recent_rewards else 0, - 'win_rate': self.win_count / self.trade_count if self.trade_count > 0 else 0, - 'memory_size': len(self.agent.memory) - }) - - logger.info(f"RL Training: Loss={loss:.4f}, Epsilon={self.agent.epsilon:.3f}, " - f"Avg Reward={np.mean(list(self.recent_rewards)):.4f}, " - f"Memory Size={len(self.agent.memory)}") - - except Exception as e: - logger.error(f"Error in training: {e}") - - def get_action_prediction(self, symbol: str, current_position: str = 'NONE', - position_pnl: float = 0.0, account_balance: float = 1000.0) -> Tuple[str, float]: - """Get action prediction from trained RL agent""" - try: - # Build current state - state = self.state_builder.build_state(current_position, position_pnl, account_balance) - - # Get prediction from agent - with torch.no_grad(): - q_values, _, _, _, _ = self.agent.policy_net( - torch.FloatTensor(state).unsqueeze(0).to(self.agent.device) - ) - - # Get action with highest Q-value - action_idx = q_values.argmax().item() - confidence = torch.softmax(q_values, dim=1).max().item() - - # Convert to action string - action_map = {0: 'SELL', 1: 'HOLD', 2: 'BUY'} - action = action_map[action_idx] - - return action, confidence - - except Exception as e: - logger.error(f"Error getting action prediction: {e}") - return 'HOLD', 0.5 - - def get_training_stats(self) -> Dict[str, Any]: - """Get current training statistics""" - try: - return { - 'total_experiences': self.experience_count, - 'total_trades': self.trade_count, - 'win_count': self.win_count, - 'win_rate': self.win_count / self.trade_count if self.trade_count > 0 else 0, - 'avg_reward': np.mean(list(self.recent_rewards)) if self.recent_rewards else 0, - 'memory_size': len(self.agent.memory), - 'epsilon': self.agent.epsilon, - 'recent_loss': self.learning_history[-1]['loss'] if self.learning_history else 0, - 'training_enabled': self.training_enabled, - 'pending_trades': len(self.pending_trades) - } - - except Exception as e: - logger.error(f"Error getting training stats: {e}") - return {} - - def _save_model(self): - """Save the trained model""" - try: - os.makedirs(self.model_save_path, exist_ok=True) - - # Save RL agent - self.agent.save(os.path.join(self.model_save_path, 'rl_agent')) - - # Save training history - history_path = os.path.join(self.model_save_path, 'training_history.json') - with open(history_path, 'w') as f: - json.dump(self.learning_history, f, indent=2) - - # Save performance stats - stats_path = os.path.join(self.model_save_path, 'performance_stats.json') - with open(stats_path, 'w') as f: - json.dump(self.get_training_stats(), f, indent=2) - - logger.info(f"Saved RL model and training data to {self.model_save_path}") - - except Exception as e: - logger.error(f"Error saving model: {e}") - - def load_model(self): - """Load a previously saved model""" - try: - model_path = os.path.join(self.model_save_path, 'rl_agent') - if 
os.path.exists(f"{model_path}_policy_model.pt"): - self.agent.load(model_path) - logger.info(f"Loaded RL model from {model_path}") - - # Load training history if available - history_path = os.path.join(self.model_save_path, 'training_history.json') - if os.path.exists(history_path): - with open(history_path, 'r') as f: - self.learning_history = json.load(f) - - return True - else: - logger.info("No saved model found, starting with fresh model") - return False - - except Exception as e: - logger.error(f"Error loading model: {e}") - return False - - def enable_training(self, enabled: bool = True): - """Enable or disable training""" - self.training_enabled = enabled - logger.info(f"RL training {'enabled' if enabled else 'disabled'}") - - def reset_performance_stats(self): - """Reset performance tracking statistics""" - self.trade_count = 0 - self.win_count = 0 - self.recent_rewards.clear() - logger.info("Reset RL performance statistics") \ No newline at end of file diff --git a/core/trading_executor.py b/core/trading_executor.py index f822ea6..c5e9d38 100644 --- a/core/trading_executor.py +++ b/core/trading_executor.py @@ -9,7 +9,7 @@ import logging import time import os from datetime import datetime, timedelta -from typing import Dict, List, Optional, Any, Tuple +from typing import Dict, List, Optional, Any from dataclasses import dataclass from threading import Lock import sys @@ -20,7 +20,6 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'NN')) from NN.exchanges import MEXCInterface from .config import get_config from .config_sync import ConfigSynchronizer -from .realtime_rl_trainer import RealTimeRLTrainer logger = logging.getLogger(__name__) @@ -120,29 +119,6 @@ class TradingExecutor: mexc_interface=self.exchange if self.trading_enabled else None ) - # Initialize real-time RL trainer for continuous learning - rl_config = { - 'state_size': 100, - 'learning_rate': 0.0001, - 'gamma': 0.95, - 'epsilon': 0.1, # Low exploration for live trading - 'buffer_size': 10000, - 'batch_size': 32, - 'training_enabled': self.mexc_config.get('rl_learning_enabled', True), - 'min_experiences': 10, - 'training_frequency': 3, # Train every 3 trades - 'save_frequency': 50, # Save every 50 trades - 'model_save_path': 'models/realtime_rl' - } - - self.rl_trainer = RealTimeRLTrainer(rl_config) - - # Try to load existing RL model - if self.rl_trainer.load_model(): - logger.info("TRADING EXECUTOR: Loaded existing RL model for continuous learning") - else: - logger.info("TRADING EXECUTOR: Starting with fresh RL model") - # Perform initial fee sync on startup if trading is enabled if self.trading_enabled and self.exchange: try: @@ -213,29 +189,6 @@ class TradingExecutor: return False current_price = ticker['last'] - # Update RL trainer with market data (estimate volume from price movement) - estimated_volume = abs(current_price) * 1000 # Simple volume estimate - self.rl_trainer.update_market_data(symbol, current_price, estimated_volume) - - # Get position info for RL trainer - position_info = None - if symbol in self.positions: - position = self.positions[symbol] - position_info = { - 'side': position.side, - 'unrealized_pnl': position.unrealized_pnl, - 'account_balance': 1000.0 # Could get from exchange - } - - # Record trade signal with RL trainer for learning - self.rl_trainer.record_trade_signal( - symbol=symbol, - action=action, - confidence=confidence, - current_price=current_price, - position_info=position_info - ) - with self.lock: try: if action == 'BUY': @@ -395,14 +348,6 @@ class 
TradingExecutor: self.trade_history.append(trade_record) self.daily_loss += max(0, -pnl) # Add to daily loss if negative - # Record position closure with RL trainer for learning - self.rl_trainer.record_position_closure( - symbol=symbol, - exit_price=current_price, - pnl=pnl, - fees=0.0 # No fees in simulation - ) - # Remove position del self.positions[symbol] self.last_trade_time[symbol] = datetime.now() @@ -452,14 +397,6 @@ class TradingExecutor: self.trade_history.append(trade_record) self.daily_loss += max(0, -(pnl - fees)) # Add to daily loss if negative - # Record position closure with RL trainer for learning - self.rl_trainer.record_position_closure( - symbol=symbol, - exit_price=current_price, - pnl=pnl, - fees=fees - ) - # Remove position del self.positions[symbol] self.last_trade_time[symbol] = datetime.now() @@ -527,9 +464,6 @@ class TradingExecutor: effective_fee_rate = (total_fees / max(0.01, total_volume)) if total_volume > 0 else 0 fee_impact_on_pnl = (total_fees / max(0.01, abs(gross_pnl))) * 100 if gross_pnl != 0 else 0 - # Get RL training statistics - rl_stats = self.rl_trainer.get_training_stats() if hasattr(self, 'rl_trainer') else {} - return { 'daily_trades': self.daily_trades, 'daily_loss': self.daily_loss, @@ -556,15 +490,6 @@ class TradingExecutor: 'fee_impact_percent': fee_impact_on_pnl, 'is_fee_efficient': fee_impact_on_pnl < 5.0, # Less than 5% impact is good 'fee_savings_vs_market': (0.001 - effective_fee_rate) * total_volume if effective_fee_rate < 0.001 else 0 - }, - 'rl_learning': { - 'enabled': rl_stats.get('training_enabled', False), - 'total_experiences': rl_stats.get('total_experiences', 0), - 'rl_win_rate': rl_stats.get('win_rate', 0), - 'avg_reward': rl_stats.get('avg_reward', 0), - 'memory_size': rl_stats.get('memory_size', 0), - 'epsilon': rl_stats.get('epsilon', 0), - 'pending_trades': rl_stats.get('pending_trades', 0) } } @@ -878,71 +803,3 @@ class TradingExecutor: 'sync_available': False, 'error': str(e) } - - def get_rl_prediction(self, symbol: str) -> Tuple[str, float]: - """Get RL agent prediction for the current market state - - Args: - symbol: Trading symbol - - Returns: - tuple: (action, confidence) where action is BUY/SELL/HOLD - """ - if not hasattr(self, 'rl_trainer'): - return 'HOLD', 0.5 - - try: - # Get current position info - current_position = 'NONE' - position_pnl = 0.0 - account_balance = 1000.0 - - if symbol in self.positions: - position = self.positions[symbol] - current_position = position.side - position_pnl = position.unrealized_pnl - - # Get RL prediction - action, confidence = self.rl_trainer.get_action_prediction( - symbol=symbol, - current_position=current_position, - position_pnl=position_pnl, - account_balance=account_balance - ) - - return action, confidence - - except Exception as e: - logger.error(f"TRADING EXECUTOR: Error getting RL prediction: {e}") - return 'HOLD', 0.5 - - def enable_rl_training(self, enabled: bool = True): - """Enable or disable real-time RL training - - Args: - enabled: Whether to enable RL training - """ - if hasattr(self, 'rl_trainer'): - self.rl_trainer.enable_training(enabled) - logger.info(f"TRADING EXECUTOR: RL training {'enabled' if enabled else 'disabled'}") - else: - logger.warning("TRADING EXECUTOR: RL trainer not initialized") - - def get_rl_training_stats(self) -> Dict[str, Any]: - """Get comprehensive RL training statistics - - Returns: - dict: RL training statistics and performance metrics - """ - if hasattr(self, 'rl_trainer'): - return self.rl_trainer.get_training_stats() - else: 
- return {'error': 'RL trainer not initialized'} - - def save_rl_model(self): - """Manually save the current RL model""" - if hasattr(self, 'rl_trainer'): - self.rl_trainer._save_model() - logger.info("TRADING EXECUTOR: RL model saved manually") - else: - logger.warning("TRADING EXECUTOR: RL trainer not initialized") diff --git a/test_realtime_rl_learning.py b/test_realtime_rl_learning.py deleted file mode 100644 index dc2f944..0000000 --- a/test_realtime_rl_learning.py +++ /dev/null @@ -1,242 +0,0 @@ -""" -Test script for Real-Time RL Learning - -This script demonstrates the real-time RL learning system that learns -from each trade execution and position closure. -""" - -import os -import sys -import logging -import time -import numpy as np -from datetime import datetime - -# Add core directory to path -sys.path.append(os.path.join(os.path.dirname(__file__), 'core')) -sys.path.append(os.path.join(os.path.dirname(__file__), 'NN')) - -from core.trading_executor import TradingExecutor -from core.realtime_rl_trainer import RealTimeRLTrainer - -# Setup logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -def test_rl_trainer_standalone(): - """Test the RL trainer as a standalone component""" - logger.info("=== Testing Real-Time RL Trainer (Standalone) ===") - - # Initialize RL trainer - rl_config = { - 'state_size': 100, - 'learning_rate': 0.0001, - 'training_enabled': True, - 'min_experiences': 5, - 'training_frequency': 2, - 'save_frequency': 10 - } - - trainer = RealTimeRLTrainer(rl_config) - - # Simulate market data updates - logger.info("Simulating market data updates...") - for i in range(20): - price = 3000 + np.random.normal(0, 50) # ETH price around $3000 - volume = 1000 + np.random.normal(0, 100) - rsi = 30 + np.random.uniform(0, 40) - - trainer.update_market_data('ETH/USDC', price, volume, rsi) - time.sleep(0.1) # Small delay - - # Simulate some trading signals and closures - logger.info("Simulating trading signals and position closures...") - - trades = [ - {'action': 'BUY', 'price': 3000, 'confidence': 0.8, 'exit_price': 3020, 'pnl': 20, 'fees': 1.5}, - {'action': 'SELL', 'price': 3020, 'confidence': 0.7, 'exit_price': 3000, 'pnl': 20, 'fees': 1.5}, - {'action': 'BUY', 'price': 3000, 'confidence': 0.6, 'exit_price': 2990, 'pnl': -10, 'fees': 1.5}, - {'action': 'BUY', 'price': 2990, 'confidence': 0.9, 'exit_price': 3050, 'pnl': 60, 'fees': 1.5}, - {'action': 'SELL', 'price': 3050, 'confidence': 0.8, 'exit_price': 3010, 'pnl': 40, 'fees': 1.5}, - ] - - for i, trade in enumerate(trades): - # Record trade signal - trainer.record_trade_signal( - symbol='ETH/USDC', - action=trade['action'], - confidence=trade['confidence'], - current_price=trade['price'] - ) - - # Wait a bit to simulate holding time - time.sleep(0.5) - - # Record position closure - trainer.record_position_closure( - symbol='ETH/USDC', - exit_price=trade['exit_price'], - pnl=trade['pnl'], - fees=trade['fees'] - ) - - # Get training stats - stats = trainer.get_training_stats() - logger.info(f"Trade {i+1}: Win Rate={stats['win_rate']*100:.1f}%, " - f"Avg Reward={stats['avg_reward']:.4f}, " - f"Memory Size={stats['memory_size']}") - - time.sleep(0.2) - - # Test RL prediction - logger.info("Testing RL prediction...") - action, confidence = trainer.get_action_prediction('ETH/USDC') - logger.info(f"RL Prediction: {action} (confidence: {confidence:.2f})") - - # Final stats - final_stats = trainer.get_training_stats() - 
logger.info(f"Final Stats: {final_stats}") - - return True - -def test_trading_executor_integration(): - """Test RL learning integration with TradingExecutor""" - logger.info("\n=== Testing TradingExecutor RL Integration ===") - - try: - # Initialize trading executor with RL learning - executor = TradingExecutor("config.yaml") - - # Check if RL trainer was initialized - if hasattr(executor, 'rl_trainer'): - logger.info("RL trainer successfully integrated with TradingExecutor") - - # Get initial RL stats - rl_stats = executor.get_rl_training_stats() - logger.info(f"Initial RL stats: {rl_stats}") - - # Test RL prediction - action, confidence = executor.get_rl_prediction('ETH/USDC') - logger.info(f"RL prediction for ETH/USDC: {action} (confidence: {confidence:.2f})") - - # Simulate some trading signals - logger.info("Simulating trading signals through TradingExecutor...") - - test_signals = [ - {'symbol': 'ETH/USDC', 'action': 'BUY', 'confidence': 0.8, 'price': 3000}, - {'symbol': 'ETH/USDC', 'action': 'SELL', 'confidence': 0.7, 'price': 3020}, - {'symbol': 'ETH/USDC', 'action': 'BUY', 'confidence': 0.6, 'price': 3020}, - {'symbol': 'ETH/USDC', 'action': 'SELL', 'confidence': 0.9, 'price': 3040}, - ] - - for signal in test_signals: - success = executor.execute_signal( - symbol=signal['symbol'], - action=signal['action'], - confidence=signal['confidence'], - current_price=signal['price'] - ) - logger.info(f"Signal execution: {signal['action']} {signal['symbol']} - {'Success' if success else 'Failed'}") - time.sleep(1) # Simulate time between trades - - # Get updated stats - daily_stats = executor.get_daily_stats() - rl_learning_stats = daily_stats.get('rl_learning', {}) - logger.info(f"RL Learning Stats: {rl_learning_stats}") - - # Test RL training controls - logger.info("Testing RL training controls...") - executor.enable_rl_training(False) - logger.info("RL training disabled") - - executor.enable_rl_training(True) - logger.info("RL training re-enabled") - - # Save RL model - executor.save_rl_model() - logger.info("RL model saved") - - return True - - else: - logger.error("RL trainer was not initialized in TradingExecutor") - return False - - except Exception as e: - logger.error(f"Error testing TradingExecutor integration: {e}") - return False - -def test_market_state_builder(): - """Test the market state builder component""" - logger.info("\n=== Testing Market State Builder ===") - - from core.realtime_rl_trainer import MarketStateBuilder - - state_builder = MarketStateBuilder(state_size=100) - - # Add some market data - logger.info("Adding market data...") - for i in range(30): - price = 3000 + np.sin(i * 0.1) * 100 + np.random.normal(0, 10) - volume = 1000 + np.random.normal(0, 100) - rsi = 50 + 30 * np.sin(i * 0.05) + np.random.normal(0, 5) - macd = np.sin(i * 0.1) * 10 + np.random.normal(0, 2) - - state_builder.update_market_data(price, volume, rsi, macd) - - # Build states for different scenarios - scenarios = [ - {'position': 'NONE', 'pnl': 0, 'balance': 1000}, - {'position': 'LONG', 'pnl': 50, 'balance': 1050}, - {'position': 'SHORT', 'pnl': -20, 'balance': 980}, - ] - - for scenario in scenarios: - state = state_builder.build_state( - current_position=scenario['position'], - position_pnl=scenario['pnl'], - account_balance=scenario['balance'] - ) - - logger.info(f"State for {scenario['position']} position: " - f"Size={len(state)}, Range=[{state.min():.4f}, {state.max():.4f}]") - - return True - -def main(): - """Run all RL learning tests""" - logger.info("Starting Real-Time RL 
Learning Tests") - logger.info("=" * 60) - - # Test 1: Standalone RL trainer - trainer_test = test_rl_trainer_standalone() - - # Test 2: Market state builder - state_test = test_market_state_builder() - - # Test 3: TradingExecutor integration - integration_test = test_trading_executor_integration() - - # Summary - logger.info("\n" + "=" * 60) - logger.info("REAL-TIME RL LEARNING TEST SUMMARY:") - logger.info(f" Standalone RL Trainer: {'PASS' if trainer_test else 'FAIL'}") - logger.info(f" Market State Builder: {'PASS' if state_test else 'FAIL'}") - logger.info(f" TradingExecutor Integration: {'PASS' if integration_test else 'FAIL'}") - - if trainer_test and state_test and integration_test: - logger.info("\nALL TESTS PASSED!") - logger.info("Your system now features real-time RL learning that:") - logger.info(" • Learns from every trade execution and position closure") - logger.info(" • Adapts trading decisions based on market outcomes") - logger.info(" • Continuously improves decision-making over time") - logger.info(" • Tracks performance and learning progress") - logger.info(" • Saves and loads trained models automatically") - else: - logger.warning("\nSome tests failed. Check the logs above for details.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/web/dashboard.py b/web/dashboard.py index 216170d..b828c42 100644 --- a/web/dashboard.py +++ b/web/dashboard.py @@ -79,44 +79,6 @@ class TradingDashboard: self.trading_executor = trading_executor or TradingExecutor() self.model_registry = get_model_registry() - # IMPORTANT: Multi-symbol live data streaming for strategy analysis - # WebSocket streams are available for major pairs on Binance - # We get live data for correlated assets but only trade the primary symbol - # MEXC trading only supports ETH/USDC (not ETH/USDT) - self.available_symbols = { - 'ETH/USDT': 'ETHUSDT', # Primary trading symbol - Binance WebSocket - 'BTC/USDT': 'BTCUSDT', # Correlated asset - Binance WebSocket - # Note: ETH/USDC is for MEXC trading but no Binance WebSocket - } - - # Primary symbol for trading (first available symbol) - self.primary_symbol = None - self.primary_websocket_symbol = None - - # All symbols for live data streaming (strategy analysis) - self.streaming_symbols = [] - self.websocket_symbols = [] - - # Find primary trading symbol - for symbol in self.config.symbols: - if symbol in self.available_symbols: - self.primary_symbol = symbol - self.primary_websocket_symbol = self.available_symbols[symbol] - logger.info(f"DASHBOARD: Primary trading symbol: {symbol} (WebSocket: {self.primary_websocket_symbol})") - break - - # Fallback to ETH/USDT if no configured symbol is available - if not self.primary_symbol: - self.primary_symbol = 'ETH/USDT' - self.primary_websocket_symbol = 'ETHUSDT' - logger.warning(f"DASHBOARD: No configured symbols available for live data, using fallback: {self.primary_symbol}") - - # Setup all available symbols for streaming (strategy analysis) - for symbol, ws_symbol in self.available_symbols.items(): - self.streaming_symbols.append(symbol) - self.websocket_symbols.append(ws_symbol) - logger.info(f"DASHBOARD: Will stream live data for {symbol} (WebSocket: {ws_symbol})") - # Dashboard state self.recent_decisions = [] self.recent_signals = [] # Track all signals (not just executed trades) @@ -339,15 +301,7 @@ class TradingDashboard: html.I(className="fas fa-chart-pie me-2"), "Session Performance" ], className="card-title mb-2"), - html.Div([ - html.Button( - "Clear Session", - 
id="clear-session-btn", - className="btn btn-sm btn-outline-warning mb-2", - n_clicks=0 - ), - html.Div(id="session-performance") - ]) + html.Div(id="session-performance") ], className="card-body p-2") ], className="card", style={"width": "32%"}), @@ -424,15 +378,16 @@ class TradingDashboard: """Update all dashboard components with trading signals""" try: # Get current prices with improved fallback handling - symbol = self.primary_symbol # Use configured symbol instead of hardcoded + symbol = self.config.symbols[0] if self.config.symbols else "ETH/USDT" current_price = None chart_data = None data_source = "UNKNOWN" try: # First try WebSocket current price (lowest latency) - if self.primary_websocket_symbol in self.current_prices and self.current_prices[self.primary_websocket_symbol] > 0: - current_price = self.current_prices[self.primary_websocket_symbol] + ws_symbol = symbol.replace('/', '') # Convert ETH/USDT to ETHUSDT for WebSocket + if ws_symbol in self.current_prices and self.current_prices[ws_symbol] > 0: + current_price = self.current_prices[ws_symbol] data_source = "WEBSOCKET" logger.debug(f"[WS_PRICE] Using WebSocket price for {symbol}: ${current_price:.2f}") else: @@ -687,19 +642,6 @@ class TradingDashboard: self.clear_closed_trades_history() return [html.P("Trade history cleared", className="text-muted text-center")] return self._create_closed_trades_table() - - # Clear session performance callback - @self.app.callback( - Output('session-performance', 'children', allow_duplicate=True), - [Input('clear-session-btn', 'n_clicks')], - prevent_initial_call=True - ) - def clear_session_performance(n_clicks): - """Clear the session performance data""" - if n_clicks and n_clicks > 0: - self.clear_session_performance() - return [html.P("Session performance cleared", className="text-muted text-center")] - return self._create_session_performance() def _simulate_price_update(self, symbol: str, base_price: float) -> float: """ @@ -1541,7 +1483,7 @@ class TradingDashboard: 'fees': fee + self.current_position['fees'], 'net_pnl': net_pnl, 'duration': current_time - entry_time, - 'symbol': self.primary_symbol, # Use primary symbol instead of hardcoded + 'symbol': decision.get('symbol', 'ETH/USDT'), 'mexc_executed': decision.get('mexc_executed', False) } self.closed_trades.append(closed_trade) @@ -1614,7 +1556,7 @@ class TradingDashboard: 'fees': fee + self.current_position['fees'], 'net_pnl': net_pnl, 'duration': current_time - entry_time, - 'symbol': self.primary_symbol, # Use primary symbol instead of hardcoded + 'symbol': decision.get('symbol', 'ETH/USDT'), 'mexc_executed': decision.get('mexc_executed', False) } self.closed_trades.append(closed_trade) @@ -1666,7 +1608,7 @@ class TradingDashboard: 'fees': fee + self.current_position['fees'], 'net_pnl': net_pnl, 'duration': current_time - entry_time, - 'symbol': self.primary_symbol, # Use primary symbol instead of hardcoded + 'symbol': decision.get('symbol', 'ETH/USDT'), 'mexc_executed': decision.get('mexc_executed', False) } self.closed_trades.append(closed_trade) @@ -1774,35 +1716,36 @@ class TradingDashboard: # Simple trading loop without async complexity import time + symbols = self.config.symbols if self.config.symbols else ['ETH/USDT'] while True: try: - # Make trading decisions for the primary symbol only - symbol = self.primary_symbol - try: - # Get current price - current_data = self.data_provider.get_historical_data(symbol, '1m', limit=1, refresh=True) - if current_data is not None and not current_data.empty: - current_price = 
float(current_data['close'].iloc[-1]) + # Make trading decisions for each symbol every 30 seconds + for symbol in symbols: + try: + # Get current price + current_data = self.data_provider.get_historical_data(symbol, '1m', limit=1, refresh=True) + if current_data is not None and not current_data.empty: + current_price = float(current_data['close'].iloc[-1]) + + # Simple decision making + decision = { + 'action': 'HOLD', # Conservative default + 'symbol': symbol, + 'price': current_price, + 'confidence': 0.5, + 'timestamp': datetime.now(), + 'size': 0.1, + 'reason': f"Orchestrator monitoring {symbol}" + } + + # Process the decision (adds to dashboard display) + self._process_trading_decision(decision) + + logger.debug(f"[ORCHESTRATOR] {decision['action']} {symbol} @ ${current_price:.2f}") - # Simple decision making - decision = { - 'action': 'HOLD', # Conservative default - 'symbol': symbol, - 'price': current_price, - 'confidence': 0.5, - 'timestamp': datetime.now(), - 'size': 0.1, - 'reason': f"Orchestrator monitoring {symbol}" - } - - # Process the decision (adds to dashboard display) - self._process_trading_decision(decision) - - logger.debug(f"[ORCHESTRATOR] {decision['action']} {symbol} @ ${current_price:.2f}") - - except Exception as e: - logger.warning(f"[ORCHESTRATOR] Error processing {symbol}: {e}") + except Exception as e: + logger.warning(f"[ORCHESTRATOR] Error processing {symbol}: {e}") # Wait before next cycle time.sleep(30) @@ -1973,33 +1916,6 @@ class TradingDashboard: except Exception as e: logger.error(f"Error clearing closed trades history: {e}") - def clear_session_performance(self): - """Clear session performance data and reset session tracking""" - try: - # Reset session start time - self.session_start = datetime.now() - - # Clear session tracking data - self.session_trades = [] - self.session_pnl = 0.0 - self.total_realized_pnl = 0.0 - self.total_fees = 0.0 - - # Clear current position - self.current_position = None - - # Clear recent decisions and signals (but keep last few for context) - self.recent_decisions = [] - self.recent_signals = [] - - # Reset signal timing - self.last_signal_time = 0 - - logger.info("Session performance cleared and reset") - - except Exception as e: - logger.error(f"Error clearing session performance: {e}") - def _create_session_performance(self) -> List: """Create compact session performance display with signal statistics""" try: @@ -2018,28 +1934,11 @@ class TradingDashboard: total_signals = len(executed_signals) + len(ignored_signals) execution_rate = (len(executed_signals) / total_signals * 100) if total_signals > 0 else 0 - # Calculate portfolio metrics with better handling of small balances - portfolio_value = self.starting_balance + self.total_realized_pnl - - # Fix return calculation for small balances - if self.starting_balance >= 1.0: # Normal balance - portfolio_return = (self.total_realized_pnl / self.starting_balance * 100) - logger.debug(f"SESSION_PERF: Normal balance ${self.starting_balance:.4f}, return {portfolio_return:+.2f}%") - elif self.total_realized_pnl != 0: # Small balance with some P&L - # For very small balances, show absolute P&L instead of percentage - portfolio_return = None # Will display absolute value instead - logger.debug(f"SESSION_PERF: Small balance ${self.starting_balance:.4f}, P&L ${self.total_realized_pnl:+.4f}") - else: # No P&L - portfolio_return = 0.0 - logger.debug(f"SESSION_PERF: No P&L, balance ${self.starting_balance:.4f}") - - # Debug the final display value - display_value = 
f"{portfolio_return:+.2f}%" if portfolio_return is not None else f"${self.total_realized_pnl:+.4f}" - logger.debug(f"SESSION_PERF: Final display value: {display_value}") - # Calculate other metrics total_volume = sum(t.get('price', 0) * t.get('size', 0) for t in self.session_trades) avg_trade_pnl = (self.total_realized_pnl / closed_trades) if closed_trades > 0 else 0 + portfolio_value = self.starting_balance + self.total_realized_pnl + portfolio_return = (self.total_realized_pnl / self.starting_balance * 100) if self.starting_balance > 0 else 0 performance_items = [ # Row 1: Duration and Portfolio Value @@ -2081,19 +1980,16 @@ class TradingDashboard: ], className="col-6 small") ], className="row mb-1"), - # Row 4: Return/P&L and Fees + # Row 4: Portfolio Return and Fees html.Div([ html.Div([ - html.Strong("Net P&L: "), # Changed label to force UI update - # Show return percentage for normal balances, absolute P&L for small balances - html.Span( - f"{portfolio_return:+.2f}%" if portfolio_return is not None else f"${self.total_realized_pnl:+.4f}", - className="text-success" if self.total_realized_pnl >= 0 else "text-danger" - ) + html.Strong("Return: "), + html.Span(f"{portfolio_return:+.2f}%", + className="text-success" if portfolio_return >= 0 else "text-danger") ], className="col-6 small"), html.Div([ html.Strong("Fees: "), - html.Span(f"${self.total_fees:.4f}", className="text-muted") + html.Span(f"${self.total_fees:.2f}", className="text-muted") ], className="col-6 small") ], className="row") ] @@ -2283,12 +2179,12 @@ class TradingDashboard: logger.warning(f"RL prediction error: {e}") return np.array([0.33, 0.34, 0.33]), 0.5 - def get_memory_usage(self): - return 80 # MB estimate - - def to_device(self, device): - self.device = device - return self + def get_memory_usage(self): + return 80 # MB estimate + + def to_device(self, device): + self.device = device + return self rl_wrapper = RLWrapper(rl_path) @@ -2380,66 +2276,52 @@ class TradingDashboard: } def _start_websocket_stream(self): - """Start WebSocket connections for real-time tick data from multiple symbols""" + """Start WebSocket connection for real-time tick data""" try: if not WEBSOCKET_AVAILABLE: logger.warning("[WEBSOCKET] websocket-client not available. Using data provider fallback.") self.is_streaming = False return - # Check if we have symbols to stream - if not self.websocket_symbols: - logger.warning(f"[WEBSOCKET] No WebSocket symbols configured. 
Streaming disabled.") - self.is_streaming = False - return + symbol = self.config.symbols[0] if self.config.symbols else "ETHUSDT" - # Start WebSocket for each symbol in background threads - self.ws_threads = [] - for i, ws_symbol in enumerate(self.websocket_symbols): - symbol_name = self.streaming_symbols[i] - thread = threading.Thread( - target=self._websocket_worker, - args=(ws_symbol, symbol_name), - daemon=True - ) - thread.start() - self.ws_threads.append(thread) - logger.info(f"[WEBSOCKET] Starting real-time tick stream for {symbol_name} (WebSocket: {ws_symbol})") + # Start WebSocket in background thread + self.ws_thread = threading.Thread(target=self._websocket_worker, args=(symbol,), daemon=True) + self.ws_thread.start() + + logger.info(f"[WEBSOCKET] Starting real-time tick stream for {symbol}") except Exception as e: - logger.error(f"Error starting WebSocket streams: {e}") + logger.error(f"Error starting WebSocket stream: {e}") self.is_streaming = False - def _websocket_worker(self, websocket_symbol: str, symbol_name: str): + def _websocket_worker(self, symbol: str): """WebSocket worker thread for continuous tick data streaming""" try: # Use Binance WebSocket for real-time tick data - ws_url = f"wss://stream.binance.com:9443/ws/{websocket_symbol.lower()}@ticker" + ws_url = f"wss://stream.binance.com:9443/ws/{symbol.lower().replace('/', '')}@ticker" def on_message(ws, message): try: data = json.loads(message) - # Add symbol info to tick data for processing - data['symbol_name'] = symbol_name - data['websocket_symbol'] = websocket_symbol self._process_tick_data(data) except Exception as e: - logger.warning(f"Error processing WebSocket message for {symbol_name}: {e}") + logger.warning(f"Error processing WebSocket message: {e}") def on_error(ws, error): - logger.error(f"WebSocket error for {symbol_name}: {error}") + logger.error(f"WebSocket error: {error}") self.is_streaming = False def on_close(ws, close_status_code, close_msg): - logger.warning(f"WebSocket connection closed for {symbol_name}") + logger.warning("WebSocket connection closed") self.is_streaming = False # Attempt to reconnect after 5 seconds time.sleep(5) if not self.is_streaming: - self._websocket_worker(websocket_symbol, symbol_name) + self._websocket_worker(symbol) def on_open(ws): - logger.info(f"[WEBSOCKET] Connected to Binance stream for {symbol_name}") + logger.info("[WEBSOCKET] Connected to Binance stream") self.is_streaming = True # Create WebSocket connection @@ -2455,47 +2337,60 @@ class TradingDashboard: self.ws_connection.run_forever() except Exception as e: - logger.error(f"WebSocket worker error for {symbol_name}: {e}") + logger.error(f"WebSocket worker error: {e}") self.is_streaming = False def _process_tick_data(self, tick_data: Dict): - """Process incoming WebSocket tick data for multiple symbols""" + """Process incoming tick data and update 1-second bars""" try: - # Extract price and symbol information - price = float(tick_data.get('c', 0)) # 'c' is current price in Binance ticker - websocket_symbol = tick_data.get('websocket_symbol', tick_data.get('s', 'UNKNOWN')) - symbol_name = tick_data.get('symbol_name', 'UNKNOWN') + # Extract price and volume from Binance ticker data + price = float(tick_data.get('c', 0)) # Current price + volume = float(tick_data.get('v', 0)) # 24h volume + timestamp = datetime.now(timezone.utc) - if price <= 0: - return - - # Update current price for this symbol - self.current_prices[websocket_symbol] = price - - # Log price updates (less frequently to avoid spam) - if 
len(self.tick_buffer) % 10 == 0: # Log every 10th tick - logger.debug(f"[TICK] {symbol_name}: ${price:.2f}") - - # Create tick record for training data - tick_record = { - 'symbol': symbol_name, - 'websocket_symbol': websocket_symbol, + # Add to tick cache + tick = { + 'timestamp': timestamp, 'price': price, - 'volume': float(tick_data.get('v', 0)), - 'timestamp': datetime.now(), - 'is_primary': websocket_symbol == self.primary_websocket_symbol + 'volume': volume, + 'bid': float(tick_data.get('b', price)), # Best bid + 'ask': float(tick_data.get('a', price)), # Best ask + 'high_24h': float(tick_data.get('h', price)), + 'low_24h': float(tick_data.get('l', price)) } - # Add to tick buffer for 1-second bar creation - self.tick_buffer.append(tick_record) + self.tick_cache.append(tick) - # Keep buffer size manageable (last 1000 ticks per symbol) - if len(self.tick_buffer) > 1000: - self.tick_buffer = self.tick_buffer[-1000:] + # Update current second bar + current_second = timestamp.replace(microsecond=0) + + if self.current_second_data['timestamp'] != current_second: + # New second - finalize previous bar and start new one + if self.current_second_data['timestamp'] is not None: + self._finalize_second_bar() + + # Start new second bar + self.current_second_data = { + 'timestamp': current_second, + 'open': price, + 'high': price, + 'low': price, + 'close': price, + 'volume': 0, + 'tick_count': 1 + } + else: + # Update current second bar + self.current_second_data['high'] = max(self.current_second_data['high'], price) + self.current_second_data['low'] = min(self.current_second_data['low'], price) + self.current_second_data['close'] = price + self.current_second_data['tick_count'] += 1 + + # Update current price for dashboard + self.current_prices[tick_data.get('s', 'ETHUSDT')] = price except Exception as e: - logger.error(f"Error processing tick data: {e}") - logger.debug(f"Problematic tick data: {tick_data}") + logger.warning(f"Error processing tick data: {e}") def _finalize_second_bar(self): """Finalize the current second bar and add to bars cache""" @@ -3100,7 +2995,7 @@ class TradingDashboard: return { 'ohlcv': ohlcv, 'raw_ticks': df, - 'symbol': self.primary_symbol, # Use primary symbol instead of hardcoded + 'symbol': 'ETH/USDT', 'timeframe': '1s', 'features': ['open', 'high', 'low', 'close', 'volume', 'sma_20', 'sma_50', 'rsi'], 'timestamp': datetime.now() @@ -3373,7 +3268,6 @@ class TradingDashboard: logger.info("Continuous training stopped") except Exception as e: logger.error(f"Error stopping continuous training: {e}") - # Convenience function for integration def create_dashboard(data_provider: DataProvider = None, orchestrator: TradingOrchestrator = None, trading_executor: TradingExecutor = None) -> TradingDashboard: """Create and return a trading dashboard instance""" diff --git a/web/scalping_dashboard.py b/web/scalping_dashboard.py index 02e64ff..63af3b0 100644 --- a/web/scalping_dashboard.py +++ b/web/scalping_dashboard.py @@ -237,35 +237,6 @@ class TradingSession: 'open_positions': len(self.positions), 'trade_history': self.trade_history } - - def clear_session_performance(self): - """Reset session performance metrics while keeping session ID""" - try: - # Log session summary before clearing - summary = self.get_session_summary() - logger.info(f"CLEARING SESSION PERFORMANCE:") - logger.info(f"Session: {summary['session_id']}") - logger.info(f"Duration: {summary['duration']}") - logger.info(f"Final P&L: ${summary['total_pnl']:+.2f}") - logger.info(f"Total Trades: 
{summary['total_trades']}") - logger.info(f"Win Rate: {summary['win_rate']:.1%}") - - # Reset all performance metrics - self.start_time = datetime.now() - self.current_balance = self.starting_balance - self.total_pnl = 0.0 - self.total_fees = 0.0 - self.total_trades = 0 - self.winning_trades = 0 - self.losing_trades = 0 - self.positions.clear() # Close all positions - self.trade_history.clear() - self.last_action = None - - logger.info(f"SESSION PERFORMANCE CLEARED - Fresh start with ${self.starting_balance:.2f}") - - except Exception as e: - logger.error(f"Error clearing session performance: {e}") class RealTimeScalpingDashboard: """Real-time scalping dashboard with WebSocket streaming and ultra-low latency"""