diff --git a/crypto/gogo2/main.py b/crypto/gogo2/main.py index e5c7e02..41d30f6 100644 --- a/crypto/gogo2/main.py +++ b/crypto/gogo2/main.py @@ -17,66 +17,114 @@ from dotenv import load_dotenv import ccxt import websockets from torch.utils.tensorboard import SummaryWriter -import torch.cuda.amp as amp # Add this import at the top +import torch.amp as amp # Update import to use torch.amp instead of torch.cuda.amp from sklearn.preprocessing import MinMaxScaler -import copy -import argparse import traceback import math -import matplotlib.dates as mdates -from matplotlib.figure import Figure -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - handlers=[logging.FileHandler("trading_bot.log"), logging.StreamHandler()] -) -logger = logging.getLogger("trading_bot") +from mexc_trading import MexcTradingClient # Load environment variables load_dotenv() MEXC_API_KEY = os.getenv('MEXC_API_KEY') MEXC_SECRET_KEY = os.getenv('MEXC_SECRET_KEY') +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger('trading_bot') + # Constants -INITIAL_BALANCE = 100 # USD -MAX_LEVERAGE = 100 -STOP_LOSS_PERCENT = 0.5 # Very tight stop loss (0.5%) due to high leverage -TAKE_PROFIT_PERCENT = 1.5 # Take profit at 1.5% -MEMORY_SIZE = 100000 -BATCH_SIZE = 64 -GAMMA = 0.99 # Discount factor -EPSILON_START = 1.0 -EPSILON_END = 0.05 -EPSILON_DECAY = 10000 -STATE_SIZE = 40 # Size of our state representation -LEARNING_RATE = 1e-4 -TARGET_UPDATE = 10 # Update target network every 10 episodes +INITIAL_BALANCE = 1000.0 # Starting balance in USDT +MAX_LEVERAGE = 1 # Maximum leverage to use +STOP_LOSS_PERCENT = 2.0 # Default stop loss percentage +TAKE_PROFIT_PERCENT = 4.0 # Default take profit percentage +STATE_SIZE = 40 # Size of the state vector for the agent (matches saved model) +LEARNING_RATE = 1e-4 # Learning rate for the optimizer +MEMORY_SIZE = 100000 # Size of the replay memory +BATCH_SIZE = 64 # Batch size for training +GAMMA = 0.99 # Discount factor for future rewards +EPSILON_START = 1.0 # Starting value of epsilon for exploration +EPSILON_END = 0.05 # Minimum value of epsilon +EPSILON_DECAY = 10000 # Decay rate for epsilon +TARGET_UPDATE = 10 # Update target network every N episodes # Experience replay tuple Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state', 'done']) # Add this function near the top of the file, after the imports but before any classes -def find_local_extrema(prices, window=5): - """Find local minima (bottoms) and maxima (tops) in price data""" +def find_local_extrema(prices, window=5, volumes=None, volume_threshold=0.7): + """ + Find local minima (bottoms) and maxima (tops) in price data with volume confirmation + + Parameters: + ----------- + prices : list or array + Price data + window : int + Window size for extrema detection + volumes : list or array, optional + Volume data corresponding to prices + volume_threshold : float + Threshold for volume significance (percentile) + + Returns: + -------- + bottoms : list + Indices of local bottoms + tops : list + Indices of local tops + """ bottoms = [] tops = [] if len(prices) < window * 2 + 1: return bottoms, tops + # Determine volume significance if volumes are provided + volume_significance = None + if volumes is not None and len(volumes) == len(prices): + # Calculate rolling average volume + rolling_vol = [] + for i in range(len(volumes)): + start_idx = max(0, i - window * 2) + end_idx = i + 1 + rolling_vol.append(np.mean(volumes[start_idx:end_idx])) + + # Calculate volume threshold based on percentile + volume_significance = np.percentile(rolling_vol, volume_threshold * 100) + for i in range(window, len(prices) - window): # Check if this is a local minimum (bottom) - if all(prices[i] <= prices[i-j] for j in range(1, window+1)) and \ - all(prices[i] <= prices[i+j] for j in range(1, window+1)): - bottoms.append(i) + is_bottom = all(prices[i] <= prices[i-j] for j in range(1, window+1)) and \ + all(prices[i] <= prices[i+j] for j in range(1, window+1)) # Check if this is a local maximum (top) - if all(prices[i] >= prices[i-j] for j in range(1, window+1)) and \ - all(prices[i] >= prices[i+j] for j in range(1, window+1)): - tops.append(i) + is_top = all(prices[i] >= prices[i-j] for j in range(1, window+1)) and \ + all(prices[i] >= prices[i+j] for j in range(1, window+1)) + + # Apply volume filter if available + if volume_significance is not None: + # For bottoms, we want to see increased volume on the reversal + if is_bottom: + # Check if volume is significant at this point or in the next few candles + vol_confirmed = False + for j in range(3): # Check current and next 2 candles + if i+j < len(volumes) and volumes[i+j] > volume_significance: + vol_confirmed = True + break + + if vol_confirmed: + bottoms.append(i) + + # For tops, we want to see increased volume at the peak + elif is_top: + if volumes[i] > volume_significance: + tops.append(i) + else: + # If no volume data, just use price action + if is_bottom: + bottoms.append(i) + elif is_top: + tops.append(i) return bottoms, tops @@ -191,106 +239,213 @@ class DQN(nn.Module): class PricePredictionModel(nn.Module): def __init__(self, input_size=30, hidden_size=128, output_size=5, num_layers=2): super(PricePredictionModel, self).__init__() - self.lstm = nn.LSTM(1, hidden_size, num_layers=num_layers, batch_first=True, dropout=0.2) - self.fc = nn.Linear(hidden_size, output_size) - self.scaler = MinMaxScaler(feature_range=(0, 1)) - self.is_fitted = False + # Input features: price and volume + self.lstm = nn.LSTM(2, hidden_size, num_layers=num_layers, batch_first=True, dropout=0.2) + self.attention = nn.MultiheadAttention(hidden_size, num_heads=4, batch_first=True) + + # Price prediction head + self.price_fc = nn.Linear(hidden_size, output_size) + + # Extrema prediction head (probability of high/low in next candles) + self.extrema_fc = nn.Linear(hidden_size, output_size * 2) # For each candle: [p_low, p_high] + + # Scalers + self.price_scaler = MinMaxScaler(feature_range=(0, 1)) + self.volume_scaler = MinMaxScaler(feature_range=(0, 1)) + self.is_fitted_price = False + self.is_fitted_volume = False def forward(self, x): - # x shape: [batch_size, seq_len, 1] + # x shape: [batch_size, seq_len, 2] (price, volume) lstm_out, _ = self.lstm(x) - # Use the last time step output - predictions = self.fc(lstm_out[:, -1, :]) - return predictions - - def preprocess(self, data): - # Reshape data for scaler - data_reshaped = np.array(data).reshape(-1, 1) - # Fit scaler if not already fitted - if not self.is_fitted: - self.scaler.fit(data_reshaped) - self.is_fitted = True + # Apply attention mechanism + attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out) + + # Use the last time step output with attention + last_hidden = attn_out[:, -1, :] + + # Price predictions + price_pred = self.price_fc(last_hidden) + + # Extrema predictions (probabilities of highs and lows) + extrema_logits = self.extrema_fc(last_hidden) + extrema_pred = torch.sigmoid(extrema_logits) # Convert to probabilities + + return price_pred, extrema_pred + + def preprocess(self, price_history, volume_history=None): + # Reshape price data for scaler + price_reshaped = np.array(price_history).reshape(-1, 1) + + # Fit price scaler if not already fitted + if not self.is_fitted_price: + self.price_scaler.fit(price_reshaped) + self.is_fitted_price = True + + # Transform price data + scaled_price = self.price_scaler.transform(price_reshaped) + + # If volume data is provided + if volume_history is not None: + # Reshape volume data for scaler + volume_reshaped = np.array(volume_history).reshape(-1, 1) + + # Fit volume scaler if not already fitted + if not self.is_fitted_volume: + self.volume_scaler.fit(volume_reshaped) + self.is_fitted_volume = True + + # Transform volume data + scaled_volume = self.volume_scaler.transform(volume_reshaped) + + # Combine price and volume data + scaled_data = np.hstack((scaled_price, scaled_volume)) + else: + # If no volume data, use zeros + scaled_volume = np.zeros_like(scaled_price) + scaled_data = np.hstack((scaled_price, scaled_volume)) - # Transform data - scaled_data = self.scaler.transform(data_reshaped) return scaled_data - def postprocess(self, scaled_predictions): + def postprocess_price(self, scaled_predictions): # Inverse transform to get actual price values - return self.scaler.inverse_transform(scaled_predictions.reshape(-1, 1)).flatten() + return self.price_scaler.inverse_transform(scaled_predictions.reshape(-1, 1)).flatten() - def predict_next_candles(self, price_history, num_candles=5): + def predict_next_candles(self, price_history, volume_history=None, num_candles=5): if len(price_history) < 30: # Need enough history - return np.zeros(num_candles) + return np.zeros(num_candles), np.zeros((num_candles, 2)) + + # Use default volume if not provided + if volume_history is None: + volume_history = np.ones_like(price_history) + + # Ensure same length + min_len = min(len(price_history), len(volume_history)) + price_history = price_history[-min_len:] + volume_history = volume_history[-min_len:] # Preprocess data - scaled_data = self.preprocess(price_history) + scaled_data = self.preprocess(price_history, volume_history) # Create sequence - sequence = scaled_data[-30:].reshape(1, 30, 1) + sequence = scaled_data[-30:].reshape(1, 30, 2) sequence_tensor = torch.FloatTensor(sequence).to(next(self.parameters()).device) # Get predictions with torch.no_grad(): - scaled_predictions = self(sequence_tensor).cpu().numpy()[0] + price_pred, extrema_pred = self(sequence_tensor) + scaled_price_predictions = price_pred.cpu().numpy()[0] + extrema_predictions = extrema_pred.cpu().numpy()[0].reshape(num_candles, 2) - # Postprocess predictions - predictions = self.postprocess(scaled_predictions) - return predictions + # Postprocess price predictions + price_predictions = self.postprocess_price(scaled_price_predictions) + + return price_predictions, extrema_predictions - def train_on_new_data(self, price_history, optimizer, epochs=5): - """Train the model on new price data""" + def train_on_new_data(self, price_history, volume_history=None, optimizer=None, epochs=5, extrema_weight=1.0): + """Train the model on new price and volume data with focus on extrema""" # Convert to numpy array if it's not already if isinstance(price_history, list): - price_history = np.array(price_history, dtype=np.float32) # Force float32 + price_history = np.array(price_history, dtype=np.float32) + + if volume_history is None: + volume_history = np.ones_like(price_history) + elif isinstance(volume_history, list): + volume_history = np.array(volume_history, dtype=np.float32) + + # Ensure same length + min_len = min(len(price_history), len(volume_history)) + price_history = price_history[-min_len:] + volume_history = volume_history[-min_len:] if len(price_history) < 35: # Need enough history for training - return 0.0 + return 0.0, 0.0 + + # Find local extrema in the price history + bottoms, tops = find_local_extrema(price_history, window=5) + + # Create extrema labels (0: normal, 1: bottom/buy, 2: top/sell) + extrema_labels = np.zeros(len(price_history)) + for idx in bottoms: + if 0 <= idx < len(extrema_labels): + extrema_labels[idx] = 1 # Bottom/Buy + for idx in tops: + if 0 <= idx < len(extrema_labels): + extrema_labels[idx] = 2 # Top/Sell # Preprocess data - scaled_data = self.preprocess(price_history) + scaled_data = self.preprocess(price_history, volume_history) # Create sequences and targets sequences = [] - targets = [] + price_targets = [] + extrema_targets = [] for i in range(len(scaled_data) - 35): - # Sequence: 30 time steps + # Sequence: 30 time steps of price and volume seq = scaled_data[i:i+30] - # Target: next 5 time steps - target = scaled_data[i+30:i+35].flatten() + + # Price target: next 5 time steps + price_target = scaled_data[i+30:i+35, 0].flatten() # Only price column + + # Extrema target: binary indicators for next 5 time steps + # For each of the next 5 candles, we predict [p_low, p_high] + extrema_target = np.zeros(5 * 2) + for j in range(5): + idx = i + 30 + j + if idx < len(extrema_labels): + if extrema_labels[idx] == 1: # Bottom/Buy + extrema_target[j*2] = 1.0 # p_low + elif extrema_labels[idx] == 2: # Top/Sell + extrema_target[j*2 + 1] = 1.0 # p_high sequences.append(seq) - targets.append(target) + price_targets.append(price_target) + extrema_targets.append(extrema_target) if not sequences: # If no sequences were created - return 0.0 + return 0.0, 0.0 # Convert to tensors - sequences_tensor = torch.FloatTensor(np.array(sequences).reshape(-1, 30, 1)).to(next(self.parameters()).device) - targets_tensor = torch.FloatTensor(np.array(targets)).to(next(self.parameters()).device) + sequences_tensor = torch.FloatTensor(np.array(sequences)).to(next(self.parameters()).device) + price_targets_tensor = torch.FloatTensor(np.array(price_targets)).to(next(self.parameters()).device) + extrema_targets_tensor = torch.FloatTensor(np.array(extrema_targets)).to(next(self.parameters()).device) + + # Create optimizer if not provided + if optimizer is None: + optimizer = optim.Adam(self.parameters(), lr=1e-3) # Training loop - total_loss = 0 + total_price_loss = 0 + total_extrema_loss = 0 + for _ in range(epochs): # Forward pass - predictions = self(sequences_tensor) + price_pred, extrema_pred = self(sequences_tensor) - # Calculate loss - loss = F.mse_loss(predictions, targets_tensor) + # Reshape extrema predictions to match targets + extrema_pred_flat = extrema_pred.reshape(-1, 10) # 5 candles * 2 (low/high) + + # Calculate losses + price_loss = F.mse_loss(price_pred, price_targets_tensor) + extrema_loss = F.binary_cross_entropy(extrema_pred_flat, extrema_targets_tensor) + + # Combined loss with weighting + combined_loss = price_loss + extrema_weight * extrema_loss # Backward pass and optimize optimizer.zero_grad() - loss.backward() + combined_loss.backward() optimizer.step() - total_loss += loss.item() + total_price_loss += price_loss.item() + total_extrema_loss += extrema_loss.item() - return total_loss / epochs + return total_price_loss / epochs, total_extrema_loss / epochs class TradingEnvironment: - def __init__(self, initial_balance=INITIAL_BALANCE, window_size=30, demo=True): + def __init__(self, initial_balance=INITIAL_BALANCE, window_size=30, demo=True, trading_client=None): """Initialize the trading environment""" self.initial_balance = initial_balance self.balance = initial_balance @@ -313,6 +468,9 @@ class TradingEnvironment: self.current_step = 0 self.current_price = 0 + # Initialize trading client for live trading + self.trading_client = trading_client + # Initialize features self.features = { 'price': [], @@ -462,6 +620,11 @@ class TradingEnvironment: self.current_price = self.data[self.current_step]['close'] # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE) + if not self.demo and self.trading_client: + # Execute real trades in live mode + asyncio.create_task(self._execute_live_action(action)) + + # Calculate reward (simulation still runs in parallel with live trading) reward = self.calculate_reward(action) # Check for stop loss / take profit hits @@ -476,6 +639,105 @@ class TradingEnvironment: return next_state, reward, done + async def _execute_live_action(self, action): + """Execute live trading action using the trading client""" + if not self.trading_client: + logger.warning("No trading client available for live trading") + return + + try: + if action == 0: # HOLD + # No action needed + pass + + elif action == 1: # BUY/LONG + if self.position == 'flat': + # Open long position + position_size = self.calculate_position_size() + stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100) + take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100) + + success = await self.trading_client.open_position( + position_type='long', + size=position_size, + entry_price=self.current_price, + stop_loss=stop_loss, + take_profit=take_profit + ) + + if success: + logger.info(f"LIVE: Successfully opened LONG position at {self.current_price}") + else: + logger.error("LIVE: Failed to open LONG position") + + elif self.position == 'short': + # Close short position and open long + await self.trading_client.close_position(reason="switch_to_long") + + # Open new long position + position_size = self.calculate_position_size() + stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100) + take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100) + + await self.trading_client.open_position( + position_type='long', + size=position_size, + entry_price=self.current_price, + stop_loss=stop_loss, + take_profit=take_profit + ) + + elif action == 2: # SELL/SHORT + if self.position == 'flat': + # Open short position + position_size = self.calculate_position_size() + stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100) + take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100) + + success = await self.trading_client.open_position( + position_type='short', + size=position_size, + entry_price=self.current_price, + stop_loss=stop_loss, + take_profit=take_profit + ) + + if success: + logger.info(f"LIVE: Successfully opened SHORT position at {self.current_price}") + else: + logger.error("LIVE: Failed to open SHORT position") + + elif self.position == 'long': + # Close long position and open short + await self.trading_client.close_position(reason="switch_to_short") + + # Open new short position + position_size = self.calculate_position_size() + stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100) + take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100) + + await self.trading_client.open_position( + position_type='short', + size=position_size, + entry_price=self.current_price, + stop_loss=stop_loss, + take_profit=take_profit + ) + + elif action == 3: # CLOSE + if self.position != 'flat': + # Close any open position + success = await self.trading_client.close_position(reason="manual_close") + + if success: + logger.info(f"LIVE: Successfully closed {self.position} position") + else: + logger.error(f"LIVE: Failed to close {self.position} position") + + except Exception as e: + logger.error(f"Error executing live action: {e}") + logger.error(traceback.format_exc()) + def check_sl_tp(self): """Check if stop loss or take profit has been hit""" if self.position == 'flat': @@ -718,11 +980,20 @@ class TradingEnvironment: # Add predicted prices (if available) if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0: # Normalize predictions relative to current price - pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0 + pred_norm = np.array(self.predicted_prices[:5]) / latest_price - 1.0 state_components.append(pred_norm) else: # Add zeros if no predictions - state_components.append(np.zeros(3)) + state_components.append(np.zeros(5)) + + # Add predicted extrema probabilities (if available) + if hasattr(self, 'predicted_extrema') and len(self.predicted_extrema) > 0: + # Flatten the extrema predictions [p_low1, p_high1, p_low2, p_high2, ...] + extrema_probs = self.predicted_extrema.flatten() + state_components.append(extrema_probs) + else: + # Add zeros if no extrema predictions + state_components.append(np.zeros(10)) # 5 candles * 2 (low/high) # Add extrema signals (if available) if hasattr(self, 'optimal_signals') and len(self.optimal_signals) > 0: @@ -739,6 +1010,14 @@ class TradingEnvironment: # Add zeros if no signals state_components.append(np.zeros(5)) + # Add predicted extrema flags + extrema_flags = np.zeros(2) + if hasattr(self, 'has_predicted_low') and self.has_predicted_low: + extrema_flags[0] = 1.0 # Predicted low + if hasattr(self, 'has_predicted_high') and self.has_predicted_high: + extrema_flags[1] = 1.0 # Predicted high + state_components.append(extrema_flags) + # Position info position_info = np.zeros(5) if self.position == 'long': @@ -776,39 +1055,69 @@ class TradingEnvironment: def calculate_reward(self, action): """Calculate reward for the given action with improved penalties for losing trades""" - reward = 0 - - # Store previous balance for direct PnL calculation + # Store previous balance for reward calculation prev_balance = self.balance - # Base reward for actions - if action == 0: # HOLD - # Small penalty for doing nothing to encourage action - # But make it context-dependent - holding during high volatility should be penalized more - volatility = self.get_recent_volatility() - reward = -0.01 * (1 + volatility) + # Initialize reward + reward = 0.0 + # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE) + if action == 0: # HOLD + # Small negative reward for holding to encourage action + reward = -0.01 + + # But give positive reward for holding if we're predicting an extrema soon + if hasattr(self, 'has_predicted_low') and self.has_predicted_low and self.position == 'flat': + # Reward for waiting for a predicted bottom + reward = 0.05 + elif hasattr(self, 'has_predicted_high') and self.has_predicted_high and self.position == 'flat': + # Reward for waiting for a predicted top + reward = 0.05 + + # Check if holding a profitable position + if self.position == 'long' and self.current_price > self.entry_price: + # Reward for holding a profitable long + profit_pct = (self.current_price - self.entry_price) / self.entry_price + reward += profit_pct * 0.5 + elif self.position == 'short' and self.current_price < self.entry_price: + # Reward for holding a profitable short + profit_pct = (self.entry_price - self.current_price) / self.entry_price + reward += profit_pct * 0.5 + elif action == 1: # BUY/LONG if self.position == 'flat': - # Opening a long position + # Open long position self.position = 'long' self.entry_price = self.current_price self.entry_index = self.current_step self.position_size = self.calculate_position_size() - # Calculate stop loss and take profit levels - self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100) - self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100) + # Set stop loss and take profit + self.stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100) + self.take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100) - # Check if this is a good entry point based on technical indicators + # Calculate entry quality entry_quality = self.evaluate_entry_quality('long') - reward += entry_quality * 0.5 # Scale the reward based on entry quality - logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") + # Base reward on entry quality + reward = entry_quality + + # Additional reward if buying at a predicted low + if hasattr(self, 'has_predicted_low') and self.has_predicted_low: + reward += 0.5 + logger.info("Buying at predicted low point - additional reward") + + # Penalize buying at a predicted high + if hasattr(self, 'has_predicted_high') and self.has_predicted_high: + reward -= 0.5 + logger.info("Buying at predicted high point - penalty applied") + + logger.info(f"OPENED long at {self.current_price} | Size: {self.position_size:.2f} | Entry quality: {entry_quality:.2f}") elif self.position == 'short': - # Closing a short position - pnl_percent = (self.entry_price - self.current_price) / self.entry_price * 100 + # Close short position + price_diff = self.entry_price - self.current_price + pnl_percent = price_diff / self.entry_price * 100 pnl_dollar = pnl_percent / 100 * self.position_size # Apply fees @@ -819,6 +1128,216 @@ class TradingEnvironment: self.total_pnl += pnl_dollar self.episode_pnl += pnl_dollar + # Update max drawdown + if self.balance < self.peak_balance: + current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100 + self.max_drawdown = max(self.max_drawdown, current_drawdown) + elif self.balance > self.peak_balance: + self.peak_balance = self.balance + + # Record trade + self.trades.append({ + 'type': 'short', + 'entry': self.entry_price, + 'exit': self.current_price, + 'entry_time': self.data[self.entry_index]['timestamp'], + 'exit_time': self.data[self.current_step]['timestamp'], + 'pnl_percent': pnl_percent, + 'pnl_dollar': pnl_dollar, + 'duration': self.current_step - self.entry_index, + 'market_direction': self.get_market_direction(), + 'reason': 'switch_to_long' + }) + + # Reward based on PnL + if pnl_dollar > 0: + reward = 1.0 + pnl_dollar / 10 # Positive reward for profit + self.win_count += 1 + else: + reward = -1.0 # Negative reward for loss + self.loss_count += 1 + + logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") + + # Open new long position + self.position = 'long' + self.entry_price = self.current_price + self.entry_index = self.current_step + self.position_size = self.calculate_position_size() + + # Set stop loss and take profit + self.stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100) + self.take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100) + + logger.info(f"OPENED long at {self.current_price} | Size: {self.position_size:.2f}") + + elif action == 2: # SELL/SHORT + if self.position == 'flat': + # Open short position + self.position = 'short' + self.entry_price = self.current_price + self.entry_index = self.current_step + self.position_size = self.calculate_position_size() + + # Set stop loss and take profit + self.stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100) + self.take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100) + + # Calculate entry quality + entry_quality = self.evaluate_entry_quality('short') + + # Base reward on entry quality + reward = entry_quality + + # Additional reward if selling at a predicted high + if hasattr(self, 'has_predicted_high') and self.has_predicted_high: + reward += 0.5 + logger.info("Selling at predicted high point - additional reward") + + # Penalize selling at a predicted low + if hasattr(self, 'has_predicted_low') and self.has_predicted_low: + reward -= 0.5 + logger.info("Selling at predicted low point - penalty applied") + + logger.info(f"OPENED short at {self.current_price} | Size: {self.position_size:.2f} | Entry quality: {entry_quality:.2f}") + + elif self.position == 'long': + # Close long position + price_diff = self.current_price - self.entry_price + pnl_percent = price_diff / self.entry_price * 100 + pnl_dollar = pnl_percent / 100 * self.position_size + + # Apply fees + pnl_dollar -= self.calculate_fees(self.position_size) + + # Update balance + self.balance += pnl_dollar + self.total_pnl += pnl_dollar + self.episode_pnl += pnl_dollar + + # Update max drawdown + if self.balance < self.peak_balance: + current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100 + self.max_drawdown = max(self.max_drawdown, current_drawdown) + elif self.balance > self.peak_balance: + self.peak_balance = self.balance + + # Record trade + self.trades.append({ + 'type': 'long', + 'entry': self.entry_price, + 'exit': self.current_price, + 'entry_time': self.data[self.entry_index]['timestamp'], + 'exit_time': self.data[self.current_step]['timestamp'], + 'pnl_percent': pnl_percent, + 'pnl_dollar': pnl_dollar, + 'duration': self.current_step - self.entry_index, + 'market_direction': self.get_market_direction(), + 'reason': 'switch_to_short' + }) + + # Reward based on PnL + if pnl_dollar > 0: + reward = 1.0 + pnl_dollar / 10 # Positive reward for profit + self.win_count += 1 + else: + reward = -1.0 # Negative reward for loss + self.loss_count += 1 + + logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") + + # Open new short position + self.position = 'short' + self.entry_price = self.current_price + self.entry_index = self.current_step + self.position_size = self.calculate_position_size() + + # Set stop loss and take profit + self.stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100) + self.take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100) + + logger.info(f"OPENED short at {self.current_price} | Size: {self.position_size:.2f}") + + elif action == 3: # CLOSE + if self.position == 'long': + # Close long position + price_diff = self.current_price - self.entry_price + pnl_percent = price_diff / self.entry_price * 100 + pnl_dollar = pnl_percent / 100 * self.position_size + + # Apply fees + pnl_dollar -= self.calculate_fees(self.position_size) + + # Update balance + self.balance += pnl_dollar + self.total_pnl += pnl_dollar + self.episode_pnl += pnl_dollar + + # Update max drawdown + if self.balance < self.peak_balance: + current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100 + self.max_drawdown = max(self.max_drawdown, current_drawdown) + elif self.balance > self.peak_balance: + self.peak_balance = self.balance + + # Record trade + self.trades.append({ + 'type': 'long', + 'entry': self.entry_price, + 'exit': self.current_price, + 'entry_time': self.data[self.entry_index]['timestamp'], + 'exit_time': self.data[self.current_step]['timestamp'], + 'pnl_percent': pnl_percent, + 'pnl_dollar': pnl_dollar, + 'duration': self.current_step - self.entry_index, + 'market_direction': self.get_market_direction(), + 'reason': 'manual_close' + }) + + # Reward based on PnL + if pnl_dollar > 0: + reward = 1.0 + pnl_dollar / 10 # Positive reward for profit + self.win_count += 1 + + # Extra reward for closing at a predicted high + if hasattr(self, 'has_predicted_high') and self.has_predicted_high: + reward += 0.5 + logger.info("Closing long at predicted high - additional reward") + else: + reward = -1.0 # Negative reward for loss + self.loss_count += 1 + + logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") + + # Reset position + self.position = 'flat' + self.entry_price = 0 + self.entry_index = 0 + self.position_size = 0 + self.stop_loss = 0 + self.take_profit = 0 + + elif self.position == 'short': + # Close short position + price_diff = self.entry_price - self.current_price + pnl_percent = price_diff / self.entry_price * 100 + pnl_dollar = pnl_percent / 100 * self.position_size + + # Apply fees + pnl_dollar -= self.calculate_fees(self.position_size) + + # Update balance + self.balance += pnl_dollar + self.total_pnl += pnl_dollar + self.episode_pnl += pnl_dollar + + # Update max drawdown + if self.balance < self.peak_balance: + current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100 + self.max_drawdown = max(self.max_drawdown, current_drawdown) + elif self.balance > self.peak_balance: + self.peak_balance = self.balance + # Record trade self.trades.append({ 'type': 'short', @@ -833,177 +1352,17 @@ class TradingEnvironment: 'reason': 'manual_close' }) - # Update win/loss count - if pnl_dollar > 0: - self.win_count += 1 - reward += 1.0 + (pnl_percent / 2) # Bonus for winning trade - else: - self.loss_count += 1 - reward -= 1.0 + (abs(pnl_percent) / 2) # Penalty for losing trade - - logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") - - # Reset position and open new long - self.position = 'long' - self.entry_price = self.current_price - self.entry_index = self.current_step - self.position_size = self.calculate_position_size() - - # Calculate stop loss and take profit levels - self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100) - self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100) - - logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") - - elif action == 2: # SELL/SHORT - if self.position == 'flat': - # Opening a short position - self.position = 'short' - self.entry_price = self.current_price - self.position_size = self.calculate_position_size() - self.stop_loss = self.entry_price * (1 + STOP_LOSS_PERCENT/100) - self.take_profit = self.entry_price * (1 - TAKE_PROFIT_PERCENT/100) - - # Check if this is an optimal sell point (top) - current_idx = len(self.features['price']) - 1 - if hasattr(self, 'optimal_tops') and current_idx in self.optimal_tops: - reward += 2.0 # Bonus for selling at a top - else: - reward += 0.1 # Small reward for opening a position - - logger.info(f"OPENED SHORT at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") - - elif self.position == 'long': - # Close long and open short - pnl_percent = (self.current_price - self.entry_price) / self.entry_price * 100 - pnl_dollar = pnl_percent / 100 * self.position_size - - # Apply fees - pnl_dollar -= self.calculate_fees(self.position_size) - - # Update balance - self.balance += pnl_dollar - self.total_pnl += pnl_dollar - - # Record trade - self.trades.append({ - 'type': 'long', - 'entry': self.entry_price, - 'exit': self.current_price, - 'entry_time': self.data[self.entry_index]['timestamp'], - 'exit_time': self.data[self.current_step]['timestamp'], - 'pnl_percent': pnl_percent, - 'pnl_dollar': pnl_dollar - }) - # Reward based on PnL if pnl_dollar > 0: - reward += 1.0 + pnl_dollar / 10 # Positive reward for profit + reward = 1.0 + pnl_dollar / 10 # Positive reward for profit self.win_count += 1 + + # Extra reward for closing at a predicted low + if hasattr(self, 'has_predicted_low') and self.has_predicted_low: + reward += 0.5 + logger.info("Closing short at predicted low - additional reward") else: - reward -= 1.0 # Negative reward for loss - self.loss_count += 1 - - logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") - - # Now open short - self.position = 'short' - self.entry_price = self.current_price - self.position_size = self.calculate_position_size() - self.stop_loss = self.entry_price * (1 + STOP_LOSS_PERCENT/100) - self.take_profit = self.entry_price * (1 - TAKE_PROFIT_PERCENT/100) - - # Check if this is an optimal sell point - current_idx = len(self.features['price']) - 1 - if hasattr(self, 'optimal_tops') and current_idx in self.optimal_tops: - reward += 2.0 # Bonus for selling at a top - - logger.info(f"OPENED SHORT at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") - - elif action == 3: # CLOSE - if self.position == 'long': - # Close long position - pnl_percent = (self.current_price - self.entry_price) / self.entry_price * 100 - pnl_dollar = pnl_percent / 100 * self.position_size - - # Apply fees - pnl_dollar -= self.calculate_fees(self.position_size) - - # Update balance - self.balance += pnl_dollar - self.total_pnl += pnl_dollar - self.episode_pnl += pnl_dollar - - # Update max drawdown - if self.balance > self.peak_balance: - self.peak_balance = self.balance - drawdown = (self.peak_balance - self.balance) / self.peak_balance - self.max_drawdown = max(self.max_drawdown, drawdown) - - # Record trade - self.trades.append({ - 'type': 'long', - 'entry': self.entry_price, - 'exit': self.current_price, - 'entry_time': self.data[self.entry_index]['timestamp'], - 'exit_time': self.data[self.current_step]['timestamp'], - 'pnl_percent': pnl_percent, - 'pnl_dollar': pnl_dollar - }) - - # Reward based on PnL - if pnl_dollar > 0: - reward += 1.0 + pnl_dollar / 10 # Positive reward for profit - self.win_count += 1 - else: - reward -= 1.0 # Negative reward for loss - self.loss_count += 1 - - logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") - - # Reset position - self.position = 'flat' - self.entry_price = 0 - self.position_size = 0 - self.stop_loss = 0 - self.take_profit = 0 - - elif self.position == 'short': - # Close short position - pnl_percent = (self.entry_price - self.current_price) / self.entry_price * 100 - pnl_dollar = pnl_percent / 100 * self.position_size - - # Apply fees - pnl_dollar -= self.calculate_fees(self.position_size) - - # Update balance - self.balance += pnl_dollar - self.total_pnl += pnl_dollar - self.episode_pnl += pnl_dollar - - # Update max drawdown - if self.balance > self.peak_balance: - self.peak_balance = self.balance - drawdown = (self.peak_balance - self.balance) / self.peak_balance - self.max_drawdown = max(self.max_drawdown, drawdown) - - # Record trade - self.trades.append({ - 'type': 'short', - 'entry': self.entry_price, - 'exit': self.current_price, - 'entry_time': self.data[self.entry_index]['timestamp'], - 'exit_time': self.data[self.current_step]['timestamp'], - 'pnl_percent': pnl_percent, - 'pnl_dollar': pnl_dollar - }) - - # Reward based on PnL - if pnl_dollar > 0: - reward += 1.0 + pnl_dollar / 10 # Positive reward for profit - self.win_count += 1 - else: - reward -= 1.0 # Negative reward for loss + reward = -1.0 # Negative reward for loss self.loss_count += 1 logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") @@ -1011,6 +1370,7 @@ class TradingEnvironment: # Reset position self.position = 'flat' self.entry_price = 0 + self.entry_index = 0 self.position_size = 0 self.stop_loss = 0 self.take_profit = 0 @@ -1038,12 +1398,25 @@ class TradingEnvironment: # Add reward/penalty based on market trend alignment market_direction = self.get_market_direction() - if (self.position == 'long' and market_direction == 'uptrend') or \ - (self.position == 'short' and market_direction == 'downtrend'): - reward += 0.2 # Reward for trading with the trend - elif (self.position == 'long' and market_direction == 'downtrend') or \ - (self.position == 'short' and market_direction == 'uptrend'): - reward -= 0.3 # Stronger penalty for trading against the trend + + if self.position == 'long' and market_direction == 'uptrend': + reward += 0.1 # Reward for being long in uptrend + elif self.position == 'short' and market_direction == 'downtrend': + reward += 0.1 # Reward for being short in downtrend + elif self.position == 'long' and market_direction == 'downtrend': + reward -= 0.1 # Penalty for being long in downtrend + elif self.position == 'short' and market_direction == 'uptrend': + reward -= 0.1 # Penalty for being short in uptrend + + # Add reward for trading with volume + if action in [1, 2] and self.position != 'flat': # Opening a position + current_volume = self.data[self.current_step]['volume'] + avg_volume = np.mean([candle['volume'] for candle in self.data[max(0, self.current_step-10):self.current_step]]) + + if current_volume > avg_volume * 1.5: + # Trading with high volume + reward += 0.2 + logger.info("Trading with high volume - additional reward") return reward @@ -1242,43 +1615,80 @@ class TradingEnvironment: self.price_predictor.to(device) self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3) self.predicted_prices = np.array([]) + self.predicted_extrema = np.array([]) + self.extrema_threshold = 0.7 # Threshold for extrema prediction confidence def train_price_predictor(self): - """Train the price prediction model on recent data""" + """Train the price prediction model on recent data with focus on extrema""" if len(self.features['price']) < 35: - return 0.0 + return 0.0, 0.0 - # Get price history + # Get price and volume history price_history = self.features['price'] + volume_history = self.features['volume'] - # Train the model - loss = self.price_predictor.train_on_new_data( + # Train the model with emphasis on extrema prediction + price_loss, extrema_loss = self.price_predictor.train_on_new_data( price_history, + volume_history, self.price_predictor_optimizer, - epochs=5 + epochs=5, + extrema_weight=1.5 # Give more weight to extrema prediction ) - return loss + logger.info(f"Price predictor training - Price loss: {price_loss:.6f}, Extrema loss: {extrema_loss:.6f}") + + return price_loss, extrema_loss def update_price_predictions(self): - """Update price predictions""" + """Update price and extrema predictions""" if len(self.features['price']) < 30: self.predicted_prices = np.array([]) + self.predicted_extrema = np.array([]) return - # Get price history + # Get price and volume history price_history = self.features['price'] + volume_history = self.features['volume'] # Get predictions - self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5) - + self.predicted_prices, self.predicted_extrema = self.price_predictor.predict_next_candles( + price_history, + volume_history, + num_candles=5 + ) + + # Log predictions + logger.info(f"Predicted prices for next 5 candles: {self.predicted_prices}") + + # Identify predicted extrema points + predicted_lows = [] + predicted_highs = [] + + for i, (p_low, p_high) in enumerate(self.predicted_extrema): + if p_low > self.extrema_threshold: + predicted_lows.append(i) + logger.info(f"Predicted low at candle +{i+1} with confidence {p_low:.2f}") + if p_high > self.extrema_threshold: + predicted_highs.append(i) + logger.info(f"Predicted high at candle +{i+1} with confidence {p_high:.2f}") + + # Store predicted extrema indices + self.predicted_lows = predicted_lows + self.predicted_highs = predicted_highs + def identify_optimal_trades(self): - """Identify optimal entry and exit points based on local extrema""" + """Identify optimal entry and exit points based on local extrema and volume""" if len(self.features['price']) < 20: return - # Find local bottoms and tops - bottoms, tops = find_local_extrema(self.features['price'], window=5) + # Find local bottoms and tops with volume confirmation + bottoms, tops = find_local_extrema( + self.features['price'], + window=5, + volumes=self.features['volume'], + volume_threshold=0.7 + ) # Store optimal trade points self.optimal_bottoms = bottoms # Buy points @@ -1294,6 +1704,21 @@ class TradingEnvironment: self.optimal_signals[i] = -1 # Sell signal logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points") + + # Integrate predicted extrema into decision making + if hasattr(self, 'predicted_extrema') and len(self.predicted_extrema) > 0: + # Check if we predict any extrema in the near future + has_predicted_low = any(p_low > self.extrema_threshold for p_low, _ in self.predicted_extrema) + has_predicted_high = any(p_high > self.extrema_threshold for _, p_high in self.predicted_extrema) + + if has_predicted_low: + logger.info("Predicting a significant low point in the next few candles") + if has_predicted_high: + logger.info("Predicting a significant high point in the next few candles") + + # Store these predictions for use in action selection + self.has_predicted_low = has_predicted_low + self.has_predicted_high = has_predicted_high def calculate_position_size(self): """Calculate position size based on current balance, volatility and risk parameters""" @@ -1392,8 +1817,11 @@ class Agent: self.epsilon_end = EPSILON_END self.epsilon_decay = EPSILON_DECAY - # Initialize mixed precision scaler - self.scaler = amp.GradScaler() + # Initialize mixed precision scaler with the new format + if self.device.type == "cuda": + self.scaler = amp.GradScaler('cuda') + else: + self.scaler = amp.GradScaler('cpu') # Initialize TensorBoard writer self.writer = SummaryWriter(f'runs/trading_agent_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}') @@ -1748,6 +2176,10 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000, env.price_predictor = price_predictor env.price_predictor_optimizer = price_predictor_optimizer + # Initialize price predictor + env.initialize_price_predictor(device=agent.device) + logger.info("Price predictor initialized") + for episode in range(num_episodes): try: # Update curriculum stage if needed @@ -1768,7 +2200,7 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000, env.risk_factor = risk_factor # Update training data if exchange is available - if exchange and args.refresh_data: + if exchange and args and hasattr(args, 'refresh_data') and args.refresh_data: # Fetch new data at the start of each episode logger.info(f"Refreshing data for episode {episode}") await env.fetch_new_data(exchange, "ETH/USDT", "1m", 100) @@ -1793,7 +2225,7 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000, } # Train price predictor - prediction_loss = env.train_price_predictor() + prediction_loss, extrema_loss = env.train_price_predictor() # Update price predictions env.update_price_predictions() @@ -2276,6 +2708,26 @@ async def live_trading(agent, env, exchange, demo=True): logger.info(f"Starting live trading (demo mode: {demo})") try: + # Initialize trading client for live trading if not in demo mode + trading_client = None + if not demo: + trading_client = MexcTradingClient(symbol="ETH/USDT") + if not trading_client.client: + logger.error("Failed to initialize MEXC trading client. Check API keys.") + return + + # Update environment with trading client + env.trading_client = trading_client + + # Fetch initial account balance + balance = await trading_client.fetch_account_balance() + if balance > 0: + logger.info(f"Initial account balance: ${balance:.2f}") + env.balance = balance + env.initial_balance = balance + else: + logger.warning("Could not fetch account balance, using default") + # Subscribe to websocket for real-time data symbol = "ETH/USDT" timeframe = "1m" @@ -2286,6 +2738,20 @@ async def live_trading(agent, env, exchange, demo=True): logger.error("Failed to initialize with historical data") return + # Initialize price predictor + env.initialize_price_predictor(device=agent.device) + logger.info("Price predictor initialized") + + # Initialize TensorBoard writer if not already present + if not hasattr(agent, 'writer'): + from torch.utils.tensorboard import SummaryWriter + log_dir = os.path.join("logs", "live_trading", datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) + agent.writer = SummaryWriter(log_dir) + logger.info(f"TensorBoard logs will be saved to {log_dir}") + + # Initialize step counter for TensorBoard + tb_step = 0 + # Main trading loop while True: # Wait for the next candle (1 minute) @@ -2301,6 +2767,10 @@ async def live_trading(agent, env, exchange, demo=True): # Update environment with new data env.add_data(latest_candle) + # Train price predictor and update predictions + env.train_price_predictor() + env.update_price_predictions() + # Get current state state = env.get_state() @@ -2328,11 +2798,86 @@ async def live_trading(agent, env, exchange, demo=True): if trade_analysis: logger.info(f"Recent Performance: Win Rate={trade_analysis.get('uptrend_win_rate', 0):.1f}% in uptrends, " f"{trade_analysis.get('downtrend_win_rate', 0):.1f}% in downtrends") + + # If not in demo mode, update actual balance from exchange + if not demo and trading_client: + try: + actual_balance = await trading_client.fetch_account_balance() + if actual_balance > 0: + # Update environment balance with actual balance + env.balance = actual_balance + logger.info(f"Updated actual account balance: ${actual_balance:.2f}") + except Exception as e: + logger.error(f"Error updating account balance: {e}") + + # Log to TensorBoard every 5 steps + if tb_step % 5 == 0: + # Create a DataFrame from the environment's data + df_ohlcv = pd.DataFrame([{ + 'timestamp': candle['timestamp'], + 'open': candle['open'], + 'high': candle['high'], + 'low': candle['low'], + 'close': candle['close'], + 'volume': candle['volume'] + } for candle in env.data[-100:]]) # Use last 100 candles + + # Convert timestamp to datetime + df_ohlcv['timestamp'] = pd.to_datetime(df_ohlcv['timestamp'], unit='ms') + df_ohlcv.set_index('timestamp', inplace=True) + + # Extract buy/sell signals from trades + buy_signals = [] + sell_signals = [] + + if hasattr(env, 'trades') and env.trades: + for trade in env.trades: + if 'entry_time' in trade and 'entry' in trade: + if trade['type'] == 'long': + # Buy signal + entry_time = pd.to_datetime(trade['entry_time'], unit='ms') + buy_signals.append((entry_time, trade['entry'])) + + # Sell signal if closed + if 'exit_time' in trade and 'exit' in trade and trade['exit'] > 0: + exit_time = pd.to_datetime(trade['exit_time'], unit='ms') + sell_signals.append((exit_time, trade['exit'])) + + elif trade['type'] == 'short': + # Sell short signal + entry_time = pd.to_datetime(trade['entry_time'], unit='ms') + sell_signals.append((entry_time, trade['entry'])) + + # Buy to cover signal if closed + if 'exit_time' in trade and 'exit' in trade and trade['exit'] > 0: + exit_time = pd.to_datetime(trade['exit_time'], unit='ms') + buy_signals.append((exit_time, trade['exit'])) + + # Log to TensorBoard with a fixed tag to overwrite previous charts + log_ohlcv_to_tensorboard( + agent.writer, + df_ohlcv, + buy_signals, + sell_signals, + tb_step, + tag_prefix="live_trading" + ) + + # Log additional metrics + agent.writer.add_scalar("live/balance", env.balance, tb_step) + agent.writer.add_scalar("live/total_pnl", env.total_pnl, tb_step) + if env.trades: + agent.writer.add_scalar("live/win_rate", win_rate, tb_step) + agent.writer.add_scalar("live/trade_count", len(env.trades), tb_step) + + # Increment TensorBoard step counter + tb_step += 1 except KeyboardInterrupt: logger.info("Live trading stopped by user") except Exception as e: logger.error(f"Error in live trading: {e}") + logger.error(traceback.format_exc()) raise async def get_latest_candle(exchange, symbol): @@ -2393,35 +2938,43 @@ async def fetch_ohlcv_data(exchange, symbol, timeframe, limit): async def main(): """Main function to run the trading bot""" - parser = argparse.ArgumentParser(description='Crypto Trading Bot') - parser.add_argument('--mode', type=str, default='train', - choices=['train', 'evaluate', 'live', 'continuous'], - help='Mode to run the bot in (train, evaluate, live, or continuous)') - parser.add_argument('--episodes', type=int, default=1000, help='Number of episodes to train') + # Parse command line arguments + import argparse + import traceback + + parser = argparse.ArgumentParser(description='Run the trading bot') + parser.add_argument('--mode', type=str, default='train', choices=['train', 'evaluate', 'live'], help='Mode to run the bot in') + parser.add_argument('--episodes', type=int, default=100, help='Number of episodes to train for') parser.add_argument('--demo', action='store_true', help='Run in demo mode (no real trades)') - parser.add_argument('--refresh-data', action='store_true', help='Refresh data during training') - parser.add_argument('--device', type=str, default='gpu', choices=['gpu', 'cpu'], - help='Device to use for training (gpu or cpu)') + parser.add_argument('--device', type=str, default='auto', choices=['cpu', 'gpu', 'auto'], help='Device to use for training') args = parser.parse_args() - # Get device based on argument and availability + # Set device device = get_device(args.device) + logger.info(f"Using device: {device}") - exchange = None try: # Initialize exchange exchange = await initialize_exchange() + # Determine if we're in demo mode + demo_mode = args.demo + if args.mode != 'live': + # Always use demo mode for training and evaluation + demo_mode = True + + logger.info(f"Running in {'demo' if demo_mode else 'live trading'} mode") + # Create environment with the correct parameters env = TradingEnvironment( initial_balance=INITIAL_BALANCE, window_size=30, - demo=args.demo or args.mode != 'live' + demo=demo_mode ) # Fetch initial data logger.info("Fetching initial data for ETH/USDT") - await env.fetch_initial_data(exchange, "ETH/USDT", "1m", 500) + await env.fetch_initial_data(exchange, "ETH/USDT", "1m", 1500) # Initialize agent agent = Agent(STATE_SIZE, 4, hidden_size=384, lstm_layers=2, attention_heads=4, device=device) @@ -2431,39 +2984,16 @@ async def main(): logger.info(f"Starting training for {args.episodes} episodes...") stats = await train_agent(agent, env, num_episodes=args.episodes, exchange=exchange, args=args) - elif args.mode == 'continuous': - # Run in continuous mode - train indefinitely - logger.info("Starting continuous training mode. Press Ctrl+C to stop.") - episode_counter = 0 - try: - while True: # Run indefinitely until manually stopped - # Train for a batch of episodes - batch_size = 50 # Train in batches of 50 episodes - logger.info(f"Starting training batch {episode_counter // batch_size + 1}") - - # Refresh data at the start of each batch - if exchange and args.refresh_data: - logger.info("Refreshing data for new training batch") - await env.fetch_new_data(exchange, "ETH/USDT", "1m", 500) - logger.info(f"Updated environment with fresh candles") - - # Train for a batch of episodes - stats = await train_agent(agent, env, num_episodes=args.episodes, exchange=exchange, args=args) - - # Save model after each batch - agent.save(f"models/trading_agent_continuous_{episode_counter}.pt") - - # Increment counter - episode_counter += batch_size - - # Sleep briefly to prevent excessive API calls - await asyncio.sleep(5) - - except KeyboardInterrupt: - logger.info("Continuous training stopped by user") - # Save final model - agent.save("models/trading_agent_continuous_final.pt") - logger.info("Final model saved") + # Plot training results + plot_training_results(stats) + + # Save the trained agent + agent.save("models/trading_agent_latest.pt") + + # Evaluate the agent + logger.info("Evaluating agent...") + results = evaluate_agent(agent, env, num_episodes=10) + logger.info(f"Evaluation results: {results}") elif args.mode == 'evaluate': # Load the best model @@ -2480,7 +3010,7 @@ async def main(): # Run live trading logger.info("Starting live trading...") - await live_trading(agent, env, exchange, demo=args.demo) + await live_trading(agent, env, exchange, demo=demo_mode) except Exception as e: logger.error(f"Error: {e}") @@ -2690,6 +3220,7 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step, from matplotlib.figure import Figure from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas import numpy as np + from mplfinance.original_flavor import candlestick_ohlc # Check if DataFrame is empty if df_ohlcv.empty: @@ -2700,59 +3231,107 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step, fig = Figure(figsize=(12, 8)) canvas = FigureCanvas(fig) - # Create subplots for price and volume - ax1 = fig.add_subplot(2, 1, 1) # Price chart - ax2 = fig.add_subplot(2, 1, 2, sharex=ax1) # Volume chart + # Create subplots for price, predictions, and volume + ax1 = fig.add_subplot(3, 1, 1) # Price chart + ax2 = fig.add_subplot(3, 1, 2, sharex=ax1) # Predictions chart + ax3 = fig.add_subplot(3, 1, 3, sharex=ax1) # Volume chart - # Plot OHLC - dates = mdates.date2num(df_ohlcv.index.to_pydatetime()) - ohlc = np.column_stack((dates, df_ohlcv['open'], df_ohlcv['high'], df_ohlcv['low'], df_ohlcv['close'])) + # Convert DataFrame to OHLC format for candlestick + df_ohlc = df_ohlcv.copy() + df_ohlc.reset_index(inplace=True) + df_ohlc['date_num'] = mdates.date2num(df_ohlc['timestamp'].dt.to_pydatetime()) + ohlc_data = df_ohlc[['date_num', 'open', 'high', 'low', 'close']].values - # Plot candlestick chart - from matplotlib.lines import Line2D - from matplotlib.patches import Rectangle + # Plot candlestick chart using mplfinance + candlestick_ohlc(ax1, ohlc_data, width=0.6/(24*3), colorup='green', colordown='red') - width = 0.6 / (len(df_ohlcv) + 1) # Adjust width based on number of candles + # Add a dummy line for the legend + ax1.plot([], [], color='green', label='Bullish Candle') + ax1.plot([], [], color='red', label='Bearish Candle') - for i, (date, open_price, high, low, close) in enumerate(ohlc): - # Determine candle color - if close >= open_price: - color = 'green' - body_bottom = open_price - body_height = close - open_price - else: - color = 'red' - body_bottom = close - body_height = open_price - close - - # Plot candle body - rect = Rectangle( - xy=(date - width/2, body_bottom), - width=width, - height=body_height, - facecolor=color, - edgecolor='black', - alpha=0.8 - ) - ax1.add_patch(rect) - - # Plot wick - ax1.plot([date, date], [low, high], color='black', linewidth=1) - - # Plot buy signals + # Plot buy signals with proper alignment if buy_signals: - buy_dates = mdates.date2num([x[0] for x in buy_signals]) - buy_prices = [x[1] for x in buy_signals] - ax1.scatter(buy_dates, buy_prices, marker='^', color='green', s=100, label='Buy') + # Convert datetime to numeric format for plotting + buy_dates = [signal[0] for signal in buy_signals] + buy_dates_num = mdates.date2num(buy_dates) + buy_prices = [signal[1] for signal in buy_signals] + + # Filter signals to only include those within the chart timeframe + valid_indices = [] + for i, date_num in enumerate(buy_dates_num): + if min(df_ohlc['date_num']) <= date_num <= max(df_ohlc['date_num']): + valid_indices.append(i) + + if valid_indices: + filtered_dates = [buy_dates_num[i] for i in valid_indices] + filtered_prices = [buy_prices[i] for i in valid_indices] + ax1.scatter(filtered_dates, filtered_prices, marker='^', color='green', s=100, label='Buy') - # Plot sell signals + # Plot sell signals with proper alignment if sell_signals: - sell_dates = mdates.date2num([x[0] for x in sell_signals]) - sell_prices = [x[1] for x in sell_signals] - ax1.scatter(sell_dates, sell_prices, marker='v', color='red', s=100, label='Sell') + # Convert datetime to numeric format for plotting + sell_dates = [signal[0] for signal in sell_signals] + sell_dates_num = mdates.date2num(sell_dates) + sell_prices = [signal[1] for signal in sell_signals] + + # Filter signals to only include those within the chart timeframe + valid_indices = [] + for i, date_num in enumerate(sell_dates_num): + if min(df_ohlc['date_num']) <= date_num <= max(df_ohlc['date_num']): + valid_indices.append(i) + + if valid_indices: + filtered_dates = [sell_dates_num[i] for i in valid_indices] + filtered_prices = [sell_prices[i] for i in valid_indices] + ax1.scatter(filtered_dates, filtered_prices, marker='v', color='red', s=100, label='Sell') + + # Plot predicted prices if available in the environment + from inspect import currentframe, getframeinfo + frame = currentframe() + has_predictions = False + while frame: + if 'env' in frame.f_locals: + env = frame.f_locals['env'] + if hasattr(env, 'predicted_prices') and len(env.predicted_prices) > 0: + # Get the last timestamp from the data + last_timestamp = df_ohlc['timestamp'].iloc[-1] + + # Create future timestamps for predictions (assuming 1-minute intervals) + future_timestamps = [last_timestamp + pd.Timedelta(minutes=i+1) for i in range(len(env.predicted_prices))] + future_dates_num = mdates.date2num(future_timestamps) + + # Plot actual close prices + ax2.plot(df_ohlc['date_num'], df_ohlc['close'], color='blue', label='Actual Price') + + # Plot predicted prices + ax2.plot(future_dates_num, env.predicted_prices, color='orange', linestyle='--', marker='o', label='Predicted Price') + + # Add shading to indicate prediction area + ax2.axvspan(df_ohlc['date_num'].iloc[-1], future_dates_num[-1], alpha=0.2, color='yellow') + + # Set title and legend + ax2.set_title('Price Predictions') + ax2.set_ylabel('Price') + ax2.legend() + ax2.grid(True) + has_predictions = True + break + frame = frame.f_back + + # If no predictions were found, use the subplot for something else + if not has_predictions: + # Plot moving averages instead + if len(df_ohlc) >= 20: + df_ohlc['MA20'] = df_ohlc['close'].rolling(window=20).mean() + ax2.plot(df_ohlc['date_num'], df_ohlc['close'], color='blue', label='Close Price') + ax2.plot(df_ohlc['date_num'], df_ohlc['MA20'], color='orange', label='20-period MA') + ax2.set_title('Price and Moving Average') + ax2.set_ylabel('Price') + ax2.legend() + ax2.grid(True) # Plot volume - ax2.bar(dates, df_ohlcv['volume'], width=width, color='blue', alpha=0.5) + ax3.bar(df_ohlc['date_num'], df_ohlc['volume'], width=0.6/(24*3), color='blue', alpha=0.5) # Format axes ax1.set_title(f'OHLC with Buy/Sell Signals - {tag_prefix}') @@ -2760,13 +3339,25 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step, ax1.legend() ax1.grid(True) - ax2.set_xlabel('Date') - ax2.set_ylabel('Volume') - ax2.grid(True) + ax3.set_xlabel('Date') + ax3.set_ylabel('Volume') + ax3.grid(True) - # Format date + # Format date for all axes date_format = mdates.DateFormatter('%Y-%m-%d %H:%M') + ax1.xaxis.set_major_formatter(date_format) ax2.xaxis.set_major_formatter(date_format) + ax3.xaxis.set_major_formatter(date_format) + + # Set x-axis limits to ensure all subplots show the same time range + # Include future predictions if available + if has_predictions and 'future_dates_num' in locals(): + x_min = min(df_ohlc['date_num']) + x_max = max(future_dates_num) + ax1.set_xlim(x_min, x_max) + ax2.set_xlim(x_min, x_max) + ax3.set_xlim(x_min, x_max) + fig.autofmt_xdate() # Adjust layout