diff --git a/crypto/gogo2/main.py b/crypto/gogo2/main.py
index e5c7e02..41d30f6 100644
--- a/crypto/gogo2/main.py
+++ b/crypto/gogo2/main.py
@@ -17,66 +17,114 @@ from dotenv import load_dotenv
 import ccxt
 import websockets
 from torch.utils.tensorboard import SummaryWriter
-import torch.cuda.amp as amp  # Add this import at the top
+import torch.amp as amp  # Update import to use torch.amp instead of torch.cuda.amp
 from sklearn.preprocessing import MinMaxScaler
-import copy
-import argparse
 import traceback
 import math
-import matplotlib.dates as mdates
-from matplotlib.figure import Figure
-from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[logging.FileHandler("trading_bot.log"), logging.StreamHandler()]
-)
-logger = logging.getLogger("trading_bot")
+from mexc_trading import MexcTradingClient
 
 # Load environment variables
 load_dotenv()
 MEXC_API_KEY = os.getenv('MEXC_API_KEY')
 MEXC_SECRET_KEY = os.getenv('MEXC_SECRET_KEY')
 
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('trading_bot')
+
 # Constants
-INITIAL_BALANCE = 100  # USD
-MAX_LEVERAGE = 100
-STOP_LOSS_PERCENT = 0.5  # Very tight stop loss (0.5%) due to high leverage
-TAKE_PROFIT_PERCENT = 1.5  # Take profit at 1.5%
-MEMORY_SIZE = 100000
-BATCH_SIZE = 64
-GAMMA = 0.99  # Discount factor
-EPSILON_START = 1.0
-EPSILON_END = 0.05
-EPSILON_DECAY = 10000
-STATE_SIZE = 40  # Size of our state representation
-LEARNING_RATE = 1e-4
-TARGET_UPDATE = 10  # Update target network every 10 episodes
+INITIAL_BALANCE = 1000.0  # Starting balance in USDT
+MAX_LEVERAGE = 1  # Maximum leverage to use
+STOP_LOSS_PERCENT = 2.0  # Default stop loss percentage
+TAKE_PROFIT_PERCENT = 4.0  # Default take profit percentage
+STATE_SIZE = 40  # Size of the state vector for the agent (matches saved model)
+LEARNING_RATE = 1e-4  # Learning rate for the optimizer
+MEMORY_SIZE = 100000  # Size of the replay memory
+BATCH_SIZE = 64  # Batch size for training
+GAMMA = 0.99  # Discount factor for future rewards
+EPSILON_START = 1.0  # Starting value of epsilon for exploration
+EPSILON_END = 0.05  # Minimum value of epsilon
+EPSILON_DECAY = 10000  # Decay rate for epsilon
+TARGET_UPDATE = 10  # Update target network every N episodes
 
 # Experience replay tuple
 Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state', 'done'])
 
 # Add this function near the top of the file, after the imports but before any classes
-def find_local_extrema(prices, window=5):
-    """Find local minima (bottoms) and maxima (tops) in price data"""
+def find_local_extrema(prices, window=5, volumes=None, volume_threshold=0.7):
+    """
+    Find local minima (bottoms) and maxima (tops) in price data with volume confirmation
+    
+    Parameters:
+    -----------
+    prices : list or array
+        Price data
+    window : int
+        Window size for extrema detection
+    volumes : list or array, optional
+        Volume data corresponding to prices
+    volume_threshold : float
+        Threshold for volume significance (percentile)
+    
+    Returns:
+    --------
+    bottoms : list
+        Indices of local bottoms
+    tops : list
+        Indices of local tops
+    """
     bottoms = []
     tops = []
     
     if len(prices) < window * 2 + 1:
         return bottoms, tops
     
+    # Determine volume significance if volumes are provided
+    volume_significance = None
+    if volumes is not None and len(volumes) == len(prices):
+        # Calculate rolling average volume
+        rolling_vol = []
+        for i in range(len(volumes)):
+            start_idx = max(0, i - window * 2)
+            end_idx = i + 1
+            rolling_vol.append(np.mean(volumes[start_idx:end_idx]))
+        
+        # Calculate volume threshold based on percentile
+        volume_significance = np.percentile(rolling_vol, volume_threshold * 100)
+    
     for i in range(window, len(prices) - window):
         # Check if this is a local minimum (bottom)
-        if all(prices[i] <= prices[i-j] for j in range(1, window+1)) and \
-           all(prices[i] <= prices[i+j] for j in range(1, window+1)):
-            bottoms.append(i)
+        is_bottom = all(prices[i] <= prices[i-j] for j in range(1, window+1)) and \
+                   all(prices[i] <= prices[i+j] for j in range(1, window+1))
         
         # Check if this is a local maximum (top)
-        if all(prices[i] >= prices[i-j] for j in range(1, window+1)) and \
-           all(prices[i] >= prices[i+j] for j in range(1, window+1)):
-            tops.append(i)
+        is_top = all(prices[i] >= prices[i-j] for j in range(1, window+1)) and \
+                all(prices[i] >= prices[i+j] for j in range(1, window+1))
+        
+        # Apply volume filter if available
+        if volume_significance is not None:
+            # For bottoms, we want to see increased volume on the reversal
+            if is_bottom:
+                # Check if volume is significant at this point or in the next few candles
+                vol_confirmed = False
+                for j in range(3):  # Check current and next 2 candles
+                    if i+j < len(volumes) and volumes[i+j] > volume_significance:
+                        vol_confirmed = True
+                        break
+                
+                if vol_confirmed:
+                    bottoms.append(i)
+            
+            # For tops, we want to see increased volume at the peak
+            elif is_top:
+                if volumes[i] > volume_significance:
+                    tops.append(i)
+        else:
+            # If no volume data, just use price action
+            if is_bottom:
+                bottoms.append(i)
+            elif is_top:
+                tops.append(i)
     
     return bottoms, tops
 
@@ -191,106 +239,213 @@ class DQN(nn.Module):
 class PricePredictionModel(nn.Module):
     def __init__(self, input_size=30, hidden_size=128, output_size=5, num_layers=2):
         super(PricePredictionModel, self).__init__()
-        self.lstm = nn.LSTM(1, hidden_size, num_layers=num_layers, batch_first=True, dropout=0.2)
-        self.fc = nn.Linear(hidden_size, output_size)
-        self.scaler = MinMaxScaler(feature_range=(0, 1))
-        self.is_fitted = False
+        # Input features: price and volume
+        self.lstm = nn.LSTM(2, hidden_size, num_layers=num_layers, batch_first=True, dropout=0.2)
+        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4, batch_first=True)
+        
+        # Price prediction head
+        self.price_fc = nn.Linear(hidden_size, output_size)
+        
+        # Extrema prediction head (probability of high/low in next candles)
+        self.extrema_fc = nn.Linear(hidden_size, output_size * 2)  # For each candle: [p_low, p_high]
+        
+        # Scalers
+        self.price_scaler = MinMaxScaler(feature_range=(0, 1))
+        self.volume_scaler = MinMaxScaler(feature_range=(0, 1))
+        self.is_fitted_price = False
+        self.is_fitted_volume = False
         
     def forward(self, x):
-        # x shape: [batch_size, seq_len, 1]
+        # x shape: [batch_size, seq_len, 2] (price, volume)
         lstm_out, _ = self.lstm(x)
-        # Use the last time step output
-        predictions = self.fc(lstm_out[:, -1, :])
-        return predictions
-    
-    def preprocess(self, data):
-        # Reshape data for scaler
-        data_reshaped = np.array(data).reshape(-1, 1)
         
-        # Fit scaler if not already fitted
-        if not self.is_fitted:
-            self.scaler.fit(data_reshaped)
-            self.is_fitted = True
+        # Apply attention mechanism
+        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)
+        
+        # Use the last time step output with attention
+        last_hidden = attn_out[:, -1, :]
+        
+        # Price predictions
+        price_pred = self.price_fc(last_hidden)
+        
+        # Extrema predictions (probabilities of highs and lows)
+        extrema_logits = self.extrema_fc(last_hidden)
+        extrema_pred = torch.sigmoid(extrema_logits)  # Convert to probabilities
+        
+        return price_pred, extrema_pred
+    
+    def preprocess(self, price_history, volume_history=None):
+        # Reshape price data for scaler
+        price_reshaped = np.array(price_history).reshape(-1, 1)
+        
+        # Fit price scaler if not already fitted
+        if not self.is_fitted_price:
+            self.price_scaler.fit(price_reshaped)
+            self.is_fitted_price = True
+            
+        # Transform price data
+        scaled_price = self.price_scaler.transform(price_reshaped)
+        
+        # If volume data is provided
+        if volume_history is not None:
+            # Reshape volume data for scaler
+            volume_reshaped = np.array(volume_history).reshape(-1, 1)
+            
+            # Fit volume scaler if not already fitted
+            if not self.is_fitted_volume:
+                self.volume_scaler.fit(volume_reshaped)
+                self.is_fitted_volume = True
+                
+            # Transform volume data
+            scaled_volume = self.volume_scaler.transform(volume_reshaped)
+            
+            # Combine price and volume data
+            scaled_data = np.hstack((scaled_price, scaled_volume))
+        else:
+            # If no volume data, use zeros
+            scaled_volume = np.zeros_like(scaled_price)
+            scaled_data = np.hstack((scaled_price, scaled_volume))
             
-        # Transform data
-        scaled_data = self.scaler.transform(data_reshaped)
         return scaled_data
     
-    def postprocess(self, scaled_predictions):
+    def postprocess_price(self, scaled_predictions):
         # Inverse transform to get actual price values
-        return self.scaler.inverse_transform(scaled_predictions.reshape(-1, 1)).flatten()
+        return self.price_scaler.inverse_transform(scaled_predictions.reshape(-1, 1)).flatten()
     
-    def predict_next_candles(self, price_history, num_candles=5):
+    def predict_next_candles(self, price_history, volume_history=None, num_candles=5):
         if len(price_history) < 30:  # Need enough history
-            return np.zeros(num_candles)
+            return np.zeros(num_candles), np.zeros((num_candles, 2))
+            
+        # Use default volume if not provided
+        if volume_history is None:
+            volume_history = np.ones_like(price_history)
+            
+        # Ensure same length
+        min_len = min(len(price_history), len(volume_history))
+        price_history = price_history[-min_len:]
+        volume_history = volume_history[-min_len:]
             
         # Preprocess data
-        scaled_data = self.preprocess(price_history)
+        scaled_data = self.preprocess(price_history, volume_history)
         
         # Create sequence
-        sequence = scaled_data[-30:].reshape(1, 30, 1)
+        sequence = scaled_data[-30:].reshape(1, 30, 2)
         sequence_tensor = torch.FloatTensor(sequence).to(next(self.parameters()).device)
         
         # Get predictions
         with torch.no_grad():
-            scaled_predictions = self(sequence_tensor).cpu().numpy()[0]
+            price_pred, extrema_pred = self(sequence_tensor)
+            scaled_price_predictions = price_pred.cpu().numpy()[0]
+            extrema_predictions = extrema_pred.cpu().numpy()[0].reshape(num_candles, 2)
             
-        # Postprocess predictions
-        predictions = self.postprocess(scaled_predictions)
-        return predictions
+        # Postprocess price predictions
+        price_predictions = self.postprocess_price(scaled_price_predictions)
+        
+        return price_predictions, extrema_predictions
     
-    def train_on_new_data(self, price_history, optimizer, epochs=5):
-        """Train the model on new price data"""
+    def train_on_new_data(self, price_history, volume_history=None, optimizer=None, epochs=5, extrema_weight=1.0):
+        """Train the model on new price and volume data with focus on extrema"""
         # Convert to numpy array if it's not already
         if isinstance(price_history, list):
-            price_history = np.array(price_history, dtype=np.float32)  # Force float32
+            price_history = np.array(price_history, dtype=np.float32)
+            
+        if volume_history is None:
+            volume_history = np.ones_like(price_history)
+        elif isinstance(volume_history, list):
+            volume_history = np.array(volume_history, dtype=np.float32)
+            
+        # Ensure same length
+        min_len = min(len(price_history), len(volume_history))
+        price_history = price_history[-min_len:]
+        volume_history = volume_history[-min_len:]
         
         if len(price_history) < 35:  # Need enough history for training
-            return 0.0
+            return 0.0, 0.0
+            
+        # Find local extrema in the price history
+        bottoms, tops = find_local_extrema(price_history, window=5)
+        
+        # Create extrema labels (0: normal, 1: bottom/buy, 2: top/sell)
+        extrema_labels = np.zeros(len(price_history))
+        for idx in bottoms:
+            if 0 <= idx < len(extrema_labels):
+                extrema_labels[idx] = 1  # Bottom/Buy
+        for idx in tops:
+            if 0 <= idx < len(extrema_labels):
+                extrema_labels[idx] = 2  # Top/Sell
             
         # Preprocess data
-        scaled_data = self.preprocess(price_history)
+        scaled_data = self.preprocess(price_history, volume_history)
         
         # Create sequences and targets
         sequences = []
-        targets = []
+        price_targets = []
+        extrema_targets = []
         
         for i in range(len(scaled_data) - 35):
-            # Sequence: 30 time steps
+            # Sequence: 30 time steps of price and volume
             seq = scaled_data[i:i+30]
-            # Target: next 5 time steps
-            target = scaled_data[i+30:i+35].flatten()
+            
+            # Price target: next 5 time steps
+            price_target = scaled_data[i+30:i+35, 0].flatten()  # Only price column
+            
+            # Extrema target: binary indicators for next 5 time steps
+            # For each of the next 5 candles, we predict [p_low, p_high]
+            extrema_target = np.zeros(5 * 2)
+            for j in range(5):
+                idx = i + 30 + j
+                if idx < len(extrema_labels):
+                    if extrema_labels[idx] == 1:  # Bottom/Buy
+                        extrema_target[j*2] = 1.0  # p_low
+                    elif extrema_labels[idx] == 2:  # Top/Sell
+                        extrema_target[j*2 + 1] = 1.0  # p_high
             
             sequences.append(seq)
-            targets.append(target)
+            price_targets.append(price_target)
+            extrema_targets.append(extrema_target)
         
         if not sequences:  # If no sequences were created
-            return 0.0
+            return 0.0, 0.0
             
         # Convert to tensors
-        sequences_tensor = torch.FloatTensor(np.array(sequences).reshape(-1, 30, 1)).to(next(self.parameters()).device)
-        targets_tensor = torch.FloatTensor(np.array(targets)).to(next(self.parameters()).device)
+        sequences_tensor = torch.FloatTensor(np.array(sequences)).to(next(self.parameters()).device)
+        price_targets_tensor = torch.FloatTensor(np.array(price_targets)).to(next(self.parameters()).device)
+        extrema_targets_tensor = torch.FloatTensor(np.array(extrema_targets)).to(next(self.parameters()).device)
+        
+        # Create optimizer if not provided
+        if optimizer is None:
+            optimizer = optim.Adam(self.parameters(), lr=1e-3)
         
         # Training loop
-        total_loss = 0
+        total_price_loss = 0
+        total_extrema_loss = 0
+        
         for _ in range(epochs):
             # Forward pass
-            predictions = self(sequences_tensor)
+            price_pred, extrema_pred = self(sequences_tensor)
             
-            # Calculate loss
-            loss = F.mse_loss(predictions, targets_tensor)
+            # Reshape extrema predictions to match targets
+            extrema_pred_flat = extrema_pred.reshape(-1, 10)  # 5 candles * 2 (low/high)
+            
+            # Calculate losses
+            price_loss = F.mse_loss(price_pred, price_targets_tensor)
+            extrema_loss = F.binary_cross_entropy(extrema_pred_flat, extrema_targets_tensor)
+            
+            # Combined loss with weighting
+            combined_loss = price_loss + extrema_weight * extrema_loss
             
             # Backward pass and optimize
             optimizer.zero_grad()
-            loss.backward()
+            combined_loss.backward()
             optimizer.step()
             
-            total_loss += loss.item()
+            total_price_loss += price_loss.item()
+            total_extrema_loss += extrema_loss.item()
         
-        return total_loss / epochs
+        return total_price_loss / epochs, total_extrema_loss / epochs
 
 class TradingEnvironment:
-    def __init__(self, initial_balance=INITIAL_BALANCE, window_size=30, demo=True):
+    def __init__(self, initial_balance=INITIAL_BALANCE, window_size=30, demo=True, trading_client=None):
         """Initialize the trading environment"""
         self.initial_balance = initial_balance
         self.balance = initial_balance
@@ -313,6 +468,9 @@ class TradingEnvironment:
         self.current_step = 0
         self.current_price = 0
         
+        # Initialize trading client for live trading
+        self.trading_client = trading_client
+        
         # Initialize features
         self.features = {
             'price': [],
@@ -462,6 +620,11 @@ class TradingEnvironment:
         self.current_price = self.data[self.current_step]['close']
         
         # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
+        if not self.demo and self.trading_client:
+            # Execute real trades in live mode
+            asyncio.create_task(self._execute_live_action(action))
+        
+        # Calculate reward (simulation still runs in parallel with live trading)
         reward = self.calculate_reward(action)
         
         # Check for stop loss / take profit hits
@@ -476,6 +639,105 @@ class TradingEnvironment:
         
         return next_state, reward, done
     
+    async def _execute_live_action(self, action):
+        """Execute live trading action using the trading client"""
+        if not self.trading_client:
+            logger.warning("No trading client available for live trading")
+            return
+            
+        try:
+            if action == 0:  # HOLD
+                # No action needed
+                pass
+                
+            elif action == 1:  # BUY/LONG
+                if self.position == 'flat':
+                    # Open long position
+                    position_size = self.calculate_position_size()
+                    stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100)
+                    take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100)
+                    
+                    success = await self.trading_client.open_position(
+                        position_type='long',
+                        size=position_size,
+                        entry_price=self.current_price,
+                        stop_loss=stop_loss,
+                        take_profit=take_profit
+                    )
+                    
+                    if success:
+                        logger.info(f"LIVE: Successfully opened LONG position at {self.current_price}")
+                    else:
+                        logger.error("LIVE: Failed to open LONG position")
+                        
+                elif self.position == 'short':
+                    # Close short position and open long
+                    await self.trading_client.close_position(reason="switch_to_long")
+                    
+                    # Open new long position
+                    position_size = self.calculate_position_size()
+                    stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100)
+                    take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100)
+                    
+                    await self.trading_client.open_position(
+                        position_type='long',
+                        size=position_size,
+                        entry_price=self.current_price,
+                        stop_loss=stop_loss,
+                        take_profit=take_profit
+                    )
+                    
+            elif action == 2:  # SELL/SHORT
+                if self.position == 'flat':
+                    # Open short position
+                    position_size = self.calculate_position_size()
+                    stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100)
+                    take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100)
+                    
+                    success = await self.trading_client.open_position(
+                        position_type='short',
+                        size=position_size,
+                        entry_price=self.current_price,
+                        stop_loss=stop_loss,
+                        take_profit=take_profit
+                    )
+                    
+                    if success:
+                        logger.info(f"LIVE: Successfully opened SHORT position at {self.current_price}")
+                    else:
+                        logger.error("LIVE: Failed to open SHORT position")
+                        
+                elif self.position == 'long':
+                    # Close long position and open short
+                    await self.trading_client.close_position(reason="switch_to_short")
+                    
+                    # Open new short position
+                    position_size = self.calculate_position_size()
+                    stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100)
+                    take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100)
+                    
+                    await self.trading_client.open_position(
+                        position_type='short',
+                        size=position_size,
+                        entry_price=self.current_price,
+                        stop_loss=stop_loss,
+                        take_profit=take_profit
+                    )
+                    
+            elif action == 3:  # CLOSE
+                if self.position != 'flat':
+                    # Close any open position
+                    success = await self.trading_client.close_position(reason="manual_close")
+                    
+                    if success:
+                        logger.info(f"LIVE: Successfully closed {self.position} position")
+                    else:
+                        logger.error(f"LIVE: Failed to close {self.position} position")
+                        
+        except Exception as e:
+            logger.error(f"Error executing live action: {e}")
+            logger.error(traceback.format_exc())
+    
     def check_sl_tp(self):
         """Check if stop loss or take profit has been hit"""
         if self.position == 'flat':
@@ -718,11 +980,20 @@ class TradingEnvironment:
         # Add predicted prices (if available)
         if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
             # Normalize predictions relative to current price
-            pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
+            pred_norm = np.array(self.predicted_prices[:5]) / latest_price - 1.0
             state_components.append(pred_norm)
         else:
             # Add zeros if no predictions
-            state_components.append(np.zeros(3))
+            state_components.append(np.zeros(5))
+        
+        # Add predicted extrema probabilities (if available)
+        if hasattr(self, 'predicted_extrema') and len(self.predicted_extrema) > 0:
+            # Flatten the extrema predictions [p_low1, p_high1, p_low2, p_high2, ...]
+            extrema_probs = self.predicted_extrema.flatten()
+            state_components.append(extrema_probs)
+        else:
+            # Add zeros if no extrema predictions
+            state_components.append(np.zeros(10))  # 5 candles * 2 (low/high)
         
         # Add extrema signals (if available)
         if hasattr(self, 'optimal_signals') and len(self.optimal_signals) > 0:
@@ -739,6 +1010,14 @@ class TradingEnvironment:
             # Add zeros if no signals
             state_components.append(np.zeros(5))
         
+        # Add predicted extrema flags
+        extrema_flags = np.zeros(2)
+        if hasattr(self, 'has_predicted_low') and self.has_predicted_low:
+            extrema_flags[0] = 1.0  # Predicted low
+        if hasattr(self, 'has_predicted_high') and self.has_predicted_high:
+            extrema_flags[1] = 1.0  # Predicted high
+        state_components.append(extrema_flags)
+        
         # Position info
         position_info = np.zeros(5)
         if self.position == 'long':
@@ -776,39 +1055,69 @@ class TradingEnvironment:
     
     def calculate_reward(self, action):
         """Calculate reward for the given action with improved penalties for losing trades"""
-        reward = 0
-        
-        # Store previous balance for direct PnL calculation
+        # Store previous balance for reward calculation
         prev_balance = self.balance
         
-        # Base reward for actions
-        if action == 0:  # HOLD
-            # Small penalty for doing nothing to encourage action
-            # But make it context-dependent - holding during high volatility should be penalized more
-            volatility = self.get_recent_volatility()
-            reward = -0.01 * (1 + volatility)
+        # Initialize reward
+        reward = 0.0
         
+        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
+        if action == 0:  # HOLD
+            # Small negative reward for holding to encourage action
+            reward = -0.01
+            
+            # But give positive reward for holding if we're predicting an extrema soon
+            if hasattr(self, 'has_predicted_low') and self.has_predicted_low and self.position == 'flat':
+                # Reward for waiting for a predicted bottom
+                reward = 0.05
+            elif hasattr(self, 'has_predicted_high') and self.has_predicted_high and self.position == 'flat':
+                # Reward for waiting for a predicted top
+                reward = 0.05
+            
+            # Check if holding a profitable position
+            if self.position == 'long' and self.current_price > self.entry_price:
+                # Reward for holding a profitable long
+                profit_pct = (self.current_price - self.entry_price) / self.entry_price
+                reward += profit_pct * 0.5
+            elif self.position == 'short' and self.current_price < self.entry_price:
+                # Reward for holding a profitable short
+                profit_pct = (self.entry_price - self.current_price) / self.entry_price
+                reward += profit_pct * 0.5
+                
         elif action == 1:  # BUY/LONG
             if self.position == 'flat':
-                # Opening a long position
+                # Open long position
                 self.position = 'long'
                 self.entry_price = self.current_price
                 self.entry_index = self.current_step
                 self.position_size = self.calculate_position_size()
                 
-                # Calculate stop loss and take profit levels
-                self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100)
-                self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100)
+                # Set stop loss and take profit
+                self.stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100)
+                self.take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100)
                 
-                # Check if this is a good entry point based on technical indicators
+                # Calculate entry quality
                 entry_quality = self.evaluate_entry_quality('long')
-                reward += entry_quality * 0.5  # Scale the reward based on entry quality
                 
-                logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}")
+                # Base reward on entry quality
+                reward = entry_quality
+                
+                # Additional reward if buying at a predicted low
+                if hasattr(self, 'has_predicted_low') and self.has_predicted_low:
+                    reward += 0.5
+                    logger.info("Buying at predicted low point - additional reward")
+                
+                # Penalize buying at a predicted high
+                if hasattr(self, 'has_predicted_high') and self.has_predicted_high:
+                    reward -= 0.5
+                    logger.info("Buying at predicted high point - penalty applied")
+                
+                logger.info(f"OPENED long at {self.current_price} | Size: {self.position_size:.2f} | Entry quality: {entry_quality:.2f}")
                 
             elif self.position == 'short':
-                # Closing a short position
-                pnl_percent = (self.entry_price - self.current_price) / self.entry_price * 100
+                # Close short position
+                price_diff = self.entry_price - self.current_price
+                pnl_percent = price_diff / self.entry_price * 100
                 pnl_dollar = pnl_percent / 100 * self.position_size
                 
                 # Apply fees
@@ -819,6 +1128,216 @@ class TradingEnvironment:
                 self.total_pnl += pnl_dollar
                 self.episode_pnl += pnl_dollar
                 
+                # Update max drawdown
+                if self.balance < self.peak_balance:
+                    current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100
+                    self.max_drawdown = max(self.max_drawdown, current_drawdown)
+                elif self.balance > self.peak_balance:
+                    self.peak_balance = self.balance
+                
+                # Record trade
+                self.trades.append({
+                    'type': 'short',
+                    'entry': self.entry_price,
+                    'exit': self.current_price,
+                    'entry_time': self.data[self.entry_index]['timestamp'],
+                    'exit_time': self.data[self.current_step]['timestamp'],
+                    'pnl_percent': pnl_percent,
+                    'pnl_dollar': pnl_dollar,
+                    'duration': self.current_step - self.entry_index,
+                    'market_direction': self.get_market_direction(),
+                    'reason': 'switch_to_long'
+                })
+                
+                # Reward based on PnL
+                if pnl_dollar > 0:
+                    reward = 1.0 + pnl_dollar / 10  # Positive reward for profit
+                    self.win_count += 1
+                else:
+                    reward = -1.0  # Negative reward for loss
+                    self.loss_count += 1
+                
+                logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
+                
+                # Open new long position
+                self.position = 'long'
+                self.entry_price = self.current_price
+                self.entry_index = self.current_step
+                self.position_size = self.calculate_position_size()
+                
+                # Set stop loss and take profit
+                self.stop_loss = self.current_price * (1 - STOP_LOSS_PERCENT / 100)
+                self.take_profit = self.current_price * (1 + TAKE_PROFIT_PERCENT / 100)
+                
+                logger.info(f"OPENED long at {self.current_price} | Size: {self.position_size:.2f}")
+                
+        elif action == 2:  # SELL/SHORT
+            if self.position == 'flat':
+                # Open short position
+                self.position = 'short'
+                self.entry_price = self.current_price
+                self.entry_index = self.current_step
+                self.position_size = self.calculate_position_size()
+                
+                # Set stop loss and take profit
+                self.stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100)
+                self.take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100)
+                
+                # Calculate entry quality
+                entry_quality = self.evaluate_entry_quality('short')
+                
+                # Base reward on entry quality
+                reward = entry_quality
+                
+                # Additional reward if selling at a predicted high
+                if hasattr(self, 'has_predicted_high') and self.has_predicted_high:
+                    reward += 0.5
+                    logger.info("Selling at predicted high point - additional reward")
+                
+                # Penalize selling at a predicted low
+                if hasattr(self, 'has_predicted_low') and self.has_predicted_low:
+                    reward -= 0.5
+                    logger.info("Selling at predicted low point - penalty applied")
+                
+                logger.info(f"OPENED short at {self.current_price} | Size: {self.position_size:.2f} | Entry quality: {entry_quality:.2f}")
+                
+            elif self.position == 'long':
+                # Close long position
+                price_diff = self.current_price - self.entry_price
+                pnl_percent = price_diff / self.entry_price * 100
+                pnl_dollar = pnl_percent / 100 * self.position_size
+                
+                # Apply fees
+                pnl_dollar -= self.calculate_fees(self.position_size)
+                
+                # Update balance
+                self.balance += pnl_dollar
+                self.total_pnl += pnl_dollar
+                self.episode_pnl += pnl_dollar
+                
+                # Update max drawdown
+                if self.balance < self.peak_balance:
+                    current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100
+                    self.max_drawdown = max(self.max_drawdown, current_drawdown)
+                elif self.balance > self.peak_balance:
+                    self.peak_balance = self.balance
+                
+                # Record trade
+                self.trades.append({
+                    'type': 'long',
+                    'entry': self.entry_price,
+                    'exit': self.current_price,
+                    'entry_time': self.data[self.entry_index]['timestamp'],
+                    'exit_time': self.data[self.current_step]['timestamp'],
+                    'pnl_percent': pnl_percent,
+                    'pnl_dollar': pnl_dollar,
+                    'duration': self.current_step - self.entry_index,
+                    'market_direction': self.get_market_direction(),
+                    'reason': 'switch_to_short'
+                })
+                
+                # Reward based on PnL
+                if pnl_dollar > 0:
+                    reward = 1.0 + pnl_dollar / 10  # Positive reward for profit
+                    self.win_count += 1
+                else:
+                    reward = -1.0  # Negative reward for loss
+                    self.loss_count += 1
+                
+                logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
+                
+                # Open new short position
+                self.position = 'short'
+                self.entry_price = self.current_price
+                self.entry_index = self.current_step
+                self.position_size = self.calculate_position_size()
+                
+                # Set stop loss and take profit
+                self.stop_loss = self.current_price * (1 + STOP_LOSS_PERCENT / 100)
+                self.take_profit = self.current_price * (1 - TAKE_PROFIT_PERCENT / 100)
+                
+                logger.info(f"OPENED short at {self.current_price} | Size: {self.position_size:.2f}")
+                
+        elif action == 3:  # CLOSE
+            if self.position == 'long':
+                # Close long position
+                price_diff = self.current_price - self.entry_price
+                pnl_percent = price_diff / self.entry_price * 100
+                pnl_dollar = pnl_percent / 100 * self.position_size
+                
+                # Apply fees
+                pnl_dollar -= self.calculate_fees(self.position_size)
+                
+                # Update balance
+                self.balance += pnl_dollar
+                self.total_pnl += pnl_dollar
+                self.episode_pnl += pnl_dollar
+                
+                # Update max drawdown
+                if self.balance < self.peak_balance:
+                    current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100
+                    self.max_drawdown = max(self.max_drawdown, current_drawdown)
+                elif self.balance > self.peak_balance:
+                    self.peak_balance = self.balance
+                
+                # Record trade
+                self.trades.append({
+                    'type': 'long',
+                    'entry': self.entry_price,
+                    'exit': self.current_price,
+                    'entry_time': self.data[self.entry_index]['timestamp'],
+                    'exit_time': self.data[self.current_step]['timestamp'],
+                    'pnl_percent': pnl_percent,
+                    'pnl_dollar': pnl_dollar,
+                    'duration': self.current_step - self.entry_index,
+                    'market_direction': self.get_market_direction(),
+                    'reason': 'manual_close'
+                })
+                
+                # Reward based on PnL
+                if pnl_dollar > 0:
+                    reward = 1.0 + pnl_dollar / 10  # Positive reward for profit
+                    self.win_count += 1
+                    
+                    # Extra reward for closing at a predicted high
+                    if hasattr(self, 'has_predicted_high') and self.has_predicted_high:
+                        reward += 0.5
+                        logger.info("Closing long at predicted high - additional reward")
+                else:
+                    reward = -1.0  # Negative reward for loss
+                    self.loss_count += 1
+                
+                logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
+                
+                # Reset position
+                self.position = 'flat'
+                self.entry_price = 0
+                self.entry_index = 0
+                self.position_size = 0
+                self.stop_loss = 0
+                self.take_profit = 0
+                
+            elif self.position == 'short':
+                # Close short position
+                price_diff = self.entry_price - self.current_price
+                pnl_percent = price_diff / self.entry_price * 100
+                pnl_dollar = pnl_percent / 100 * self.position_size
+                
+                # Apply fees
+                pnl_dollar -= self.calculate_fees(self.position_size)
+                
+                # Update balance
+                self.balance += pnl_dollar
+                self.total_pnl += pnl_dollar
+                self.episode_pnl += pnl_dollar
+                
+                # Update max drawdown
+                if self.balance < self.peak_balance:
+                    current_drawdown = (self.peak_balance - self.balance) / self.peak_balance * 100
+                    self.max_drawdown = max(self.max_drawdown, current_drawdown)
+                elif self.balance > self.peak_balance:
+                    self.peak_balance = self.balance
+                
                 # Record trade
                 self.trades.append({
                     'type': 'short',
@@ -833,177 +1352,17 @@ class TradingEnvironment:
                     'reason': 'manual_close'
                 })
                 
-                # Update win/loss count
-                if pnl_dollar > 0:
-                    self.win_count += 1
-                    reward += 1.0 + (pnl_percent / 2)  # Bonus for winning trade
-                else:
-                    self.loss_count += 1
-                    reward -= 1.0 + (abs(pnl_percent) / 2)  # Penalty for losing trade
-                
-                logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
-                
-                # Reset position and open new long
-                self.position = 'long'
-                self.entry_price = self.current_price
-                self.entry_index = self.current_step
-                self.position_size = self.calculate_position_size()
-                
-                # Calculate stop loss and take profit levels
-                self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100)
-                self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100)
-                
-                logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}")
-        
-        elif action == 2:  # SELL/SHORT
-            if self.position == 'flat':
-                # Opening a short position
-                self.position = 'short'
-                self.entry_price = self.current_price
-                self.position_size = self.calculate_position_size()
-                self.stop_loss = self.entry_price * (1 + STOP_LOSS_PERCENT/100)
-                self.take_profit = self.entry_price * (1 - TAKE_PROFIT_PERCENT/100)
-                
-                # Check if this is an optimal sell point (top)
-                current_idx = len(self.features['price']) - 1
-                if hasattr(self, 'optimal_tops') and current_idx in self.optimal_tops:
-                    reward += 2.0  # Bonus for selling at a top
-                else:
-                    reward += 0.1  # Small reward for opening a position
-                
-                logger.info(f"OPENED SHORT at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}")
-                
-            elif self.position == 'long':
-                # Close long and open short
-                pnl_percent = (self.current_price - self.entry_price) / self.entry_price * 100
-                pnl_dollar = pnl_percent / 100 * self.position_size
-                
-                # Apply fees
-                pnl_dollar -= self.calculate_fees(self.position_size)
-                
-                # Update balance
-                self.balance += pnl_dollar
-                self.total_pnl += pnl_dollar
-                
-                # Record trade
-                self.trades.append({
-                    'type': 'long',
-                    'entry': self.entry_price,
-                    'exit': self.current_price,
-                    'entry_time': self.data[self.entry_index]['timestamp'],
-                    'exit_time': self.data[self.current_step]['timestamp'],
-                    'pnl_percent': pnl_percent,
-                    'pnl_dollar': pnl_dollar
-                })
-                
                 # Reward based on PnL
                 if pnl_dollar > 0:
-                    reward += 1.0 + pnl_dollar / 10  # Positive reward for profit
+                    reward = 1.0 + pnl_dollar / 10  # Positive reward for profit
                     self.win_count += 1
+                    
+                    # Extra reward for closing at a predicted low
+                    if hasattr(self, 'has_predicted_low') and self.has_predicted_low:
+                        reward += 0.5
+                        logger.info("Closing short at predicted low - additional reward")
                 else:
-                    reward -= 1.0  # Negative reward for loss
-                    self.loss_count += 1
-                
-                logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
-                
-                # Now open short
-                self.position = 'short'
-                self.entry_price = self.current_price
-                self.position_size = self.calculate_position_size()
-                self.stop_loss = self.entry_price * (1 + STOP_LOSS_PERCENT/100)
-                self.take_profit = self.entry_price * (1 - TAKE_PROFIT_PERCENT/100)
-                
-                # Check if this is an optimal sell point
-                current_idx = len(self.features['price']) - 1
-                if hasattr(self, 'optimal_tops') and current_idx in self.optimal_tops:
-                    reward += 2.0  # Bonus for selling at a top
-                
-                logger.info(f"OPENED SHORT at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}")
-        
-        elif action == 3:  # CLOSE
-            if self.position == 'long':
-                # Close long position
-                pnl_percent = (self.current_price - self.entry_price) / self.entry_price * 100
-                pnl_dollar = pnl_percent / 100 * self.position_size
-                
-                # Apply fees
-                pnl_dollar -= self.calculate_fees(self.position_size)
-                
-                # Update balance
-                self.balance += pnl_dollar
-                self.total_pnl += pnl_dollar
-                self.episode_pnl += pnl_dollar
-                
-                # Update max drawdown
-                if self.balance > self.peak_balance:
-                    self.peak_balance = self.balance
-                drawdown = (self.peak_balance - self.balance) / self.peak_balance
-                self.max_drawdown = max(self.max_drawdown, drawdown)
-                
-                # Record trade
-                self.trades.append({
-                    'type': 'long',
-                    'entry': self.entry_price,
-                    'exit': self.current_price,
-                    'entry_time': self.data[self.entry_index]['timestamp'],
-                    'exit_time': self.data[self.current_step]['timestamp'],
-                    'pnl_percent': pnl_percent,
-                    'pnl_dollar': pnl_dollar
-                })
-                
-                # Reward based on PnL
-                if pnl_dollar > 0:
-                    reward += 1.0 + pnl_dollar / 10  # Positive reward for profit
-                    self.win_count += 1
-                else:
-                    reward -= 1.0  # Negative reward for loss
-                    self.loss_count += 1
-                
-                logger.info(f"CLOSED long at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
-                
-                # Reset position
-                self.position = 'flat'
-                self.entry_price = 0
-                self.position_size = 0
-                self.stop_loss = 0
-                self.take_profit = 0
-                
-            elif self.position == 'short':
-                # Close short position
-                pnl_percent = (self.entry_price - self.current_price) / self.entry_price * 100
-                pnl_dollar = pnl_percent / 100 * self.position_size
-                
-                # Apply fees
-                pnl_dollar -= self.calculate_fees(self.position_size)
-                
-                # Update balance
-                self.balance += pnl_dollar
-                self.total_pnl += pnl_dollar
-                self.episode_pnl += pnl_dollar
-                
-                # Update max drawdown
-                if self.balance > self.peak_balance:
-                    self.peak_balance = self.balance
-                drawdown = (self.peak_balance - self.balance) / self.peak_balance
-                self.max_drawdown = max(self.max_drawdown, drawdown)
-                
-                # Record trade
-                self.trades.append({
-                    'type': 'short',
-                    'entry': self.entry_price,
-                    'exit': self.current_price,
-                    'entry_time': self.data[self.entry_index]['timestamp'],
-                    'exit_time': self.data[self.current_step]['timestamp'],
-                    'pnl_percent': pnl_percent,
-                    'pnl_dollar': pnl_dollar
-                })
-                
-                # Reward based on PnL
-                if pnl_dollar > 0:
-                    reward += 1.0 + pnl_dollar / 10  # Positive reward for profit
-                    self.win_count += 1
-                else:
-                    reward -= 1.0  # Negative reward for loss
+                    reward = -1.0  # Negative reward for loss
                     self.loss_count += 1
                 
                 logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}")
@@ -1011,6 +1370,7 @@ class TradingEnvironment:
                 # Reset position
                 self.position = 'flat'
                 self.entry_price = 0
+                self.entry_index = 0
                 self.position_size = 0
                 self.stop_loss = 0
                 self.take_profit = 0
@@ -1038,12 +1398,25 @@ class TradingEnvironment:
         
         # Add reward/penalty based on market trend alignment
         market_direction = self.get_market_direction()
-        if (self.position == 'long' and market_direction == 'uptrend') or \
-           (self.position == 'short' and market_direction == 'downtrend'):
-            reward += 0.2  # Reward for trading with the trend
-        elif (self.position == 'long' and market_direction == 'downtrend') or \
-             (self.position == 'short' and market_direction == 'uptrend'):
-            reward -= 0.3  # Stronger penalty for trading against the trend
+        
+        if self.position == 'long' and market_direction == 'uptrend':
+            reward += 0.1  # Reward for being long in uptrend
+        elif self.position == 'short' and market_direction == 'downtrend':
+            reward += 0.1  # Reward for being short in downtrend
+        elif self.position == 'long' and market_direction == 'downtrend':
+            reward -= 0.1  # Penalty for being long in downtrend
+        elif self.position == 'short' and market_direction == 'uptrend':
+            reward -= 0.1  # Penalty for being short in uptrend
+            
+        # Add reward for trading with volume
+        if action in [1, 2] and self.position != 'flat':  # Opening a position
+            current_volume = self.data[self.current_step]['volume']
+            avg_volume = np.mean([candle['volume'] for candle in self.data[max(0, self.current_step-10):self.current_step]])
+            
+            if current_volume > avg_volume * 1.5:
+                # Trading with high volume
+                reward += 0.2
+                logger.info("Trading with high volume - additional reward")
         
         return reward
 
@@ -1242,43 +1615,80 @@ class TradingEnvironment:
         self.price_predictor.to(device)
         self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
         self.predicted_prices = np.array([])
+        self.predicted_extrema = np.array([])
+        self.extrema_threshold = 0.7  # Threshold for extrema prediction confidence
     
     def train_price_predictor(self):
-        """Train the price prediction model on recent data"""
+        """Train the price prediction model on recent data with focus on extrema"""
         if len(self.features['price']) < 35:
-            return 0.0
+            return 0.0, 0.0
         
-        # Get price history
+        # Get price and volume history
         price_history = self.features['price']
+        volume_history = self.features['volume']
         
-        # Train the model
-        loss = self.price_predictor.train_on_new_data(
+        # Train the model with emphasis on extrema prediction
+        price_loss, extrema_loss = self.price_predictor.train_on_new_data(
             price_history, 
+            volume_history,
             self.price_predictor_optimizer,
-            epochs=5
+            epochs=5,
+            extrema_weight=1.5  # Give more weight to extrema prediction
         )
         
-        return loss
+        logger.info(f"Price predictor training - Price loss: {price_loss:.6f}, Extrema loss: {extrema_loss:.6f}")
+        
+        return price_loss, extrema_loss
     
     def update_price_predictions(self):
-        """Update price predictions"""
+        """Update price and extrema predictions"""
         if len(self.features['price']) < 30:
             self.predicted_prices = np.array([])
+            self.predicted_extrema = np.array([])
             return
         
-        # Get price history
+        # Get price and volume history
         price_history = self.features['price']
+        volume_history = self.features['volume']
         
         # Get predictions
-        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
-
+        self.predicted_prices, self.predicted_extrema = self.price_predictor.predict_next_candles(
+            price_history, 
+            volume_history,
+            num_candles=5
+        )
+        
+        # Log predictions
+        logger.info(f"Predicted prices for next 5 candles: {self.predicted_prices}")
+        
+        # Identify predicted extrema points
+        predicted_lows = []
+        predicted_highs = []
+        
+        for i, (p_low, p_high) in enumerate(self.predicted_extrema):
+            if p_low > self.extrema_threshold:
+                predicted_lows.append(i)
+                logger.info(f"Predicted low at candle +{i+1} with confidence {p_low:.2f}")
+            if p_high > self.extrema_threshold:
+                predicted_highs.append(i)
+                logger.info(f"Predicted high at candle +{i+1} with confidence {p_high:.2f}")
+        
+        # Store predicted extrema indices
+        self.predicted_lows = predicted_lows
+        self.predicted_highs = predicted_highs
+    
     def identify_optimal_trades(self):
-        """Identify optimal entry and exit points based on local extrema"""
+        """Identify optimal entry and exit points based on local extrema and volume"""
         if len(self.features['price']) < 20:
             return
         
-        # Find local bottoms and tops
-        bottoms, tops = find_local_extrema(self.features['price'], window=5)
+        # Find local bottoms and tops with volume confirmation
+        bottoms, tops = find_local_extrema(
+            self.features['price'], 
+            window=5, 
+            volumes=self.features['volume'],
+            volume_threshold=0.7
+        )
         
         # Store optimal trade points
         self.optimal_bottoms = bottoms  # Buy points
@@ -1294,6 +1704,21 @@ class TradingEnvironment:
                 self.optimal_signals[i] = -1  # Sell signal
         
         logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
+        
+        # Integrate predicted extrema into decision making
+        if hasattr(self, 'predicted_extrema') and len(self.predicted_extrema) > 0:
+            # Check if we predict any extrema in the near future
+            has_predicted_low = any(p_low > self.extrema_threshold for p_low, _ in self.predicted_extrema)
+            has_predicted_high = any(p_high > self.extrema_threshold for _, p_high in self.predicted_extrema)
+            
+            if has_predicted_low:
+                logger.info("Predicting a significant low point in the next few candles")
+            if has_predicted_high:
+                logger.info("Predicting a significant high point in the next few candles")
+                
+            # Store these predictions for use in action selection
+            self.has_predicted_low = has_predicted_low
+            self.has_predicted_high = has_predicted_high
 
     def calculate_position_size(self):
         """Calculate position size based on current balance, volatility and risk parameters"""
@@ -1392,8 +1817,11 @@ class Agent:
         self.epsilon_end = EPSILON_END
         self.epsilon_decay = EPSILON_DECAY
         
-        # Initialize mixed precision scaler
-        self.scaler = amp.GradScaler()
+        # Initialize mixed precision scaler with the new format
+        if self.device.type == "cuda":
+            self.scaler = amp.GradScaler('cuda')
+        else:
+            self.scaler = amp.GradScaler('cpu')
         
         # Initialize TensorBoard writer
         self.writer = SummaryWriter(f'runs/trading_agent_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}')
@@ -1748,6 +2176,10 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000,
         env.price_predictor = price_predictor
         env.price_predictor_optimizer = price_predictor_optimizer
         
+        # Initialize price predictor
+        env.initialize_price_predictor(device=agent.device)
+        logger.info("Price predictor initialized")
+        
         for episode in range(num_episodes):
             try:
                 # Update curriculum stage if needed
@@ -1768,7 +2200,7 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000,
                 env.risk_factor = risk_factor
                 
                 # Update training data if exchange is available
-                if exchange and args.refresh_data:
+                if exchange and args and hasattr(args, 'refresh_data') and args.refresh_data:
                     # Fetch new data at the start of each episode
                     logger.info(f"Refreshing data for episode {episode}")
                     await env.fetch_new_data(exchange, "ETH/USDT", "1m", 100)
@@ -1793,7 +2225,7 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000,
                 }
                 
                 # Train price predictor
-                prediction_loss = env.train_price_predictor()
+                prediction_loss, extrema_loss = env.train_price_predictor()
                 
                 # Update price predictions
                 env.update_price_predictions()
@@ -2276,6 +2708,26 @@ async def live_trading(agent, env, exchange, demo=True):
     logger.info(f"Starting live trading (demo mode: {demo})")
     
     try:
+        # Initialize trading client for live trading if not in demo mode
+        trading_client = None
+        if not demo:
+            trading_client = MexcTradingClient(symbol="ETH/USDT")
+            if not trading_client.client:
+                logger.error("Failed to initialize MEXC trading client. Check API keys.")
+                return
+            
+            # Update environment with trading client
+            env.trading_client = trading_client
+            
+            # Fetch initial account balance
+            balance = await trading_client.fetch_account_balance()
+            if balance > 0:
+                logger.info(f"Initial account balance: ${balance:.2f}")
+                env.balance = balance
+                env.initial_balance = balance
+            else:
+                logger.warning("Could not fetch account balance, using default")
+        
         # Subscribe to websocket for real-time data
         symbol = "ETH/USDT"
         timeframe = "1m"
@@ -2286,6 +2738,20 @@ async def live_trading(agent, env, exchange, demo=True):
             logger.error("Failed to initialize with historical data")
             return
         
+        # Initialize price predictor
+        env.initialize_price_predictor(device=agent.device)
+        logger.info("Price predictor initialized")
+        
+        # Initialize TensorBoard writer if not already present
+        if not hasattr(agent, 'writer'):
+            from torch.utils.tensorboard import SummaryWriter
+            log_dir = os.path.join("logs", "live_trading", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
+            agent.writer = SummaryWriter(log_dir)
+            logger.info(f"TensorBoard logs will be saved to {log_dir}")
+        
+        # Initialize step counter for TensorBoard
+        tb_step = 0
+        
         # Main trading loop
         while True:
             # Wait for the next candle (1 minute)
@@ -2301,6 +2767,10 @@ async def live_trading(agent, env, exchange, demo=True):
             # Update environment with new data
             env.add_data(latest_candle)
             
+            # Train price predictor and update predictions
+            env.train_price_predictor()
+            env.update_price_predictions()
+            
             # Get current state
             state = env.get_state()
             
@@ -2328,11 +2798,86 @@ async def live_trading(agent, env, exchange, demo=True):
                 if trade_analysis:
                     logger.info(f"Recent Performance: Win Rate={trade_analysis.get('uptrend_win_rate', 0):.1f}% in uptrends, "
                               f"{trade_analysis.get('downtrend_win_rate', 0):.1f}% in downtrends")
+            
+            # If not in demo mode, update actual balance from exchange
+            if not demo and trading_client:
+                try:
+                    actual_balance = await trading_client.fetch_account_balance()
+                    if actual_balance > 0:
+                        # Update environment balance with actual balance
+                        env.balance = actual_balance
+                        logger.info(f"Updated actual account balance: ${actual_balance:.2f}")
+                except Exception as e:
+                    logger.error(f"Error updating account balance: {e}")
+            
+            # Log to TensorBoard every 5 steps
+            if tb_step % 5 == 0:
+                # Create a DataFrame from the environment's data
+                df_ohlcv = pd.DataFrame([{
+                    'timestamp': candle['timestamp'],
+                    'open': candle['open'],
+                    'high': candle['high'],
+                    'low': candle['low'],
+                    'close': candle['close'],
+                    'volume': candle['volume']
+                } for candle in env.data[-100:]])  # Use last 100 candles
+                
+                # Convert timestamp to datetime
+                df_ohlcv['timestamp'] = pd.to_datetime(df_ohlcv['timestamp'], unit='ms')
+                df_ohlcv.set_index('timestamp', inplace=True)
+                
+                # Extract buy/sell signals from trades
+                buy_signals = []
+                sell_signals = []
+                
+                if hasattr(env, 'trades') and env.trades:
+                    for trade in env.trades:
+                        if 'entry_time' in trade and 'entry' in trade:
+                            if trade['type'] == 'long':
+                                # Buy signal
+                                entry_time = pd.to_datetime(trade['entry_time'], unit='ms')
+                                buy_signals.append((entry_time, trade['entry']))
+                                
+                                # Sell signal if closed
+                                if 'exit_time' in trade and 'exit' in trade and trade['exit'] > 0:
+                                    exit_time = pd.to_datetime(trade['exit_time'], unit='ms')
+                                    sell_signals.append((exit_time, trade['exit']))
+                            
+                            elif trade['type'] == 'short':
+                                # Sell short signal
+                                entry_time = pd.to_datetime(trade['entry_time'], unit='ms')
+                                sell_signals.append((entry_time, trade['entry']))
+                                
+                                # Buy to cover signal if closed
+                                if 'exit_time' in trade and 'exit' in trade and trade['exit'] > 0:
+                                    exit_time = pd.to_datetime(trade['exit_time'], unit='ms')
+                                    buy_signals.append((exit_time, trade['exit']))
+                
+                # Log to TensorBoard with a fixed tag to overwrite previous charts
+                log_ohlcv_to_tensorboard(
+                    agent.writer, 
+                    df_ohlcv, 
+                    buy_signals, 
+                    sell_signals, 
+                    tb_step, 
+                    tag_prefix="live_trading"
+                )
+                
+                # Log additional metrics
+                agent.writer.add_scalar("live/balance", env.balance, tb_step)
+                agent.writer.add_scalar("live/total_pnl", env.total_pnl, tb_step)
+                if env.trades:
+                    agent.writer.add_scalar("live/win_rate", win_rate, tb_step)
+                    agent.writer.add_scalar("live/trade_count", len(env.trades), tb_step)
+            
+            # Increment TensorBoard step counter
+            tb_step += 1
     
     except KeyboardInterrupt:
         logger.info("Live trading stopped by user")
     except Exception as e:
         logger.error(f"Error in live trading: {e}")
+        logger.error(traceback.format_exc())
         raise
 
 async def get_latest_candle(exchange, symbol):
@@ -2393,35 +2938,43 @@ async def fetch_ohlcv_data(exchange, symbol, timeframe, limit):
 
 async def main():
     """Main function to run the trading bot"""
-    parser = argparse.ArgumentParser(description='Crypto Trading Bot')
-    parser.add_argument('--mode', type=str, default='train', 
-                        choices=['train', 'evaluate', 'live', 'continuous'],
-                        help='Mode to run the bot in (train, evaluate, live, or continuous)')
-    parser.add_argument('--episodes', type=int, default=1000, help='Number of episodes to train')
+    # Parse command line arguments
+    import argparse
+    import traceback
+    
+    parser = argparse.ArgumentParser(description='Run the trading bot')
+    parser.add_argument('--mode', type=str, default='train', choices=['train', 'evaluate', 'live'], help='Mode to run the bot in')
+    parser.add_argument('--episodes', type=int, default=100, help='Number of episodes to train for')
     parser.add_argument('--demo', action='store_true', help='Run in demo mode (no real trades)')
-    parser.add_argument('--refresh-data', action='store_true', help='Refresh data during training')
-    parser.add_argument('--device', type=str, default='gpu', choices=['gpu', 'cpu'],
-                        help='Device to use for training (gpu or cpu)')
+    parser.add_argument('--device', type=str, default='auto', choices=['cpu', 'gpu', 'auto'], help='Device to use for training')
     args = parser.parse_args()
     
-    # Get device based on argument and availability
+    # Set device
     device = get_device(args.device)
+    logger.info(f"Using device: {device}")
     
-    exchange = None
     try:
         # Initialize exchange
         exchange = await initialize_exchange()
         
+        # Determine if we're in demo mode
+        demo_mode = args.demo
+        if args.mode != 'live':
+            # Always use demo mode for training and evaluation
+            demo_mode = True
+        
+        logger.info(f"Running in {'demo' if demo_mode else 'live trading'} mode")
+        
         # Create environment with the correct parameters
         env = TradingEnvironment(
             initial_balance=INITIAL_BALANCE, 
             window_size=30, 
-            demo=args.demo or args.mode != 'live'
+            demo=demo_mode
         )
         
         # Fetch initial data
         logger.info("Fetching initial data for ETH/USDT")
-        await env.fetch_initial_data(exchange, "ETH/USDT", "1m", 500)
+        await env.fetch_initial_data(exchange, "ETH/USDT", "1m", 1500)
         
         # Initialize agent
         agent = Agent(STATE_SIZE, 4, hidden_size=384, lstm_layers=2, attention_heads=4, device=device)
@@ -2431,39 +2984,16 @@ async def main():
             logger.info(f"Starting training for {args.episodes} episodes...")
             stats = await train_agent(agent, env, num_episodes=args.episodes, exchange=exchange, args=args)
             
-        elif args.mode == 'continuous':
-            # Run in continuous mode - train indefinitely
-            logger.info("Starting continuous training mode. Press Ctrl+C to stop.")
-            episode_counter = 0
-            try:
-                while True:  # Run indefinitely until manually stopped
-                    # Train for a batch of episodes
-                    batch_size = 50  # Train in batches of 50 episodes
-                    logger.info(f"Starting training batch {episode_counter // batch_size + 1}")
-                    
-                    # Refresh data at the start of each batch
-                    if exchange and args.refresh_data:
-                        logger.info("Refreshing data for new training batch")
-                        await env.fetch_new_data(exchange, "ETH/USDT", "1m", 500)
-                        logger.info(f"Updated environment with fresh candles")
-                    
-                    # Train for a batch of episodes
-                    stats = await train_agent(agent, env, num_episodes=args.episodes, exchange=exchange, args=args)
-                    
-                    # Save model after each batch
-                    agent.save(f"models/trading_agent_continuous_{episode_counter}.pt")
-                    
-                    # Increment counter
-                    episode_counter += batch_size
-                    
-                    # Sleep briefly to prevent excessive API calls
-                    await asyncio.sleep(5)
-                    
-            except KeyboardInterrupt:
-                logger.info("Continuous training stopped by user")
-                # Save final model
-                agent.save("models/trading_agent_continuous_final.pt")
-                logger.info("Final model saved")
+            # Plot training results
+            plot_training_results(stats)
+            
+            # Save the trained agent
+            agent.save("models/trading_agent_latest.pt")
+            
+            # Evaluate the agent
+            logger.info("Evaluating agent...")
+            results = evaluate_agent(agent, env, num_episodes=10)
+            logger.info(f"Evaluation results: {results}")
             
         elif args.mode == 'evaluate':
             # Load the best model
@@ -2480,7 +3010,7 @@ async def main():
             
             # Run live trading
             logger.info("Starting live trading...")
-            await live_trading(agent, env, exchange, demo=args.demo)
+            await live_trading(agent, env, exchange, demo=demo_mode)
         
     except Exception as e:
         logger.error(f"Error: {e}")
@@ -2690,6 +3220,7 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step,
         from matplotlib.figure import Figure
         from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
         import numpy as np
+        from mplfinance.original_flavor import candlestick_ohlc
         
         # Check if DataFrame is empty
         if df_ohlcv.empty:
@@ -2700,59 +3231,107 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step,
         fig = Figure(figsize=(12, 8))
         canvas = FigureCanvas(fig)
         
-        # Create subplots for price and volume
-        ax1 = fig.add_subplot(2, 1, 1)  # Price chart
-        ax2 = fig.add_subplot(2, 1, 2, sharex=ax1)  # Volume chart
+        # Create subplots for price, predictions, and volume
+        ax1 = fig.add_subplot(3, 1, 1)  # Price chart
+        ax2 = fig.add_subplot(3, 1, 2, sharex=ax1)  # Predictions chart
+        ax3 = fig.add_subplot(3, 1, 3, sharex=ax1)  # Volume chart
         
-        # Plot OHLC
-        dates = mdates.date2num(df_ohlcv.index.to_pydatetime())
-        ohlc = np.column_stack((dates, df_ohlcv['open'], df_ohlcv['high'], df_ohlcv['low'], df_ohlcv['close']))
+        # Convert DataFrame to OHLC format for candlestick
+        df_ohlc = df_ohlcv.copy()
+        df_ohlc.reset_index(inplace=True)
+        df_ohlc['date_num'] = mdates.date2num(df_ohlc['timestamp'].dt.to_pydatetime())
+        ohlc_data = df_ohlc[['date_num', 'open', 'high', 'low', 'close']].values
         
-        # Plot candlestick chart
-        from matplotlib.lines import Line2D
-        from matplotlib.patches import Rectangle
+        # Plot candlestick chart using mplfinance
+        candlestick_ohlc(ax1, ohlc_data, width=0.6/(24*3), colorup='green', colordown='red')
         
-        width = 0.6 / (len(df_ohlcv) + 1)  # Adjust width based on number of candles
+        # Add a dummy line for the legend
+        ax1.plot([], [], color='green', label='Bullish Candle')
+        ax1.plot([], [], color='red', label='Bearish Candle')
         
-        for i, (date, open_price, high, low, close) in enumerate(ohlc):
-            # Determine candle color
-            if close >= open_price:
-                color = 'green'
-                body_bottom = open_price
-                body_height = close - open_price
-            else:
-                color = 'red'
-                body_bottom = close
-                body_height = open_price - close
-            
-            # Plot candle body
-            rect = Rectangle(
-                xy=(date - width/2, body_bottom),
-                width=width,
-                height=body_height,
-                facecolor=color,
-                edgecolor='black',
-                alpha=0.8
-            )
-            ax1.add_patch(rect)
-            
-            # Plot wick
-            ax1.plot([date, date], [low, high], color='black', linewidth=1)
-        
-        # Plot buy signals
+        # Plot buy signals with proper alignment
         if buy_signals:
-            buy_dates = mdates.date2num([x[0] for x in buy_signals])
-            buy_prices = [x[1] for x in buy_signals]
-            ax1.scatter(buy_dates, buy_prices, marker='^', color='green', s=100, label='Buy')
+            # Convert datetime to numeric format for plotting
+            buy_dates = [signal[0] for signal in buy_signals]
+            buy_dates_num = mdates.date2num(buy_dates)
+            buy_prices = [signal[1] for signal in buy_signals]
+            
+            # Filter signals to only include those within the chart timeframe
+            valid_indices = []
+            for i, date_num in enumerate(buy_dates_num):
+                if min(df_ohlc['date_num']) <= date_num <= max(df_ohlc['date_num']):
+                    valid_indices.append(i)
+            
+            if valid_indices:
+                filtered_dates = [buy_dates_num[i] for i in valid_indices]
+                filtered_prices = [buy_prices[i] for i in valid_indices]
+                ax1.scatter(filtered_dates, filtered_prices, marker='^', color='green', s=100, label='Buy')
         
-        # Plot sell signals
+        # Plot sell signals with proper alignment
         if sell_signals:
-            sell_dates = mdates.date2num([x[0] for x in sell_signals])
-            sell_prices = [x[1] for x in sell_signals]
-            ax1.scatter(sell_dates, sell_prices, marker='v', color='red', s=100, label='Sell')
+            # Convert datetime to numeric format for plotting
+            sell_dates = [signal[0] for signal in sell_signals]
+            sell_dates_num = mdates.date2num(sell_dates)
+            sell_prices = [signal[1] for signal in sell_signals]
+            
+            # Filter signals to only include those within the chart timeframe
+            valid_indices = []
+            for i, date_num in enumerate(sell_dates_num):
+                if min(df_ohlc['date_num']) <= date_num <= max(df_ohlc['date_num']):
+                    valid_indices.append(i)
+            
+            if valid_indices:
+                filtered_dates = [sell_dates_num[i] for i in valid_indices]
+                filtered_prices = [sell_prices[i] for i in valid_indices]
+                ax1.scatter(filtered_dates, filtered_prices, marker='v', color='red', s=100, label='Sell')
+        
+        # Plot predicted prices if available in the environment
+        from inspect import currentframe, getframeinfo
+        frame = currentframe()
+        has_predictions = False
+        while frame:
+            if 'env' in frame.f_locals:
+                env = frame.f_locals['env']
+                if hasattr(env, 'predicted_prices') and len(env.predicted_prices) > 0:
+                    # Get the last timestamp from the data
+                    last_timestamp = df_ohlc['timestamp'].iloc[-1]
+                    
+                    # Create future timestamps for predictions (assuming 1-minute intervals)
+                    future_timestamps = [last_timestamp + pd.Timedelta(minutes=i+1) for i in range(len(env.predicted_prices))]
+                    future_dates_num = mdates.date2num(future_timestamps)
+                    
+                    # Plot actual close prices
+                    ax2.plot(df_ohlc['date_num'], df_ohlc['close'], color='blue', label='Actual Price')
+                    
+                    # Plot predicted prices
+                    ax2.plot(future_dates_num, env.predicted_prices, color='orange', linestyle='--', marker='o', label='Predicted Price')
+                    
+                    # Add shading to indicate prediction area
+                    ax2.axvspan(df_ohlc['date_num'].iloc[-1], future_dates_num[-1], alpha=0.2, color='yellow')
+                    
+                    # Set title and legend
+                    ax2.set_title('Price Predictions')
+                    ax2.set_ylabel('Price')
+                    ax2.legend()
+                    ax2.grid(True)
+                    has_predictions = True
+                break
+            frame = frame.f_back
+        
+        # If no predictions were found, use the subplot for something else
+        if not has_predictions:
+            # Plot moving averages instead
+            if len(df_ohlc) >= 20:
+                df_ohlc['MA20'] = df_ohlc['close'].rolling(window=20).mean()
+                ax2.plot(df_ohlc['date_num'], df_ohlc['close'], color='blue', label='Close Price')
+                ax2.plot(df_ohlc['date_num'], df_ohlc['MA20'], color='orange', label='20-period MA')
+                ax2.set_title('Price and Moving Average')
+                ax2.set_ylabel('Price')
+                ax2.legend()
+                ax2.grid(True)
         
         # Plot volume
-        ax2.bar(dates, df_ohlcv['volume'], width=width, color='blue', alpha=0.5)
+        ax3.bar(df_ohlc['date_num'], df_ohlc['volume'], width=0.6/(24*3), color='blue', alpha=0.5)
         
         # Format axes
         ax1.set_title(f'OHLC with Buy/Sell Signals - {tag_prefix}')
@@ -2760,13 +3339,25 @@ def log_ohlcv_to_tensorboard(writer, df_ohlcv, buy_signals, sell_signals, step,
         ax1.legend()
         ax1.grid(True)
         
-        ax2.set_xlabel('Date')
-        ax2.set_ylabel('Volume')
-        ax2.grid(True)
+        ax3.set_xlabel('Date')
+        ax3.set_ylabel('Volume')
+        ax3.grid(True)
         
-        # Format date
+        # Format date for all axes
         date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')
+        ax1.xaxis.set_major_formatter(date_format)
         ax2.xaxis.set_major_formatter(date_format)
+        ax3.xaxis.set_major_formatter(date_format)
+        
+        # Set x-axis limits to ensure all subplots show the same time range
+        # Include future predictions if available
+        if has_predictions and 'future_dates_num' in locals():
+            x_min = min(df_ohlc['date_num'])
+            x_max = max(future_dates_num)
+            ax1.set_xlim(x_min, x_max)
+            ax2.set_xlim(x_min, x_max)
+            ax3.set_xlim(x_min, x_max)
+        
         fig.autofmt_xdate()
         
         # Adjust layout