diff --git a/crypto/gogo2/_notes.md b/crypto/gogo2/_notes.md index 0cb4fd6..5ef19f9 100644 --- a/crypto/gogo2/_notes.md +++ b/crypto/gogo2/_notes.md @@ -16,4 +16,9 @@ C:\Users\popov\miniforge3\Lib\site-packages\torch\amp\grad_scaler.py:132: UserWa 2025-03-10 12:11:30,927 - INFO - Starting training on device: cpu 2025-03-10 12:11:30,928 - ERROR - Training failed: 'TradingEnvironment' object has no attribute 'initialize_price_predictor' 2025-03-10 12:11:30,928 - INFO - Exchange connection closed -Backend tkagg is interactive backend. Turning interactive mode on. \ No newline at end of file +Backend tkagg is interactive backend. Turning interactive mode on. + + + + +2025-03-10 12:35:14,489 - INFO - Episode 34: Reward=232.41, Balance=$98.47, Win Rate=70.6%, Trades=17, Episode PnL=$-1.33, Total PnL=$-559.78, Max Drawdown=7.0%, Pred Accuracy=99.9% \ No newline at end of file diff --git a/crypto/gogo2/cuda.py b/crypto/gogo2/cuda.py new file mode 100644 index 0000000..727e960 --- /dev/null +++ b/crypto/gogo2/cuda.py @@ -0,0 +1,4 @@ +import torch +print(f"PyTorch version: {torch.__version__}") +print(f"CUDA available: {torch.cuda.is_available()}") +print(f"CUDA version: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}") \ No newline at end of file diff --git a/crypto/gogo2/main.py b/crypto/gogo2/main.py index d149888..a580720 100644 --- a/crypto/gogo2/main.py +++ b/crypto/gogo2/main.py @@ -22,6 +22,7 @@ from sklearn.preprocessing import MinMaxScaler import copy import argparse import traceback +import math # Configure logging logging.basicConfig( @@ -93,6 +94,9 @@ class DQN(nn.Module): def __init__(self, state_size, action_size, hidden_size=384, lstm_layers=2, attention_heads=4): super(DQN, self).__init__() + # Ensure model parameters are float32 + self.float() + self.state_size = state_size self.hidden_size = hidden_size self.lstm_layers = lstm_layers @@ -224,7 +228,12 @@ class PricePredictionModel(nn.Module): predictions = self.postprocess(scaled_predictions) return predictions - def train_on_new_data(self, price_history, optimizer, epochs=10): + def train_on_new_data(self, price_history, optimizer, epochs=5): + """Train the model on new price data""" + # Convert to numpy array if it's not already + if isinstance(price_history, list): + price_history = np.array(price_history, dtype=np.float32) # Force float32 + if len(price_history) < 35: # Need enough history for training return 0.0 @@ -320,6 +329,9 @@ class TradingEnvironment: self.optimal_tops = [] self.optimal_signals = np.array([]) + # Add risk factor for curriculum learning + self.risk_factor = 1.0 # Default risk factor + def reset(self): """Reset the environment to initial state""" self.balance = self.initial_balance @@ -635,14 +647,14 @@ class TradingEnvironment: """Create state representation for the agent""" if len(self.data) < 30 or len(self.features['price']) == 0: # Return zeros if not enough data - return np.zeros(STATE_SIZE) + return np.zeros(STATE_SIZE, dtype=np.float32) # Ensure float32 # Create a normalized state vector with recent price action and indicators state_components = [] # Price features (normalize recent prices by the latest price) latest_price = self.features['price'][-1] - price_features = np.array(self.features['price'][-10:]) / latest_price - 1.0 + price_features = np.array(self.features['price'][-10:], dtype=np.float32) / latest_price - 1.0 state_components.append(price_features) # Volume features (normalize by max volume) @@ -732,43 +744,46 @@ class TradingEnvironment: state = 
state[:STATE_SIZE] elif len(state) < STATE_SIZE: # Pad with zeros if too short - padding = np.zeros(STATE_SIZE - len(state)) + padding = np.zeros(STATE_SIZE - len(state), dtype=np.float32) # Ensure float32 state = np.concatenate([state, padding]) - return state + # Ensure float32 type + return state.astype(np.float32) def calculate_reward(self, action): """Calculate reward for the given action with improved penalties for losing trades""" reward = 0 + # Store previous balance for direct PnL calculation + prev_balance = self.balance + # Base reward for actions if action == 0: # HOLD - reward = -0.01 # Small penalty for doing nothing + # Small penalty for doing nothing to encourage action + # But make it context-dependent - holding during high volatility should be penalized more + volatility = self.get_recent_volatility() + reward = -0.01 * (1 + volatility) elif action == 1: # BUY/LONG if self.position == 'flat': # Opening a long position self.position = 'long' self.entry_price = self.current_price + self.entry_index = self.current_step self.position_size = self.calculate_position_size() - self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT/100) - self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT/100) - # Check if this is an optimal buy point (bottom) - current_idx = len(self.features['price']) - 1 - if hasattr(self, 'optimal_bottoms') and current_idx in self.optimal_bottoms: - reward += 2.0 # Bonus for buying at a bottom - else: - # Check if we're buying in a downtrend (bad) - if self.is_downtrend(): - reward -= 0.5 # Penalty for buying in downtrend - else: - reward += 0.1 # Small reward for opening a position + # Calculate stop loss and take profit levels + self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100) + self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100) + + # Check if this is a good entry point based on technical indicators + entry_quality = self.evaluate_entry_quality('long') + reward += entry_quality * 0.5 # Scale the reward based on entry quality logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") elif self.position == 'short': - # Close short and open long + # Closing a short position pnl_percent = (self.entry_price - self.current_price) / self.entry_price * 100 pnl_dollar = pnl_percent / 100 * self.position_size @@ -778,46 +793,39 @@ class TradingEnvironment: # Update balance self.balance += pnl_dollar self.total_pnl += pnl_dollar + self.episode_pnl += pnl_dollar # Record trade - trade_duration = len(self.features['price']) - self.entry_index self.trades.append({ 'type': 'short', 'entry': self.entry_price, 'exit': self.current_price, 'pnl_percent': pnl_percent, 'pnl_dollar': pnl_dollar, - 'duration': trade_duration, - 'market_direction': self.get_market_direction() + 'duration': self.current_step - self.entry_index, + 'market_direction': self.get_market_direction(), + 'reason': 'manual_close' }) - # Reward based on PnL with stronger penalties for losses + # Update win/loss count if pnl_dollar > 0: - reward += 1.0 + pnl_dollar / 10 # Positive reward for profit self.win_count += 1 + reward += 1.0 + (pnl_percent / 2) # Bonus for winning trade else: - # Stronger penalty for losses, scaled by the size of the loss - loss_penalty = 1.0 + abs(pnl_dollar) / 5 - reward -= loss_penalty self.loss_count += 1 - - # Extra penalty for closing a losing trade too quickly - if trade_duration < 5: - reward -= 0.5 # Penalty for very short losing trades + reward -= 1.0 + 
(abs(pnl_percent) / 2) # Penalty for losing trade logger.info(f"CLOSED short at {self.current_price} | PnL: {pnl_percent:.2f}% | ${pnl_dollar:.2f}") - # Now open long + # Reset position and open new long self.position = 'long' self.entry_price = self.current_price - self.entry_index = len(self.features['price']) - 1 + self.entry_index = self.current_step self.position_size = self.calculate_position_size() - self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT/100) - self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT/100) - # Check if this is an optimal buy point - if hasattr(self, 'optimal_bottoms') and self.entry_index in self.optimal_bottoms: - reward += 2.0 # Bonus for buying at a bottom + # Calculate stop loss and take profit levels + self.stop_loss = self.entry_price * (1 - STOP_LOSS_PERCENT / 100) + self.take_profit = self.entry_price * (1 + TAKE_PROFIT_PERCENT / 100) logger.info(f"OPENED LONG at {self.entry_price} | Stop loss: {self.stop_loss} | Take profit: {self.take_profit}") @@ -975,7 +983,14 @@ class TradingEnvironment: self.stop_loss = 0 self.take_profit = 0 - # Add prediction accuracy component to reward + # Add reward based on direct PnL change + balance_change = self.balance - prev_balance + if balance_change > 0: + reward += balance_change * 0.5 # Positive reward for making money + else: + reward += balance_change * 1.0 # Stronger negative reward for losing money + + # Add reward for predicted price movement alignment if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0: # Compare the first prediction with actual price if len(self.data) > 1: @@ -985,12 +1000,106 @@ class TradingEnvironment: # Reward accurate predictions, penalize bad ones if prediction_error < 0.005: # Less than 0.5% error - reward += 0.5 + reward += 0.2 elif prediction_error > 0.02: # More than 2% error - reward -= 0.5 + reward -= 0.2 + + # Add reward/penalty based on market trend alignment + market_direction = self.get_market_direction() + if (self.position == 'long' and market_direction == 'uptrend') or \ + (self.position == 'short' and market_direction == 'downtrend'): + reward += 0.2 # Reward for trading with the trend + elif (self.position == 'long' and market_direction == 'downtrend') or \ + (self.position == 'short' and market_direction == 'uptrend'): + reward -= 0.3 # Stronger penalty for trading against the trend return reward + def evaluate_entry_quality(self, position_type): + """Evaluate the quality of an entry point based on technical indicators""" + score = 0 + + # Get current indicators + rsi = self.features['rsi'][-1] if len(self.features['rsi']) > 0 else 50 + macd = self.features['macd'][-1] if len(self.features['macd']) > 0 else 0 + macd_signal = self.features['macd_signal'][-1] if len(self.features['macd_signal']) > 0 else 0 + stoch_k = self.features['stoch_k'][-1] if len(self.features['stoch_k']) > 0 else 50 + stoch_d = self.features['stoch_d'][-1] if len(self.features['stoch_d']) > 0 else 50 + + if position_type == 'long': + # RSI oversold condition (good for long) + if rsi < 30: + score += 0.5 + elif rsi < 40: + score += 0.2 + elif rsi > 70: + score -= 0.5 # Overbought, bad for long + + # MACD crossover (bullish) + if macd > macd_signal and macd > 0: + score += 0.3 + elif macd < macd_signal and macd < 0: + score -= 0.3 + + # Stochastic oversold + if stoch_k < 20 and stoch_d < 20: + score += 0.3 + elif stoch_k > 80 and stoch_d > 80: + score -= 0.3 + + elif position_type == 'short': + # RSI overbought condition (good for short) + if rsi > 70: + 
score += 0.5 + elif rsi > 60: + score += 0.2 + elif rsi < 30: + score -= 0.5 # Oversold, bad for short + + # MACD crossover (bearish) + if macd < macd_signal and macd < 0: + score += 0.3 + elif macd > macd_signal and macd > 0: + score -= 0.3 + + # Stochastic overbought + if stoch_k > 80 and stoch_d > 80: + score += 0.3 + elif stoch_k < 20 and stoch_d < 20: + score -= 0.3 + + # Check price relative to moving averages + if len(self.features['ema_9']) > 0 and len(self.features['ema_21']) > 0: + ema_9 = self.features['ema_9'][-1] + ema_21 = self.features['ema_21'][-1] + + if position_type == 'long': + if self.current_price > ema_9 > ema_21: # Strong uptrend + score += 0.4 + elif self.current_price < ema_9 < ema_21: # Strong downtrend + score -= 0.4 + elif position_type == 'short': + if self.current_price < ema_9 < ema_21: # Strong downtrend + score += 0.4 + elif self.current_price > ema_9 > ema_21: # Strong uptrend + score -= 0.4 + + return score + + def get_recent_volatility(self): + """Calculate recent price volatility""" + if len(self.features['price']) < 10: + return 0 + + # Use ATR if available + if len(self.features['atr']) > 0: + return self.features['atr'][-1] / self.current_price + + # Otherwise calculate simple volatility + recent_prices = self.features['price'][-10:] + returns = [recent_prices[i] / recent_prices[i-1] - 1 for i in range(1, len(recent_prices))] + return np.std(returns) * 100 # Volatility as percentage + def is_downtrend(self): """Check if the market is in a downtrend""" if len(self.features['price']) < 20: @@ -1016,13 +1125,49 @@ class TradingEnvironment: return short_ema > long_ema def get_market_direction(self): - """Get the current market direction""" - if self.is_uptrend(): - return "uptrend" - elif self.is_downtrend(): - return "downtrend" - else: - return "sideways" + """Determine the current market direction (uptrend, downtrend, or sideways)""" + if len(self.features['price']) < 20: + return 'unknown' + + # Use EMAs to determine trend + if len(self.features['ema_9']) > 0 and len(self.features['ema_21']) > 0: + ema_9 = self.features['ema_9'][-5:] + ema_21 = self.features['ema_21'][-5:] + price = self.features['price'][-5:] + + # Check if price is above/below EMAs + price_above_ema9 = sum(p > e for p, e in zip(price, ema_9)) + price_above_ema21 = sum(p > e for p, e in zip(price, ema_21)) + ema9_above_ema21 = sum(e9 > e21 for e9, e21 in zip(ema_9, ema_21)) + + # Strong uptrend: price > EMA9 > EMA21 + if price_above_ema9 >= 4 and price_above_ema21 >= 4 and ema9_above_ema21 >= 4: + return 'uptrend' + + # Strong downtrend: price < EMA9 < EMA21 + elif price_above_ema9 <= 1 and price_above_ema21 <= 1 and ema9_above_ema21 <= 1: + return 'downtrend' + + # Check price action + price_data = self.features['price'][-20:] + price_change = (price_data[-1] / price_data[0] - 1) * 100 + + if price_change > 1.0: + return 'uptrend' + elif price_change < -1.0: + return 'downtrend' + + # Check RSI for trend confirmation + if len(self.features['rsi']) > 0: + rsi = self.features['rsi'][-5:] + avg_rsi = sum(rsi) / len(rsi) + + if avg_rsi > 60: + return 'uptrend' + elif avg_rsi < 40: + return 'downtrend' + + return 'sideways' def analyze_trades(self): """Analyze completed trades to identify patterns""" @@ -1119,12 +1264,17 @@ class TradingEnvironment: logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points") def calculate_position_size(self): - """Calculate position size based on current balance and risk parameters""" - # Use a fixed percentage of 
balance for each trade - risk_percent = 5.0 # Risk 5% of balance per trade + """Calculate position size based on current balance, volatility and risk parameters""" + # Base risk percentage (adjust based on volatility) + volatility = self.get_recent_volatility() + + # Reduce risk during high volatility + base_risk = 5.0 # Base risk percentage + adjusted_risk = base_risk / (1 + volatility * 5) # Reduce risk as volatility increases + adjusted_risk = max(1.0, min(adjusted_risk, base_risk)) # Cap between 1% and base_risk # Calculate position size with leverage - position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE + position_size = self.balance * (adjusted_risk / 100) * MAX_LEVERAGE # Apply a safety factor to avoid liquidation safety_factor = 0.8 @@ -1138,6 +1288,14 @@ class TradingEnvironment: max_position = self.balance * MAX_LEVERAGE position_size = min(position_size, max_position) + # Adjust stop loss based on volatility + global STOP_LOSS_PERCENT, TAKE_PROFIT_PERCENT + STOP_LOSS_PERCENT = 0.5 * (1 + volatility) # Wider stop loss during high volatility + TAKE_PROFIT_PERCENT = 1.5 * (1 + volatility * 0.5) # Higher take profit during high volatility + + # Apply risk factor from curriculum learning + position_size *= self.risk_factor + return position_size def calculate_fees(self, position_size): @@ -1152,15 +1310,15 @@ class TradingEnvironment: # Ensure GPU usage if available def get_device(): - """Get the best available device (CUDA GPU or CPU)""" + """Get the device to use (GPU or CPU)""" if torch.cuda.is_available(): device = torch.device("cuda") + # Set default tensor type to float32 for CUDA + torch.set_default_tensor_type(torch.FloatTensor) logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}") - # Set up for mixed precision training - torch.backends.cudnn.benchmark = True else: device = torch.device("cpu") - logger.info("GPU not available, using CPU") + logger.info("Using CPU") return device # Update Agent class to use GPU properly @@ -1180,6 +1338,8 @@ class Agent: # Initialize policy and target networks self.policy_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(self.device) self.target_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(self.device) + ensure_float32(self.policy_net) + ensure_float32(self.target_net) self.target_net.load_state_dict(self.policy_net.state_dict()) self.target_net.eval() @@ -1194,6 +1354,16 @@ class Agent: # Create models directory if it doesn't exist os.makedirs("models", exist_ok=True) + + # Use pinned memory for faster CPU-to-GPU transfers + if self.device.type == "cuda": + self.use_pinned_memory = True + else: + self.use_pinned_memory = False + + # Ensure models are using float32 + self.policy_net.float() + self.target_net.float() def expand_model(self, new_state_size, new_hidden_size=512, new_lstm_layers=3, new_attention_heads=8): """Expand the model to handle more features or increase capacity""" @@ -1245,20 +1415,34 @@ class Agent: return True def select_action(self, state, training=True): + """Select an action using epsilon-greedy policy""" sample = random.random() + eps_threshold = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + math.exp(-1. * self.steps_done / self.epsilon_decay) if training: - # Epsilon decay - self.epsilon = EPSILON_END + (EPSILON_START - EPSILON_END) * \ - np.exp(-1. 
* self.steps_done / EPSILON_DECAY) - self.steps_done += 1 - - if sample > self.epsilon or not training: - with torch.no_grad(): - state_tensor = torch.FloatTensor(state).to(self.device) - action_values = self.policy_net(state_tensor) - return action_values.max(1)[1].item() + self.epsilon = eps_threshold else: + self.epsilon = 0.0 # No exploration during evaluation/live trading + + self.steps_done += 1 + + if sample > self.epsilon: + with torch.no_grad(): + # Convert state to tensor and ensure it's float32 (not double/float64) + state_tensor = torch.FloatTensor(state).to(self.device) + + # Ensure state has correct shape + if state_tensor.dim() == 1: + state_tensor = state_tensor.unsqueeze(0) + + # Get Q values + q_values = self.policy_net(state_tensor) + + # Return action with highest Q value + return q_values.max(1)[1].item() + else: + # Random action return random.randrange(self.action_size) def learn(self): @@ -1270,12 +1454,27 @@ class Agent: # Sample a batch of experiences experiences = self.memory.sample(BATCH_SIZE) - # Convert experiences to tensors - states = torch.FloatTensor([e.state for e in experiences]).to(self.device) - actions = torch.LongTensor([e.action for e in experiences]).to(self.device) - rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device) - next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device) - dones = torch.FloatTensor([e.done for e in experiences]).to(self.device) + # Convert experiences to tensors more efficiently + # First create numpy arrays, then convert to tensors + states_np = np.array([e.state for e in experiences], dtype=np.float32) # Ensure float32 + actions_np = np.array([e.action for e in experiences], dtype=np.int64) # Ensure int64 + rewards_np = np.array([e.reward for e in experiences], dtype=np.float32) # Ensure float32 + next_states_np = np.array([e.next_state for e in experiences], dtype=np.float32) # Ensure float32 + dones_np = np.array([e.done for e in experiences], dtype=np.float32) # Ensure float32 + + # Convert numpy arrays to tensors with pinned memory if using GPU + if self.use_pinned_memory: + states = torch.from_numpy(states_np).pin_memory().to(self.device, non_blocking=True) + actions = torch.from_numpy(actions_np).long().pin_memory().to(self.device, non_blocking=True) + rewards = torch.from_numpy(rewards_np).pin_memory().to(self.device, non_blocking=True) + next_states = torch.from_numpy(next_states_np).pin_memory().to(self.device, non_blocking=True) + dones = torch.from_numpy(dones_np).pin_memory().to(self.device, non_blocking=True) + else: + states = torch.FloatTensor(states_np).to(self.device) + actions = torch.LongTensor(actions_np).to(self.device) + rewards = torch.FloatTensor(rewards_np).to(self.device) + next_states = torch.FloatTensor(next_states_np).to(self.device) + dones = torch.FloatTensor(dones_np).to(self.device) # Use mixed precision for forward/backward passes if self.device.type == "cuda": @@ -1346,29 +1545,60 @@ class Agent: def update_target_network(self): self.target_net.load_state_dict(self.policy_net.state_dict()) - def save(self, path="models/trading_agent.pt"): - os.makedirs(os.path.dirname(path), exist_ok=True) - torch.save({ - 'policy_net': self.policy_net.state_dict(), - 'target_net': self.target_net.state_dict(), - 'optimizer': self.optimizer.state_dict(), - 'epsilon': self.epsilon, - 'steps_done': self.steps_done - }, path) - logger.info(f"Model saved to {path}") - - def load(self, path="models/trading_agent.pt"): - if os.path.isfile(path): - checkpoint = 
torch.load(path) - self.policy_net.load_state_dict(checkpoint['policy_net']) - self.target_net.load_state_dict(checkpoint['target_net']) - self.optimizer.load_state_dict(checkpoint['optimizer']) - self.epsilon = checkpoint['epsilon'] - self.steps_done = checkpoint['steps_done'] - logger.info(f"Model loaded from {path}") - return True - logger.warning(f"No model found at {path}") - return False + def save(self, path): + """Save model to path""" + try: + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(path), exist_ok=True) + + # Save model state + torch.save({ + 'policy_net': self.policy_net.state_dict(), + 'target_net': self.target_net.state_dict(), + 'optimizer': self.optimizer.state_dict(), + 'steps_done': self.steps_done + }, path) + + logger.info(f"Model saved to {path}") + except Exception as e: + logger.error(f"Failed to save model: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") + + def load(self, path): + """Load model from path with proper error handling for PyTorch 2.6+""" + try: + logger.info(f"Loading model from {path}") + + # First try with weights_only=True (safer) + try: + # Add numpy scalar to safe globals first + import torch.serialization + torch.serialization.add_safe_globals(['numpy._core.multiarray.scalar']) + + # Load the model + checkpoint = torch.load(path, map_location=self.device) + self.policy_net.load_state_dict(checkpoint['policy_net']) + self.target_net.load_state_dict(checkpoint['target_net']) + self.optimizer.load_state_dict(checkpoint['optimizer']) + self.steps_done = checkpoint.get('steps_done', 0) + logger.info(f"Model loaded successfully with weights_only=True") + + except Exception as e: + logger.warning(f"Could not load with weights_only=True: {e}") + logger.warning("Attempting to load with weights_only=False (less secure)") + + # Fall back to weights_only=False (less secure but more compatible) + checkpoint = torch.load(path, map_location=self.device, weights_only=False) + self.policy_net.load_state_dict(checkpoint['policy_net']) + self.target_net.load_state_dict(checkpoint['target_net']) + self.optimizer.load_state_dict(checkpoint['optimizer']) + self.steps_done = checkpoint.get('steps_done', 0) + logger.info(f"Model loaded successfully with weights_only=False") + + except Exception as e: + logger.error(f"Failed to load model: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") + raise async def get_live_prices(symbol="ETH/USDT", timeframe="1m"): """Get live price data using websockets""" @@ -1408,10 +1638,28 @@ async def get_live_prices(symbol="ETH/USDT", timeframe="1m"): await asyncio.sleep(5) break -async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000): +async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000, exchange=None): """Train the agent using historical and live data with GPU acceleration""" logger.info(f"Starting training on device: {agent.device}") + # Add early stopping based on performance + patience = 50 # Episodes to wait for improvement + best_pnl = -float('inf') + episodes_without_improvement = 0 + + # Add adaptive learning rate + initial_lr = LEARNING_RATE + min_lr = LEARNING_RATE / 10 + + # Add curriculum learning + curriculum_stages = [ + {"episodes": 100, "risk_factor": 0.5, "exploration": 0.3}, # Conservative trading + {"episodes": 200, "risk_factor": 0.75, "exploration": 0.2}, # Moderate risk + {"episodes": 300, "risk_factor": 1.0, "exploration": 0.1}, # Normal risk + {"episodes": 400, "risk_factor": 1.25, "exploration": 0.05} # 
Aggressive trading + ] + current_stage = 0 + stats = { 'episode_rewards': [], 'episode_lengths': [], @@ -1420,19 +1668,45 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000) 'episode_pnls': [], 'cumulative_pnl': [], 'drawdowns': [], - 'prediction_accuracy': [], - 'trade_analysis': [] + 'prediction_accuracy': [] } - best_reward = -float('inf') - best_pnl = -float('inf') - try: # Initialize price predictor env.initialize_price_predictor(agent.device) for episode in range(num_episodes): try: + # Update curriculum stage if needed + if current_stage < len(curriculum_stages) - 1 and episode >= curriculum_stages[current_stage]["episodes"]: + current_stage += 1 + logger.info(f"Moving to curriculum stage {current_stage+1}: " + f"risk_factor={curriculum_stages[current_stage]['risk_factor']}, " + f"exploration={curriculum_stages[current_stage]['exploration']}") + + # Apply curriculum settings + risk_factor = curriculum_stages[current_stage]["risk_factor"] + exploration = curriculum_stages[current_stage]["exploration"] + + # Set exploration rate for this episode + agent.epsilon = exploration + + # Set risk factor for this episode + env.risk_factor = risk_factor + + # Refresh data with latest candles if exchange is provided + if exchange is not None: + try: + logger.info(f"Fetching latest data for episode {episode}") + latest_data = await fetch_ohlcv_data(exchange, "ETH/USDT", "1m", 100) + if latest_data: + # Add new data to environment + for candle in latest_data: + env.add_data(candle) + logger.info(f"Added {len(latest_data)} new candles for episode {episode}") + except Exception as e: + logger.error(f"Error refreshing data: {e}") + # Reset environment state = env.reset() episode_reward = 0 @@ -1457,49 +1731,34 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000) # Store experience agent.memory.push(state, action, reward, next_state, done) + # Learn from experience + loss = agent.learn() + + # Update state and reward state = next_state episode_reward += reward - # Learn from experience with mixed precision - try: - loss = agent.learn() - if loss is not None: - agent.writer.add_scalar('Loss/train', loss, agent.steps_done) - except Exception as e: - logger.error(f"Learning error in episode {episode}, step {step}: {e}") - - # Update price predictions periodically - if step % 10 == 0: - env.update_price_predictions() - + # Break if done if done: break - # Update target network - if episode % TARGET_UPDATE == 0: - agent.target_net.load_state_dict(agent.policy_net.state_dict()) - # Calculate win rate - if len(env.trades) > 0: - wins = sum(1 for trade in env.trades if trade.get('pnl_percent', 0) > 0) - win_rate = wins / len(env.trades) * 100 - else: - win_rate = 0 - - # Analyze trades - trade_analysis = env.analyze_trades() - stats['trade_analysis'].append(trade_analysis) + total_trades = env.win_count + env.loss_count + win_rate = (env.win_count / total_trades * 100) if total_trades > 0 else 0 # Calculate prediction accuracy - prediction_accuracy = 0.0 if hasattr(env, 'predicted_prices') and len(env.predicted_prices) > 0: - if len(env.data) > 5: - actual_prices = [candle['close'] for candle in env.data[-5:]] - predicted = env.predicted_prices[:min(5, len(actual_prices))] - errors = [abs(p - a) / a for p, a in zip(predicted, actual_prices[:len(predicted)])] - prediction_accuracy = 100 * (1 - sum(errors) / len(errors)) + # Compare predictions with actual prices + actual_prices = env.features['price'][-len(env.predicted_prices):] + 
prediction_errors = np.abs(env.predicted_prices - actual_prices) / actual_prices + prediction_accuracy = 100 * (1 - np.mean(prediction_errors)) + else: + prediction_accuracy = 0 - # Log statistics + # Analyze trades + trade_analysis = env.analyze_trades() if hasattr(env, 'analyze_trades') else {} + + # Update stats stats['episode_rewards'].append(episode_reward) stats['episode_lengths'].append(step + 1) stats['balances'].append(env.balance) @@ -1534,18 +1793,60 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000) if episode_reward > best_reward: best_reward = episode_reward agent.save("models/trading_agent_best_reward.pt") + logger.info(f"New best reward model saved: {episode_reward:.2f}") # Save best model by PnL if env.episode_pnl > best_pnl: best_pnl = env.episode_pnl agent.save("models/trading_agent_best_pnl.pt") + logger.info(f"New best PnL model saved: ${env.episode_pnl:.2f}") - # Save checkpoint + # Save best model by win rate (if enough trades) + if total_trades >= 10 and win_rate > best_win_rate: + best_win_rate = win_rate + agent.save("models/trading_agent_best_winrate.pt") + logger.info(f"New best win rate model saved: {win_rate:.1f}%") + + # Save checkpoint every 10 episodes if episode % 10 == 0: - agent.save(f"models/trading_agent_episode_{episode}.pt") + checkpoint_path = f"checkpoints/trading_agent_episode_{episode}.pt" + agent.save(checkpoint_path) + # Save best metrics to resume training if interrupted + best_metrics = { + 'best_reward': float(best_reward), + 'best_pnl': float(best_pnl), + 'best_win_rate': float(best_win_rate), + 'last_episode': episode, + 'timestamp': datetime.datetime.now().isoformat() + } + with open("checkpoints/best_metrics.json", 'w') as f: + json.dump(best_metrics, f) + + logger.info(f"Checkpoint saved at episode {episode}") + + # Check for early stopping + if env.episode_pnl > best_pnl: + best_pnl = env.episode_pnl + episodes_without_improvement = 0 + else: + episodes_without_improvement += 1 + + # Adjust learning rate based on performance + if episodes_without_improvement > 20: + # Reduce learning rate + for param_group in agent.optimizer.param_groups: + param_group['lr'] = max(param_group['lr'] * 0.9, min_lr) + logger.info(f"Reducing learning rate to {agent.optimizer.param_groups[0]['lr']:.6f}") + + # Early stopping check + if episodes_without_improvement >= patience: + logger.info(f"Early stopping triggered after {episode+1} episodes without improvement") + break + except Exception as e: logger.error(f"Error in episode {episode}: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") continue # Save final model @@ -1556,7 +1857,14 @@ async def train_agent(agent, env, num_episodes=1000, max_steps_per_episode=1000) except Exception as e: logger.error(f"Training failed: {e}") - raise + logger.error(f"Traceback: {traceback.format_exc()}") + + # Save emergency checkpoint + try: + agent.save("models/trading_agent_emergency.pt") + logger.info("Emergency model saved due to training failure") + except Exception as save_error: + logger.error(f"Failed to save emergency model: {save_error}") return stats @@ -1901,6 +2209,7 @@ async def main(): help='Mode to run the bot in') parser.add_argument('--episodes', type=int, default=1000, help='Number of episodes to train') parser.add_argument('--demo', action='store_true', help='Run in demo mode (no real trades)') + parser.add_argument('--refresh-data', action='store_true', help='Refresh data during training') args = parser.parse_args() # Get device (GPU or CPU) @@ -1924,7 
+2233,13 @@ async def main():
     if args.mode == 'train':
         # Train the agent
         logger.info(f"Starting training for {args.episodes} episodes...")
-        stats = await train_agent(agent, env, num_episodes=args.episodes)
+
+        # Pass exchange to training function if refresh-data is enabled
+        if args.refresh_data:
+            logger.info("Data refresh enabled during training")
+            stats = await train_agent(agent, env, num_episodes=args.episodes, exchange=exchange)
+        else:
+            stats = await train_agent(agent, env, num_episodes=args.episodes)
 
     elif args.mode == 'evaluate':
         # Load trained model
@@ -1955,6 +2270,11 @@ async def main():
     except Exception as e:
         logger.warning(f"Could not properly close exchange connection: {e}")
 
+def ensure_float32(model):
+    """Ensure all model parameters are float32"""
+    for param in model.parameters():
+        param.data = param.data.float()  # Convert to float32
+
 if __name__ == "__main__":
     try:
         asyncio.run(main())
diff --git a/crypto/gogo2/readme.md b/crypto/gogo2/readme.md
index e6a8ce8..72eb4d4 100644
--- a/crypto/gogo2/readme.md
+++ b/crypto/gogo2/readme.md
@@ -46,6 +46,12 @@ pip install -r requirements.txt
 ```bash
 MEXC_API_KEY=your_api_key
 MEXC_API_SECRET=your_api_secret
 ```
+
+## CUDA support
+
+```bash
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+```
 
 ## Usage
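
The sketch below restates, outside the patch, the new risk logic and the exploration schedule so they can be read and sanity-checked in isolation. It is a minimal sketch, not the implementation: the leverage value, the epsilon constants, and the example prices are assumptions for illustration, since `MAX_LEVERAGE`, `EPSILON_START`, `EPSILON_END`, and `EPSILON_DECAY` are defined elsewhere in `main.py`; the formulas themselves mirror `calculate_position_size` and `Agent.select_action` above.

```python
import math


def adjusted_position_size(balance: float, volatility: float, risk_factor: float = 1.0,
                           base_risk: float = 5.0, max_leverage: float = 50.0) -> float:
    """Volatility-adjusted sizing: risk a smaller share of the balance as volatility rises.

    max_leverage=50.0 is an assumed placeholder for the MAX_LEVERAGE constant in main.py.
    """
    adjusted_risk = base_risk / (1 + volatility * 5)         # shrink risk as volatility increases
    adjusted_risk = max(1.0, min(adjusted_risk, base_risk))  # clamp between 1% and base_risk
    position_size = balance * (adjusted_risk / 100) * max_leverage
    position_size *= 0.8                                     # safety factor against liquidation
    position_size *= risk_factor                             # curriculum-stage risk factor
    return min(position_size, balance * max_leverage)        # never exceed max leveraged exposure


def volatility_scaled_exits(entry_price: float, volatility: float) -> tuple[float, float]:
    """Stop-loss / take-profit for a long entry, widened in volatile markets."""
    stop_loss_pct = 0.5 * (1 + volatility)          # wider stop when volatility is high
    take_profit_pct = 1.5 * (1 + volatility * 0.5)  # larger target when volatility is high
    return (entry_price * (1 - stop_loss_pct / 100),
            entry_price * (1 + take_profit_pct / 100))


def epsilon_at(step: int, eps_start: float = 1.0, eps_end: float = 0.05,
               eps_decay: float = 10_000.0) -> float:
    """Exponential epsilon schedule used in select_action (constants here are assumed defaults)."""
    return eps_end + (eps_start - eps_end) * math.exp(-step / eps_decay)


if __name__ == "__main__":
    # With a $100 balance and volatility reported as 0.5 (percent-style), risk drops from 5% to ~1.43%,
    # giving a position of about $57.14 at the assumed 50x leverage with the 0.8 safety factor.
    print(adjusted_position_size(balance=100.0, volatility=0.5))
    # For a long entered at $2000 with the same volatility: stop ~ $1985.00, target ~ $2037.50.
    print(volatility_scaled_exits(entry_price=2000.0, volatility=0.5))
    # Exploration decays from 1.0 toward 0.05: ~0.40 after 10k steps, ~0.056 after 50k steps.
    print(epsilon_at(0), epsilon_at(10_000), epsilon_at(50_000))
```

One note on the sizing sketch: `get_recent_volatility` in the patch returns ATR divided by price in one branch and a percent-style standard deviation of returns in the other, so the effective scaling depends on which branch fires; the clamp keeps the risked share between 1% and the 5% base either way.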