From 2b1f00cbfc770c9742967e5e817fa01f627b483c Mon Sep 17 00:00:00 2001 From: Dobromir Popov Date: Mon, 10 Mar 2025 13:15:30 +0200 Subject: [PATCH] working on GPU --- crypto/gogo2/checkpoints/best_metrics.json | 1 + crypto/gogo2/main.py | 240 ++++++++++++++++++--- 2 files changed, 214 insertions(+), 27 deletions(-) create mode 100644 crypto/gogo2/checkpoints/best_metrics.json diff --git a/crypto/gogo2/checkpoints/best_metrics.json b/crypto/gogo2/checkpoints/best_metrics.json new file mode 100644 index 0000000..5f5dcc1 --- /dev/null +++ b/crypto/gogo2/checkpoints/best_metrics.json @@ -0,0 +1 @@ +{"best_reward": 202.7441047517104, "best_pnl": -10.072078721366783, "best_win_rate": 30.864197530864196, "last_episode": 10, "timestamp": "2025-03-10T12:45:27.247997"} \ No newline at end of file diff --git a/crypto/gogo2/main.py b/crypto/gogo2/main.py index a580720..7dd2f6b 100644 --- a/crypto/gogo2/main.py +++ b/crypto/gogo2/main.py @@ -124,16 +124,24 @@ class DQN(nn.Module): self.advantage_stream = nn.Linear(hidden_size // 2, action_size) # Transformer encoder for more complex pattern recognition - encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=attention_heads, dropout=0.1) + encoder_layer = nn.TransformerEncoderLayer( + d_model=hidden_size, + nhead=attention_heads, + dropout=0.1, + batch_first=True # Add this parameter + ) self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2) def forward(self, x): batch_size = x.size(0) if x.dim() > 1 else 1 - # Ensure input has correct shape + # Ensure input has correct shape and type if x.dim() == 1: x = x.unsqueeze(0) # Add batch dimension - + + # Ensure float32 type + x = x.float() + # Check if state size matches expected input size if x.size(1) != self.state_size: # Handle mismatched input by either truncating or padding @@ -158,8 +166,8 @@ class DQN(nn.Module): # Process through transformer for more complex patterns transformer_input = x.unsqueeze(1) if x.dim() == 2 else x - transformer_out = self.transformer_encoder(transformer_input.transpose(0, 1)) - transformer_out = transformer_out.transpose(0, 1).mean(dim=1) + transformer_out = self.transformer_encoder(transformer_input) + transformer_out = transformer_out.mean(dim=1) # Average across sequence dimension # Combine LSTM and transformer outputs x = lstm_out + transformer_out @@ -1309,48 +1317,62 @@ class TradingEnvironment: return fee # Ensure GPU usage if available -def get_device(): - """Get the device to use (GPU or CPU)""" - if torch.cuda.is_available(): +def get_device(device_preference='gpu'): + """Get the device to use (GPU or CPU) based on preference and availability""" + if device_preference.lower() == 'gpu' and torch.cuda.is_available(): device = torch.device("cuda") # Set default tensor type to float32 for CUDA torch.set_default_tensor_type(torch.FloatTensor) logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}") else: device = torch.device("cpu") - logger.info("Using CPU") + if device_preference.lower() == 'gpu': + logger.info("GPU requested but not available, using CPU instead") + else: + logger.info("Using CPU as requested") return device # Update Agent class to use GPU properly class Agent: - def __init__(self, state_size, action_size, hidden_size=256, lstm_layers=2, attention_heads=4, - device=None): - if device is None: - self.device = get_device() - else: - self.device = device - + def __init__(self, state_size, action_size, hidden_size=256, lstm_layers=2, attention_heads=4, device=None): + """Initialize the agent with the policy and target networks""" self.state_size = state_size self.action_size = action_size - self.memory = ReplayMemory(MEMORY_SIZE) - self.steps_done = 0 - # Initialize policy and target networks + # Set device (GPU or CPU) + if device is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = device + + # Initialize networks self.policy_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(self.device) self.target_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(self.device) - ensure_float32(self.policy_net) - ensure_float32(self.target_net) + ensure_model_float32(self.policy_net) + ensure_model_float32(self.target_net) self.target_net.load_state_dict(self.policy_net.state_dict()) self.target_net.eval() # Initialize optimizer with weight decay for regularization self.optimizer = optim.Adam(self.policy_net.parameters(), lr=LEARNING_RATE, weight_decay=1e-5) - # Initialize gradient scaler for mixed precision training + # Initialize experience replay memory + self.memory = ReplayMemory(MEMORY_SIZE) + + # Initialize steps counter + self.steps_done = 0 + + # Initialize epsilon for exploration + self.epsilon = EPSILON_START + self.epsilon_start = EPSILON_START + self.epsilon_end = EPSILON_END + self.epsilon_decay = EPSILON_DECAY + + # Initialize mixed precision scaler self.scaler = amp.GradScaler() - # TensorBoard writer - self.writer = SummaryWriter() + # Initialize TensorBoard writer + self.writer = SummaryWriter(f'runs/trading_agent_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}') # Create models directory if it doesn't exist os.makedirs("models", exist_ok=True) @@ -2210,10 +2232,12 @@ async def main(): parser.add_argument('--episodes', type=int, default=1000, help='Number of episodes to train') parser.add_argument('--demo', action='store_true', help='Run in demo mode (no real trades)') parser.add_argument('--refresh-data', action='store_true', help='Refresh data during training') + parser.add_argument('--device', type=str, default='gpu', choices=['gpu', 'cpu'], + help='Device to use for training (gpu or cpu)') args = parser.parse_args() - # Get device (GPU or CPU) - device = get_device() + # Get device based on argument and availability + device = get_device(args.device) exchange = None @@ -2270,10 +2294,172 @@ async def main(): except Exception as e: logger.warning(f"Could not properly close exchange connection: {e}") -def ensure_float32(model): +def ensure_model_float32(model): """Ensure all model parameters are float32""" for param in model.parameters(): param.data = param.data.float() # Convert to float32 + return model + +def ensure_float32(tensor_or_array): + """Ensure the input is a float32 tensor or numpy array""" + if isinstance(tensor_or_array, torch.Tensor): + return tensor_or_array.float() + elif isinstance(tensor_or_array, np.ndarray): + return tensor_or_array.astype(np.float32) + else: + return np.array(tensor_or_array, dtype=np.float32) + +def visualize_training_results(env, agent, episode_num): + """Visualize the training results with OHLCV data, actions, and predictions""" + try: + # Create directory for visualizations if it doesn't exist + os.makedirs("visualizations", exist_ok=True) + + # Get the data for visualization + if len(env.data) < 100: + logger.warning("Not enough data for visualization") + return + + # Use the last 100 candles for visualization + data = env.data[-100:] + + # Create a pandas DataFrame for easier plotting + df = pd.DataFrame([{ + 'timestamp': candle['timestamp'], + 'open': candle['open'], + 'high': candle['high'], + 'low': candle['low'], + 'close': candle['close'], + 'volume': candle['volume'] + } for candle in data]) + + # Convert timestamp to datetime + df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms') + df.set_index('timestamp', inplace=True) + + # Create the plot + plt.figure(figsize=(16, 12)) + + # Plot OHLC data + ax1 = plt.subplot(3, 1, 1) + ax1.set_title(f'Training Visualization - Episode {episode_num}') + + # Plot candlestick chart + from mplfinance.original_flavor import candlestick_ohlc + import matplotlib.dates as mdates + + # Convert date to numerical format for candlestick + df_ohlc = df.reset_index() + df_ohlc['timestamp'] = df_ohlc['timestamp'].map(mdates.date2num) + + candlestick_ohlc(ax1, df_ohlc[['timestamp', 'open', 'high', 'low', 'close']].values, + width=0.6, colorup='green', colordown='red') + + ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M')) + ax1.set_ylabel('Price (USD)') + + # Plot buy/sell actions if available + if hasattr(env, 'trades') and env.trades: + # Filter trades that occurred in the visualization window + recent_trades = [t for t in env.trades if t.get('timestamp', 0) >= df.index[0].timestamp() * 1000] + + for trade in recent_trades: + if trade['type'] == 'long': + # Buy point + entry_time = pd.to_datetime(trade['entry_time'], unit='ms') + ax1.scatter(mdates.date2num(entry_time), trade['entry'], + marker='^', color='green', s=100, label='Buy') + + # Sell point if closed + if 'exit' in trade and trade['exit'] > 0: + exit_time = pd.to_datetime(trade['exit_time'], unit='ms') + ax1.scatter(mdates.date2num(exit_time), trade['exit'], + marker='v', color='blue', s=100, label='Sell Long') + + # Draw line connecting entry and exit + ax1.plot([mdates.date2num(entry_time), mdates.date2num(exit_time)], + [trade['entry'], trade['exit']], 'g--', alpha=0.5) + + elif trade['type'] == 'short': + # Sell short point + entry_time = pd.to_datetime(trade['entry_time'], unit='ms') + ax1.scatter(mdates.date2num(entry_time), trade['entry'], + marker='v', color='red', s=100, label='Sell Short') + + # Buy to cover point if closed + if 'exit' in trade and trade['exit'] > 0: + exit_time = pd.to_datetime(trade['exit_time'], unit='ms') + ax1.scatter(mdates.date2num(exit_time), trade['exit'], + marker='^', color='orange', s=100, label='Buy to Cover') + + # Draw line connecting entry and exit + ax1.plot([mdates.date2num(entry_time), mdates.date2num(exit_time)], + [trade['entry'], trade['exit']], 'r--', alpha=0.5) + + # Plot predicted prices if available + if hasattr(env, 'predicted_prices') and len(env.predicted_prices) > 0: + ax2 = plt.subplot(3, 1, 2, sharex=ax1) + ax2.set_title('Price Predictions vs Actual') + + # Plot actual prices + ax2.plot(df.index[-len(env.predicted_prices):], df['close'][-len(env.predicted_prices):], + label='Actual Price', color='blue') + + # Plot predicted prices + # Align predictions with their corresponding timestamps + prediction_dates = df.index[-len(env.predicted_prices)-1:-1] + if len(prediction_dates) == len(env.predicted_prices): + ax2.plot(prediction_dates, env.predicted_prices, + label='Predicted Price', color='orange', linestyle='--') + + # Calculate prediction error + actual = df['close'][-len(env.predicted_prices)-1:-1].values + predicted = env.predicted_prices + mape = np.mean(np.abs((actual - predicted) / actual)) * 100 + ax2.set_ylabel('Price (USD)') + ax2.set_title(f'Price Predictions vs Actual (MAPE: {mape:.2f}%)') + ax2.legend() + + # Plot technical indicators + ax3 = plt.subplot(3, 1, 3, sharex=ax1) + ax3.set_title('Technical Indicators') + + # Plot RSI if available + if 'rsi' in env.features and len(env.features['rsi']) > 0: + rsi_values = env.features['rsi'][-len(df):] + if len(rsi_values) == len(df): + ax3.plot(df.index, rsi_values, label='RSI', color='purple') + + # Add RSI overbought/oversold lines + ax3.axhline(y=70, color='r', linestyle='-', alpha=0.3) + ax3.axhline(y=30, color='g', linestyle='-', alpha=0.3) + + # Plot MACD if available + if 'macd' in env.features and 'macd_signal' in env.features: + macd_values = env.features['macd'][-len(df):] + signal_values = env.features['macd_signal'][-len(df):] + + if len(macd_values) == len(df) and len(signal_values) == len(df): + ax3.plot(df.index, macd_values, label='MACD', color='blue') + ax3.plot(df.index, signal_values, label='Signal', color='red') + + ax3.set_ylabel('Indicator Value') + ax3.legend() + + # Format x-axis + plt.xticks(rotation=45) + plt.tight_layout() + + # Save the figure + plt.savefig(f"visualizations/training_episode_{episode_num}.png") + logger.info(f"Visualization saved for episode {episode_num}") + + # Close the figure to free memory + plt.close() + + except Exception as e: + logger.error(f"Error creating visualization: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") if __name__ == "__main__": try: