working training on CPU

Dobromir Popov 2025-03-10 11:19:27 +02:00
parent 783e411242
commit efb85a3634
5 changed files with 8415 additions and 41 deletions


@@ -17,6 +17,7 @@ from dotenv import load_dotenv
import ccxt
import websockets
from torch.utils.tensorboard import SummaryWriter
import torch.cuda.amp as amp # Add this import at the top
# Configure logging
logging.basicConfig(
@@ -63,7 +64,7 @@ class ReplayMemory:
return len(self.memory)
class DQN(nn.Module):
def __init__(self, state_size, action_size, hidden_size=256, lstm_layers=2, attention_heads=4):
def __init__(self, state_size, action_size, hidden_size=384, lstm_layers=2, attention_heads=4):
super(DQN, self).__init__()
self.state_size = state_size
@@ -73,9 +74,10 @@ class DQN(nn.Module):
# Initial feature extraction
self.fc1 = nn.Linear(state_size, hidden_size)
self.bn1 = nn.BatchNorm1d(hidden_size)
self.dropout1 = nn.Dropout(0.2) # Add dropout for regularization
# LSTM layer for sequential data
self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=lstm_layers, batch_first=True)
self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=lstm_layers, batch_first=True, dropout=0.2)
# Attention mechanism
self.attention = nn.MultiheadAttention(hidden_size, attention_heads)
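The LSTM here is batch-first while `nn.MultiheadAttention` defaults to sequence-first input, so the forward pass presumably permutes between the two layouts. A minimal sketch of that hand-off, assuming plain self-attention over the LSTM output (shapes and tensor names are illustrative, not copied from the file):

```python
import torch
import torch.nn as nn

hidden_size, attention_heads = 384, 4
lstm = nn.LSTM(hidden_size, hidden_size, num_layers=2, batch_first=True, dropout=0.2)
attention = nn.MultiheadAttention(hidden_size, attention_heads)

x = torch.randn(8, 1, hidden_size)                  # (batch, seq, features)
lstm_out, _ = lstm(x)                               # (batch, seq, hidden)
attn_in = lstm_out.permute(1, 0, 2)                 # (seq, batch, hidden) for MultiheadAttention
attn_out, _ = attention(attn_in, attn_in, attn_in)  # self-attention: query = key = value
attn_out = attn_out.permute(1, 0, 2)                # back to (batch, seq, hidden)
```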
@@ -83,6 +85,7 @@ class DQN(nn.Module):
# Output layers with increased capacity
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.bn2 = nn.BatchNorm1d(hidden_size)
self.dropout2 = nn.Dropout(0.2)
self.fc3 = nn.Linear(hidden_size, hidden_size // 2)
# Dueling DQN architecture
@@ -90,7 +93,7 @@ class DQN(nn.Module):
self.advantage_stream = nn.Linear(hidden_size // 2, action_size)
# Transformer encoder for more complex pattern recognition
encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=attention_heads)
encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=attention_heads, dropout=0.1)
self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
def forward(self, x):
@@ -105,16 +108,15 @@ class DQN(nn.Module):
# Handle mismatched input by either truncating or padding
if x.size(1) > self.state_size:
x = x[:, :self.state_size] # Truncate
print(f"Warning: Input truncated from {x.size(1)} to {self.state_size}")
else:
# Pad with zeros
padding = torch.zeros(batch_size, self.state_size - x.size(1), device=x.device)
x = torch.cat([x, padding], dim=1)
print(f"Warning: Input padded from {x.size(1) - padding.size(1)} to {self.state_size}")
# Initial feature extraction
x = self.fc1(x)
x = F.relu(self.bn1(x) if batch_size > 1 else self.bn1(x.unsqueeze(0)).squeeze(0))
x = self.dropout1(x)
# Reshape for LSTM
x_lstm = x.unsqueeze(1) if x.dim() == 2 else x
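The truncate-or-pad branch above keeps the network usable when the incoming feature count drifts from `state_size`. The same idea as a standalone helper, a minimal sketch (the function name is hypothetical, not from the file):

```python
import torch

def fit_to_state_size(x: torch.Tensor, state_size: int) -> torch.Tensor:
    """Truncate or zero-pad a (batch, features) tensor to exactly state_size features."""
    if x.size(1) > state_size:
        return x[:, :state_size]                   # truncate extra features
    if x.size(1) < state_size:
        padding = torch.zeros(x.size(0), state_size - x.size(1),
                              device=x.device, dtype=x.dtype)
        return torch.cat([x, padding], dim=1)      # pad with zeros
    return x
```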
@@ -134,6 +136,7 @@ class DQN(nn.Module):
# Final layers
x = self.fc2(x)
x = F.relu(self.bn2(x) if batch_size > 1 else self.bn2(x.unsqueeze(0)).squeeze(0))
x = self.dropout2(x)
x = F.relu(self.fc3(x))
# Dueling architecture
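The value and advantage streams are declared in an earlier hunk, but the line that combines them falls outside the changed regions. A dueling head is conventionally combined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); a minimal sketch under that assumption:

```python
import torch
import torch.nn as nn

hidden_size, action_size = 384, 3
value_stream = nn.Linear(hidden_size // 2, 1)
advantage_stream = nn.Linear(hidden_size // 2, action_size)

features = torch.randn(8, hidden_size // 2)   # e.g. the output of fc3
value = value_stream(features)                # (batch, 1)
advantage = advantage_stream(features)        # (batch, action_size)
# Subtracting the mean advantage keeps the V/A decomposition identifiable
q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
```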
@@ -641,6 +644,12 @@ class Agent:
self.device = device
self.memory = ReplayMemory(MEMORY_SIZE)
# Configure for RTX 4060 (8GB VRAM)
if device == "cuda":
torch.backends.cudnn.benchmark = True # Optimize for fixed input sizes
logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
logger.info(f"Available VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
# Q-Networks with configurable size
self.policy_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(device)
self.target_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(device)
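The cuDNN block above only runs when the agent is constructed with device == "cuda"; per the commit title, the same agent now also trains on CPU. How the device is chosen is outside this diff, but a typical selection looks like the following sketch (an assumption, not the author's code):

```python
import torch

# Fall back to CPU when CUDA is absent -- the "working training on CPU" path
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # benchmark mode auto-tunes kernels, which pays off when input shapes stay fixed
    torch.backends.cudnn.benchmark = True
else:
    print("CUDA not available, training on CPU")
```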
@@ -653,12 +662,19 @@ class Agent:
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=LEARNING_RATE)
# Mixed precision training
self.scaler = amp.GradScaler()
self.use_amp = device == "cuda" # Only use mixed precision on GPU
self.epsilon = EPSILON_START
self.steps_done = 0
# TensorBoard logging
self.writer = SummaryWriter(log_dir='runs/trading_agent')
# Create models directory if it doesn't exist
os.makedirs("models", exist_ok=True)
def expand_model(self, new_state_size, new_hidden_size=512, new_lstm_layers=3, new_attention_heads=8):
"""Expand the model to handle more features or increase capacity"""
logger.info(f"Expanding model: {self.state_size}{new_state_size}, "
@@ -726,46 +742,79 @@ class Agent:
return random.randrange(self.action_size)
def learn(self):
"""Learn from experience replay with mixed precision"""
if len(self.memory) < BATCH_SIZE:
return None
experiences = self.memory.sample(BATCH_SIZE)
batch = Experience(*zip(*experiences))
# Convert to tensors
state_batch = torch.FloatTensor(batch.state).to(self.device)
action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
reward_batch = torch.FloatTensor(batch.reward).to(self.device)
next_state_batch = torch.FloatTensor(batch.next_state).to(self.device)
done_batch = torch.FloatTensor(batch.done).to(self.device)
# Get Q values for chosen actions
q_values = self.policy_net(state_batch).gather(1, action_batch)
# Double DQN: use policy net to select actions, target net to evaluate
with torch.no_grad():
# Get actions from policy net
next_actions = self.policy_net(next_state_batch).max(1)[1].unsqueeze(1)
# Evaluate using target net
next_q_values = self.target_net(next_state_batch).gather(1, next_actions)
next_q_values = next_q_values.squeeze(1)
try:
# Sample batch from memory
experiences = self.memory.sample(BATCH_SIZE)
# Compute target Q values
expected_q_values = reward_batch + (GAMMA * next_q_values * (1 - done_batch))
expected_q_values = expected_q_values.unsqueeze(1)
# Compute loss (Huber loss for stability)
loss = F.smooth_l1_loss(q_values, expected_q_values)
# Optimize the model
self.optimizer.zero_grad()
loss.backward()
# Gradient clipping
for param in self.policy_net.parameters():
param.grad.data.clamp_(-1, 1)
self.optimizer.step()
return loss.item()
# Check if any experience has None values
for exp in experiences:
if exp.state is None or exp.next_state is None:
return None
# Convert to tensors
states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
# Use mixed precision for forward/backward passes
if self.use_amp:
with amp.autocast():
# Compute Q values
current_q_values = self.policy_net(states).gather(1, actions)
# Compute next state values using target network
with torch.no_grad():
next_q_values = self.target_net(next_states).max(1)[0]
target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
# Reshape target values to match current_q_values
target_q_values = target_q_values.unsqueeze(1)
# Compute loss
loss = F.smooth_l1_loss(current_q_values, target_q_values)
# Optimize with gradient scaling
self.optimizer.zero_grad()
self.scaler.scale(loss).backward()
self.scaler.unscale_(self.optimizer)
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.scaler.step(self.optimizer)
self.scaler.update()
else:
# Standard precision training
# Compute Q values
current_q_values = self.policy_net(states).gather(1, actions)
# Compute next state values using target network
with torch.no_grad():
next_q_values = self.target_net(next_states).max(1)[0]
target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
# Reshape target values to match current_q_values
target_q_values = target_q_values.unsqueeze(1)
# Compute loss
loss = F.smooth_l1_loss(current_q_values, target_q_values)
# Optimize the model
self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.optimizer.step()
return loss.item()
except Exception as e:
logger.error(f"Error during learning: {e}")
import traceback
logger.error(traceback.format_exc())
return None
def update_target_network(self):
self.target_net.load_state_dict(self.policy_net.state_dict())
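The removed code computed a Double DQN target (the policy net selects the argmax action, the target net evaluates it), while both new branches take `max(1)[0]` directly over the target network, i.e. vanilla DQN targets. For comparison, a minimal sketch of the Double DQN target with the same tensor shapes (a hypothetical helper, not part of the commit):

```python
import torch

GAMMA = 0.99  # discount factor; the real value is defined elsewhere in the file

def double_dqn_target(policy_net, target_net, rewards, next_states, dones):
    """Double DQN: decouple action selection (policy net) from evaluation (target net)."""
    with torch.no_grad():
        next_actions = policy_net(next_states).argmax(dim=1, keepdim=True)   # (batch, 1)
        next_q = target_net(next_states).gather(1, next_actions).squeeze(1)  # (batch,)
        return (rewards + GAMMA * next_q * (1 - dones)).unsqueeze(1)         # (batch, 1)
```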

File diff suppressed because it is too large