FIRST WORKING CASE

parent 14d70f938e
commit 66a2c41338

.gitignore (vendored): 2 lines changed
@@ -33,3 +33,5 @@ crypto/brian/models/best/*
 crypto/brian/models/last/*
 crypto/brian/live_chart.html
 crypto/gogo2/models/*
+crypto/gogo2/trading_bot.log
+*.log
@@ -21,6 +21,7 @@ import torch.cuda.amp as amp  # Add this import at the top
 from sklearn.preprocessing import MinMaxScaler
 import copy
 import argparse
+import traceback

 # Configure logging
 logging.basicConfig(
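The logging.basicConfig(...) call is cut off at the hunk boundary. A minimal sketch of a typical configuration writing to both a file and the console follows; the filename and format string are assumptions, not taken from this diff:

# Assumed example only: the actual arguments are truncated in this hunk.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("trading_bot.log"),
        logging.StreamHandler(),
    ],
)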
@@ -428,30 +429,26 @@ class TradingEnvironment:
        return False

    def step(self, action):
        """Take an action in the environment"""
        # Store previous balance for reward calculation
        prev_balance = self.balance
        """Take an action in the environment and return the next state, reward, and done flag"""
        # Store current price before taking action
        self.current_price = self.data[self.current_step]['close']

        # Update current price
        if self.current_step < len(self.data) - 1:
            self.current_step += 1
            self.current_price = self.data[self.current_step]['close']
        else:
            # End of data
            return self.get_state(), 0, True

        # Check for stop loss or take profit
        self._check_sl_tp()

        # Calculate reward based on action
        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
        reward = self.calculate_reward(action)

        # Check if we've reached the end of the data
        # Check for stop loss / take profit hits
        self.check_sl_tp()

        # Move to next step
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        return self.get_state(), reward, done
        # Get new state
        next_state = self.get_state()

    def _check_sl_tp(self):
        return next_state, reward, done

    def check_sl_tp(self):
        """Check if stop loss or take profit has been hit"""
        if self.position == 'flat':
            return
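The hunk above interleaves the old and new bodies of step(). Read together, the rewritten flow is roughly the following; this is a sketch reconstructed from the lines shown, not the verbatim new method:

def step(self, action):
    """Take an action in the environment and return the next state, reward, and done flag"""
    # Price at the current candle, before acting
    self.current_price = self.data[self.current_step]['close']

    # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE) and score it
    reward = self.calculate_reward(action)

    # Close the position if a stop loss / take profit level was crossed
    self.check_sl_tp()

    # Advance to the next candle and signal termination at the end of the data
    self.current_step += 1
    done = self.current_step >= len(self.data) - 1

    return self.get_state(), reward, done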
@@ -686,9 +683,6 @@ class TradingEnvironment:
        if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
            # Normalize predictions relative to current price
            pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
            # Pad if needed
            if len(pred_norm) < 3:
                pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')
            state_components.append(pred_norm)
        else:
            # Add zeros if no predictions
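For reference, the normalization above turns the next three predicted prices into relative returns and zero-pads when fewer than three predictions are available. A small standalone check, with made-up numbers:

import numpy as np

predicted_prices = np.array([101.0, 102.5])   # only two predictions available
latest_price = 100.0

pred_norm = predicted_prices[:3] / latest_price - 1.0   # -> [0.01, 0.025]
if len(pred_norm) < 3:
    # pad with zeros so the state component keeps a fixed width of 3
    pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')

print(pred_norm)   # [0.01  0.025 0.   ]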
@@ -1065,6 +1059,97 @@ class TradingEnvironment:

        return analysis

    def initialize_price_predictor(self, device="cpu"):
        """Initialize the price prediction model"""
        self.price_predictor = PricePredictionModel(input_size=30, hidden_size=128, output_size=5)
        self.price_predictor.to(device)
        self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
        self.predicted_prices = np.array([])

    def train_price_predictor(self):
        """Train the price prediction model on recent data"""
        if len(self.features['price']) < 35:
            return 0.0

        # Get price history
        price_history = self.features['price']

        # Train the model
        loss = self.price_predictor.train_on_new_data(
            price_history,
            self.price_predictor_optimizer,
            epochs=5
        )

        return loss

    def update_price_predictions(self):
        """Update price predictions"""
        if len(self.features['price']) < 30:
            self.predicted_prices = np.array([])
            return

        # Get price history
        price_history = self.features['price']

        # Get predictions
        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
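These three helpers are presumably wired into the training loop roughly as follows. This is only a sketch; the episode loop and the env/agent/num_episodes names are assumptions, not shown in this diff:

# Hypothetical call order inside the training loop
env.initialize_price_predictor(device=agent.device)   # once, before training starts

for episode in range(num_episodes):
    ...
    predictor_loss = env.train_price_predictor()   # fit on the latest price history
    env.update_price_predictions()                 # refresh env.predicted_prices (5 candles ahead)
    env.identify_optimal_trades()                  # mark local bottoms/tops as buy/sell targets
    ...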
    def identify_optimal_trades(self):
        """Identify optimal entry and exit points based on local extrema"""
        if len(self.features['price']) < 20:
            return

        # Find local bottoms and tops
        bottoms, tops = find_local_extrema(self.features['price'], window=5)

        # Store optimal trade points
        self.optimal_bottoms = bottoms  # Buy points
        self.optimal_tops = tops  # Sell points

        # Create optimal trade signals
        self.optimal_signals = np.zeros(len(self.features['price']))
        for i in bottoms:
            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
                self.optimal_signals[i] = 1  # Buy signal
        for i in tops:
            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
                self.optimal_signals[i] = -1  # Sell signal

        logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
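find_local_extrema is referenced here but its body lies outside this hunk. A plausible windowed implementation consistent with how it is called is sketched below; this is an assumption, not the committed code:

def find_local_extrema(prices, window=5):
    """Return indices of local minima (bottoms) and local maxima (tops).

    A point counts as a bottom/top if it is the smallest/largest value within
    +/- `window` candles around it. Hypothetical sketch only.
    """
    bottoms, tops = [], []
    for i in range(window, len(prices) - window):
        segment = prices[i - window:i + window + 1]
        if prices[i] == min(segment):
            bottoms.append(i)
        elif prices[i] == max(segment):
            tops.append(i)
    return bottoms, tops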
    def calculate_position_size(self):
        """Calculate position size based on current balance and risk parameters"""
        # Use a fixed percentage of balance for each trade
        risk_percent = 5.0  # Risk 5% of balance per trade

        # Calculate position size with leverage
        position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE

        # Apply a safety factor to avoid liquidation
        safety_factor = 0.8
        position_size *= safety_factor

        # Ensure minimum position size
        min_position = 10.0  # Minimum position size in USD
        position_size = max(position_size, min(min_position, self.balance * 0.5))

        # Ensure position size doesn't exceed balance * leverage
        max_position = self.balance * MAX_LEVERAGE
        position_size = min(position_size, max_position)

        return position_size
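As a worked example of the sizing arithmetic, assuming a 100 USD balance and MAX_LEVERAGE = 100 (an assumed value, the constant is not shown in this hunk):

# Worked example with assumed MAX_LEVERAGE = 100
balance = 100.0
position_size = balance * (5.0 / 100) * 100                     # 500.0 USD notional
position_size *= 0.8                                            # safety factor -> 400.0 USD
position_size = max(position_size, min(10.0, balance * 0.5))    # 10 USD floor, still 400.0
position_size = min(position_size, balance * 100)               # 10,000 USD leverage cap, still 400.0
# -> 400.0 USD position size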
    def calculate_fees(self, position_size):
        """Calculate trading fees for a given position size"""
        # Typical fee rate for crypto exchanges (0.1%)
        fee_rate = 0.001

        # Calculate fee
        fee = position_size * fee_rate

        return fee
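For the 400 USD position from the example above, the fee works out to 400 * 0.001 = 0.40 USD per side, or roughly 0.80 USD for a round trip (entry plus exit).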
# Ensure GPU usage if available
def get_device():
    """Get the best available device (CUDA GPU or CPU)"""
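The body of get_device() is cut off at the hunk boundary; the standard PyTorch idiom it most likely wraps (an assumption, torch is imported at module level) is:

# Assumed implementation; only the signature and docstring appear in this hunk.
def get_device():
    """Get the best available device (CUDA GPU or CPU)"""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")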
@@ -1177,33 +1262,28 @@ class Agent:
        return random.randrange(self.action_size)

    def learn(self):
        """Learn from experience replay with mixed precision"""
        """Learn from a batch of experiences"""
        if len(self.memory) < BATCH_SIZE:
            return None

        try:
            # Sample batch from memory
            # Sample a batch of experiences
            experiences = self.memory.sample(BATCH_SIZE)

            # Check if any experience has None values
            for exp in experiences:
                if exp.state is None or exp.next_state is None:
                    return None

            # Convert to tensors
            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
            # Convert experiences to tensors
            states = torch.FloatTensor([e.state for e in experiences]).to(self.device)
            actions = torch.LongTensor([e.action for e in experiences]).to(self.device)
            rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device)
            next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device)
            dones = torch.FloatTensor([e.done for e in experiences]).to(self.device)

            # Use mixed precision for forward/backward passes
            if self.device.type == "cuda":
                with amp.autocast():
                    # Compute Q values
                    current_q_values = self.policy_net(states).gather(1, actions)
                    current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))

                    # Compute next state values using target network
                    # Compute next Q values with target network
                    with torch.no_grad():
                        next_q_values = self.target_net(next_states).max(1)[0]
                        target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
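The TD target computed above is the standard DQN form: for a sampled transition (s, a, r, s', done), target = r + GAMMA * max_a' Q_target(s', a') * (1 - done), and the loss below is the smooth L1 (Huber) distance between Q_policy(s, a) and that target.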
@@ -1214,21 +1294,25 @@
                    # Compute loss
                    loss = F.smooth_l1_loss(current_q_values, target_q_values)

                # Optimize with gradient scaling
                # Backward pass with mixed precision
                self.optimizer.zero_grad()
                self.scaler.scale(loss).backward()

                # Gradient clipping to prevent exploding gradients
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)

                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                # Standard precision training
                # Standard precision for CPU
                # Compute Q values
                current_q_values = self.policy_net(states).gather(1, actions)
                current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))

                # Compute next state values using target network
                # Compute next Q values with target network
                with torch.no_grad():
                    next_q_values = self.target_net(next_states).max(1)[0]
                    target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))

                # Reshape target values to match current_q_values
                target_q_values = target_q_values.unsqueeze(1)
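The CUDA branch above follows the usual torch.cuda.amp pattern: autocast for the forward pass, GradScaler around backward and step, unscale before clipping. A condensed, self-contained sketch of that pattern with generic stand-ins for the agent's attributes (requires a CUDA device to run):

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.cuda.amp as amp

# Generic stand-ins for policy_net / optimizer / a sampled batch
net = nn.Linear(8, 4).cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
scaler = amp.GradScaler()

states = torch.randn(32, 8, device="cuda")
targets = torch.randn(32, 4, device="cuda")

with amp.autocast():                      # forward pass in mixed precision
    loss = F.smooth_l1_loss(net(states), targets)

optimizer.zero_grad()
scaler.scale(loss).backward()             # scaled backward pass
scaler.unscale_(optimizer)                # unscale before clipping
torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
scaler.step(optimizer)                    # skips the step if gradients overflowed
scaler.update()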
@@ -1236,18 +1320,27 @@
                # Compute loss
                loss = F.smooth_l1_loss(current_q_values, target_q_values)

                # Optimize the model
                # Backward pass
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)

                # Gradient clipping to prevent exploding gradients
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)

                self.optimizer.step()

            # Update steps done
            self.steps_done += 1

            # Update target network
            if self.steps_done % TARGET_UPDATE == 0:
                self.target_net.load_state_dict(self.policy_net.state_dict())

            return loss.item()

        except Exception as e:
            logger.error(f"Error during learning: {e}")
            import traceback
            logger.error(traceback.format_exc())
            logger.error(f"Traceback: {traceback.format_exc()}")
            return None

    def update_target_network(self):
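update_target_network() is cut off at the end of the hunk; given the hard update already performed inside learn() every TARGET_UPDATE steps, its body is presumably the same one-line copy (an assumption):

    def update_target_network(self):
        """Copy the policy network weights into the target network (assumed body)."""
        self.target_net.load_state_dict(self.policy_net.state_dict())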
Binary files not shown.
File diff suppressed because it is too large.

crypto/gogo2/training_results.png (new binary file, 60 KiB): not shown.
crypto/gogo2/training_stats.csv (new file, 1 line)
@@ -0,0 +1 @@
+episode_rewards,episode_lengths,balances,win_rates,episode_pnls,cumulative_pnl,drawdowns,prediction_accuracy
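The new CSV carries only this header row in the commit; per-episode statistics are presumably appended during training, for example with csv.DictWriter keyed on the same column names. A sketch only, not the committed writer code:

import csv

FIELDS = ["episode_rewards", "episode_lengths", "balances", "win_rates",
          "episode_pnls", "cumulative_pnl", "drawdowns", "prediction_accuracy"]

def append_training_stats(row, path="crypto/gogo2/training_stats.csv"):
    """Append one episode's statistics as a CSV row (hypothetical helper)."""
    with open(path, "a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS)
        writer.writerow(row)

# Example usage with made-up numbers
append_training_stats({
    "episode_rewards": 12.3, "episode_lengths": 500, "balances": 104.2,
    "win_rates": 0.55, "episode_pnls": 4.2, "cumulative_pnl": 4.2,
    "drawdowns": 0.03, "prediction_accuracy": 0.61,
})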
|
Loading…
x
Reference in New Issue
Block a user