FIRST WORKING CASE
This commit is contained in:
parent: 14d70f938e
commit: 66a2c41338
.gitignore (vendored): 2 additions
@@ -33,3 +33,5 @@ crypto/brian/models/best/*
 crypto/brian/models/last/*
 crypto/brian/live_chart.html
 crypto/gogo2/models/*
+crypto/gogo2/trading_bot.log
+*.log

@@ -21,6 +21,7 @@ import torch.cuda.amp as amp  # Add this import at the top
 from sklearn.preprocessing import MinMaxScaler
 import copy
 import argparse
+import traceback
 
 # Configure logging
 logging.basicConfig(

@@ -428,30 +429,26 @@ class TradingEnvironment:
         return False
 
     def step(self, action):
-        """Take an action in the environment"""
-        # Store previous balance for reward calculation
-        prev_balance = self.balance
-
-        # Update current price
-        if self.current_step < len(self.data) - 1:
-            self.current_step += 1
-            self.current_price = self.data[self.current_step]['close']
-        else:
-            # End of data
-            return self.get_state(), 0, True
-
-        # Check for stop loss or take profit
-        self._check_sl_tp()
-
-        # Calculate reward based on action
-        reward = self.calculate_reward(action)
-
-        # Check if we've reached the end of the data
-        done = self.current_step >= len(self.data) - 1
-
-        return self.get_state(), reward, done
-
-    def _check_sl_tp(self):
+        """Take an action in the environment and return the next state, reward, and done flag"""
+        # Store current price before taking action
+        self.current_price = self.data[self.current_step]['close']
+
+        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
+        reward = self.calculate_reward(action)
+
+        # Check for stop loss / take profit hits
+        self.check_sl_tp()
+
+        # Move to next step
+        self.current_step += 1
+        done = self.current_step >= len(self.data) - 1
+
+        # Get new state
+        next_state = self.get_state()
+
+        return next_state, reward, done
+
+    def check_sl_tp(self):
         """Check if stop loss or take profit has been hit"""
         if self.position == 'flat':
             return

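
For orientation, a minimal sketch of how the revised step() signature might be consumed in a rollout loop; the env and agent names and the replay-buffer call are assumptions for illustration, not taken from this commit.

    # Illustrative rollout against the revised environment API (env, agent,
    # and agent.memory.push are hypothetical names, not from this diff).
    state = env.get_state()
    done = False
    while not done:
        action = agent.select_action(state)           # 0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE
        next_state, reward, done = env.step(action)   # step() now returns (next_state, reward, done)
        agent.memory.push(state, action, reward, next_state, done)
        state = next_state
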
@@ -686,9 +683,6 @@ class TradingEnvironment:
         if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
             # Normalize predictions relative to current price
             pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
-            # Pad if needed
-            if len(pred_norm) < 3:
-                pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')
             state_components.append(pred_norm)
         else:
             # Add zeros if no predictions

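
The normalization in this hunk expresses each predicted price as a fractional offset from the latest price; a quick worked example with illustrative numbers:

    import numpy as np

    predicted_prices = np.array([101.0, 102.0, 99.0])  # hypothetical model outputs
    latest_price = 100.0
    pred_norm = predicted_prices[:3] / latest_price - 1.0
    # -> array([ 0.01,  0.02, -0.01]): +1%, +2%, -1% relative to the current price
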
@@ -1065,6 +1059,97 @@ class TradingEnvironment:
 
         return analysis
 
+    def initialize_price_predictor(self, device="cpu"):
+        """Initialize the price prediction model"""
+        self.price_predictor = PricePredictionModel(input_size=30, hidden_size=128, output_size=5)
+        self.price_predictor.to(device)
+        self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
+        self.predicted_prices = np.array([])
+
+    def train_price_predictor(self):
+        """Train the price prediction model on recent data"""
+        if len(self.features['price']) < 35:
+            return 0.0
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Train the model
+        loss = self.price_predictor.train_on_new_data(
+            price_history,
+            self.price_predictor_optimizer,
+            epochs=5
+        )
+
+        return loss
+
+    def update_price_predictions(self):
+        """Update price predictions"""
+        if len(self.features['price']) < 30:
+            self.predicted_prices = np.array([])
+            return
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Get predictions
+        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
+
+    def identify_optimal_trades(self):
+        """Identify optimal entry and exit points based on local extrema"""
+        if len(self.features['price']) < 20:
+            return
+
+        # Find local bottoms and tops
+        bottoms, tops = find_local_extrema(self.features['price'], window=5)
+
+        # Store optimal trade points
+        self.optimal_bottoms = bottoms  # Buy points
+        self.optimal_tops = tops  # Sell points
+
+        # Create optimal trade signals
+        self.optimal_signals = np.zeros(len(self.features['price']))
+        for i in bottoms:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = 1  # Buy signal
+        for i in tops:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = -1  # Sell signal
+
+        logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
+
+    def calculate_position_size(self):
+        """Calculate position size based on current balance and risk parameters"""
+        # Use a fixed percentage of balance for each trade
+        risk_percent = 5.0  # Risk 5% of balance per trade
+
+        # Calculate position size with leverage
+        position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE
+
+        # Apply a safety factor to avoid liquidation
+        safety_factor = 0.8
+        position_size *= safety_factor
+
+        # Ensure minimum position size
+        min_position = 10.0  # Minimum position size in USD
+        position_size = max(position_size, min(min_position, self.balance * 0.5))
+
+        # Ensure position size doesn't exceed balance * leverage
+        max_position = self.balance * MAX_LEVERAGE
+        position_size = min(position_size, max_position)
+
+        return position_size
+
+    def calculate_fees(self, position_size):
+        """Calculate trading fees for a given position size"""
+        # Typical fee rate for crypto exchanges (0.1%)
+        fee_rate = 0.001
+
+        # Calculate fee
+        fee = position_size * fee_rate
+
+        return fee
+
 # Ensure GPU usage if available
 def get_device():
     """Get the best available device (CUDA GPU or CPU)"""

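
A minimal sketch of how the helpers added in this hunk might be wired together; the env loop structure and the MAX_LEVERAGE value used in the arithmetic are assumptions for illustration only.

    # Illustrative wiring of the new TradingEnvironment helpers (env is a
    # hypothetical instance; MAX_LEVERAGE is assumed to be 50 for the numbers below).
    device = get_device()
    env.initialize_price_predictor(device=device)

    for episode in range(10):
        pred_loss = env.train_price_predictor()    # fine-tune on recent candles, returns the loss
        env.update_price_predictions()             # refresh env.predicted_prices (next 5 candles)
        env.identify_optimal_trades()              # mark local bottoms/tops as buy/sell signals

    # Position-sizing arithmetic, assuming balance = 100 USD and MAX_LEVERAGE = 50:
    #   100 * (5.0 / 100) * 50 * 0.8 = 200 USD notional, capped at balance * MAX_LEVERAGE.
    size = env.calculate_position_size()
    fee = env.calculate_fees(size)                 # 0.1% of notional, e.g. 200 * 0.001 = 0.2 USD
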
@@ -1177,33 +1262,28 @@ class Agent:
             return random.randrange(self.action_size)
 
     def learn(self):
-        """Learn from experience replay with mixed precision"""
+        """Learn from a batch of experiences"""
        if len(self.memory) < BATCH_SIZE:
             return None
 
         try:
-            # Sample batch from memory
+            # Sample a batch of experiences
             experiences = self.memory.sample(BATCH_SIZE)
 
-            # Check if any experience has None values
-            for exp in experiences:
-                if exp.state is None or exp.next_state is None:
-                    return None
-
-            # Convert to tensors
-            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
-            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
-            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
-            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
-            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
+            # Convert experiences to tensors
+            states = torch.FloatTensor([e.state for e in experiences]).to(self.device)
+            actions = torch.LongTensor([e.action for e in experiences]).to(self.device)
+            rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device)
+            next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device)
+            dones = torch.FloatTensor([e.done for e in experiences]).to(self.device)
 
             # Use mixed precision for forward/backward passes
             if self.device.type == "cuda":
                 with amp.autocast():
                     # Compute Q values
-                    current_q_values = self.policy_net(states).gather(1, actions)
+                    current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                    # Compute next state values using target network
+                    # Compute next Q values with target network
                     with torch.no_grad():
                         next_q_values = self.target_net(next_states).max(1)[0]
                         target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))

@@ -1214,21 +1294,25 @@
                     # Compute loss
                     loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize with gradient scaling
+                # Backward pass with mixed precision
                 self.optimizer.zero_grad()
                 self.scaler.scale(loss).backward()
+
+                # Gradient clipping to prevent exploding gradients
                 self.scaler.unscale_(self.optimizer)
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
 
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
             else:
-                # Standard precision training
+                # Standard precision for CPU
                 # Compute Q values
-                current_q_values = self.policy_net(states).gather(1, actions)
+                current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                # Compute next state values using target network
+                # Compute next Q values with target network
                 with torch.no_grad():
                     next_q_values = self.target_net(next_states).max(1)[0]
+                    target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
 
                 # Reshape target values to match current_q_values
                 target_q_values = target_q_values.unsqueeze(1)

@@ -1236,18 +1320,27 @@
                 # Compute loss
                 loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize the model
+                # Backward pass
                 self.optimizer.zero_grad()
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+
+                # Gradient clipping to prevent exploding gradients
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
+
                 self.optimizer.step()
 
+            # Update steps done
+            self.steps_done += 1
+
+            # Update target network
+            if self.steps_done % TARGET_UPDATE == 0:
+                self.target_net.load_state_dict(self.policy_net.state_dict())
+
             return loss.item()
 
         except Exception as e:
             logger.error(f"Error during learning: {e}")
-            import traceback
-            logger.error(traceback.format_exc())
+            logger.error(f"Traceback: {traceback.format_exc()}")
 
             return None
 
     def update_target_network(self):

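
The unsqueeze calls introduced above align tensor shapes for gather and the Huber loss; a self-contained shape sketch with an illustrative batch size, action count, and a GAMMA of 0.99 (assumed, not taken from this diff):

    import torch
    import torch.nn.functional as F

    q_all = torch.randn(4, 3)                           # policy_net(states): [batch, action_size]
    actions = torch.tensor([0, 2, 1, 2])                # [batch]
    current_q = q_all.gather(1, actions.unsqueeze(1))   # [batch, 1]

    next_q = torch.randn(4, 3).max(1)[0]                # target_net(next_states).max(1)[0]: [batch]
    rewards = torch.zeros(4)
    dones = torch.tensor([0.0, 0.0, 1.0, 0.0])
    target_q = rewards + 0.99 * next_q * (1 - dones)    # Bellman target; GAMMA assumed 0.99
    loss = F.smooth_l1_loss(current_q, target_q.unsqueeze(1))  # shapes match: [batch, 1]
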
Binary files not shown.
File diff suppressed because it is too large.
crypto/gogo2/training_results.png (new binary file, 60 KiB): binary file not shown.
crypto/gogo2/training_stats.csv (new file): 1 addition
@@ -0,0 +1 @@
+episode_rewards,episode_lengths,balances,win_rates,episode_pnls,cumulative_pnl,drawdowns,prediction_accuracy