fix model loading in live mode
parent 485c61cf8c
commit 991cf57274
@@ -1695,7 +1695,22 @@ class Agent:
     def load(self, path="models/trading_agent.pt"):
         if os.path.isfile(path):
-            checkpoint = torch.load(path)
+            try:
+                # First try with weights_only=True (safer)
+                checkpoint = torch.load(path, weights_only=True)
+            except Exception as e:
+                logger.warning(f"Failed to load with weights_only=True: {e}")
+                try:
+                    # Try with safe_globals for numpy.scalar
+                    import numpy as np
+                    from torch.serialization import safe_globals
+                    with safe_globals([np.core.multiarray.scalar]):
+                        checkpoint = torch.load(path, weights_only=True)
+                except Exception as e2:
+                    logger.warning(f"Failed with safe_globals: {e2}")
+                    # Fall back to weights_only=False if needed
+                    checkpoint = torch.load(path, weights_only=False)
+
             self.policy_net.load_state_dict(checkpoint['policy_net'])
             self.target_net.load_state_dict(checkpoint['target_net'])
             self.optimizer.load_state_dict(checkpoint['optimizer'])
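Note on the hunk above: torch.serialization.safe_globals is the PyTorch (2.4+) context manager for allowlisting specific globals under weights_only=True, and the final weights_only=False fallback executes arbitrary pickled code, so it is only appropriate for checkpoint files you trust. On NumPy 2.x the np.core namespace is deprecated in favor of np._core, so the allowlisted path may need adjusting there. A minimal standalone sketch of the same fallback ladder; the helper name load_checkpoint and the logging setup are assumptions, not part of the commit:

import logging

import numpy as np
import torch
from torch.serialization import safe_globals

logger = logging.getLogger(__name__)

def load_checkpoint(path):
    """Try the safest torch.load settings first, widening only on failure."""
    try:
        # weights_only=True refuses to unpickle arbitrary Python objects
        return torch.load(path, weights_only=True)
    except Exception as exc:
        logger.warning("weights_only=True failed: %s", exc)
    try:
        # Allowlist the one numpy type this checkpoint is known to contain
        with safe_globals([np.core.multiarray.scalar]):
            return torch.load(path, weights_only=True)
    except Exception as exc:
        logger.warning("safe_globals fallback failed: %s", exc)
    # Last resort: runs arbitrary pickled code, so only for trusted files
    return torch.load(path, weights_only=False)

A call like checkpoint = load_checkpoint("models/trading_agent.pt") would then stand in for the inline three-step version above.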
@@ -2154,6 +2169,13 @@ async def live_trading(agent, env, exchange, demo=True):
     # Main trading loop
     step_counter = 0
 
+    # For online learning
+    states = []
+    actions = []
+    rewards = []
+    next_states = []
+    dones = []
+
     while True:
         # Wait for the next candle (1 minute)
         await asyncio.sleep(5)  # Check every 5 seconds
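The five parallel lists added here buffer one transition per loop iteration until the flush in the next hunk. A roughly equivalent single-buffer form, shown as a hypothetical refactor rather than anything in this commit, keeps the five fields from drifting out of sync:

# Hypothetical refactor: one list of transition tuples instead of five
# parallel lists, so appending and clearing touch a single object.
transitions = []

# Per loop iteration (dummy values stand in for env.step output):
state, action, reward, next_state, done = [0.0], 1, 0.5, [0.1], False
transitions.append((state, action, reward, next_state, done))

# At flush time, unpack exactly where the next hunk pushes the five lists:
for s, a, r, ns, d in transitions:
    print(s, a, r, ns, d)  # stand-in for agent.memory.push(s, a, r, ns, d)
transitions.clear()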
@@ -2175,7 +2197,38 @@ async def live_trading(agent, env, exchange, demo=True):
         action = agent.select_action(state, training=False)
 
         # Take action
-        _, reward, _ = env.step(action)
+        next_state, reward, done = env.step(action)
 
+        # Store experience for online learning
+        states.append(state)
+        actions.append(action)
+        rewards.append(reward)
+        next_states.append(next_state)
+        dones.append(done)
+
+        # Online learning - update the model with new experiences
+        if len(states) >= 10:  # Batch size for online learning
+            # Store experiences in replay memory
+            for i in range(len(states)):
+                agent.memory.push(states[i], actions[i], rewards[i], next_states[i], dones[i])
+
+            # Learn from experiences if we have enough samples
+            if len(agent.memory) > 32:
+                loss = agent.learn()
+                if loss is not None:
+                    agent.writer.add_scalar('Live/Loss', loss, step_counter)
+
+            # Clear the temporary storage
+            states = []
+            actions = []
+            rewards = []
+            next_states = []
+            dones = []
+
+        # Save the updated model periodically
+        if step_counter % 100 == 0:
+            agent.save("models/trading_agent_live_updated.pt")
+            logger.info("Updated model saved during live trading")
+
         # Log trading activity
         action_names = ["HOLD", "BUY", "SELL", "CLOSE"]
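The online-learning block assumes agent.memory exposes push() and len(), and that agent.learn() samples a minibatch from it and returns a loss (or None when it skips an update). A minimal replay memory matching that interface, as a sketch only; the class name and capacity are assumptions, since the commit does not show the real implementation:

import random
from collections import deque

class ReplayMemory:
    """Fixed-size FIFO buffer of (state, action, reward, next_state, done)."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)  # oldest transitions fall off

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random minibatch for a DQN-style update
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

With a capacity-bounded buffer like this, the len(agent.memory) > 32 guard above simply waits until at least one minibatch worth of live transitions has accumulated.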