implementations

2025-02-12 01:27:38 +02:00
parent 1a15ee934b
commit 33a5588539
6 changed files with 595 additions and 22 deletions
--- a/crypto/gogo/training/rl_agent.py
+++ b/crypto/gogo/training/rl_agent.py
@@ -0,0 +1,49 @@
+# training/rl_agent.py
+import random
+from collections import deque
+import numpy as np
+import torch
+
+class ContinuousRLAgent:
+    def __init__(self, model, optimizer, replay_buffer, batch_size=32, gamma=0.99):
+        self.model = model
+        self.optimizer = optimizer
+        self.replay_buffer = replay_buffer
+        self.batch_size = batch_size
+        self.gamma = gamma
+
+    def act(self, state, epsilon=0.0):
+        """
+        Select an action based on the state, using an epsilon-greedy policy.
+        """
+        if random.random() < epsilon:
+            # Exploration: choose a random action.
+            action = np.random.choice([0, 1, 2])  # SELL, HOLD, BUY
+        else:
+            # Exploitation: choose the action with the highest Q-value.
+            with torch.no_grad():
+                state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
+                q_values = self.model(state_tensor)
+                action = torch.argmax(q_values).item()
+        return action
+
+class ReplayBuffer:
+    def __init__(self, capacity):
+        self.buffer = deque(maxlen=capacity)
+
+    def push(self, state, action, reward, next_state, done):
+        """
+        Store an experience tuple into the replay buffer.
+        """
+        self.buffer.append((state, action, reward, next_state, done))
+
+    def sample(self, batch_size):
+        """
+        Randomly sample a batch of experiences from the replay buffer.
+        """
+        batch = random.sample(self.buffer, batch_size)
+        states, actions, rewards, next_states, dones = zip(*batch)
+        return states, actions, rewards, next_states, dones
+
+    def __len__(self):
+        return len(self.buffer)