working training on CPU
parent 783e411242
commit efb85a3634
@@ -17,6 +17,7 @@ from dotenv import load_dotenv
 import ccxt
 import websockets
 from torch.utils.tensorboard import SummaryWriter
+import torch.cuda.amp as amp  # Add this import at the top
 
 # Configure logging
 logging.basicConfig(
@@ -63,7 +64,7 @@ class ReplayMemory:
         return len(self.memory)
 
 class DQN(nn.Module):
-    def __init__(self, state_size, action_size, hidden_size=256, lstm_layers=2, attention_heads=4):
+    def __init__(self, state_size, action_size, hidden_size=384, lstm_layers=2, attention_heads=4):
         super(DQN, self).__init__()
 
         self.state_size = state_size
@@ -73,9 +74,10 @@ class DQN(nn.Module):
         # Initial feature extraction
         self.fc1 = nn.Linear(state_size, hidden_size)
         self.bn1 = nn.BatchNorm1d(hidden_size)
+        self.dropout1 = nn.Dropout(0.2)  # Add dropout for regularization
 
         # LSTM layer for sequential data
-        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=lstm_layers, batch_first=True)
+        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=lstm_layers, batch_first=True, dropout=0.2)
 
         # Attention mechanism
         self.attention = nn.MultiheadAttention(hidden_size, attention_heads)
@@ -83,6 +85,7 @@ class DQN(nn.Module):
         # Output layers with increased capacity
         self.fc2 = nn.Linear(hidden_size, hidden_size)
         self.bn2 = nn.BatchNorm1d(hidden_size)
+        self.dropout2 = nn.Dropout(0.2)
         self.fc3 = nn.Linear(hidden_size, hidden_size // 2)
 
         # Dueling DQN architecture
@@ -90,7 +93,7 @@ class DQN(nn.Module):
         self.advantage_stream = nn.Linear(hidden_size // 2, action_size)
 
         # Transformer encoder for more complex pattern recognition
-        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=attention_heads)
+        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=attention_heads, dropout=0.1)
         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
 
     def forward(self, x):
@@ -105,16 +108,15 @@ class DQN(nn.Module):
         # Handle mismatched input by either truncating or padding
         if x.size(1) > self.state_size:
             x = x[:, :self.state_size]  # Truncate
-            print(f"Warning: Input truncated from {x.size(1)} to {self.state_size}")
         else:
             # Pad with zeros
             padding = torch.zeros(batch_size, self.state_size - x.size(1), device=x.device)
             x = torch.cat([x, padding], dim=1)
-            print(f"Warning: Input padded from {x.size(1) - padding.size(1)} to {self.state_size}")
 
         # Initial feature extraction
         x = self.fc1(x)
         x = F.relu(self.bn1(x) if batch_size > 1 else self.bn1(x.unsqueeze(0)).squeeze(0))
+        x = self.dropout1(x)
 
         # Reshape for LSTM
         x_lstm = x.unsqueeze(1) if x.dim() == 2 else x
@@ -134,6 +136,7 @@ class DQN(nn.Module):
         # Final layers
         x = self.fc2(x)
         x = F.relu(self.bn2(x) if batch_size > 1 else self.bn2(x.unsqueeze(0)).squeeze(0))
+        x = self.dropout2(x)
         x = F.relu(self.fc3(x))
 
         # Dueling architecture
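Note: the dueling combination itself lies outside the hunks shown above. A minimal sketch of the standard aggregation, assuming a value_stream layer alongside the advantage_stream defined earlier (the value_stream name and the exact combination are assumptions, not taken from this diff):

import torch
import torch.nn as nn

class DuelingHead(nn.Module):
    """Stand-in for the tail of DQN.forward(): combines value and advantage streams."""
    def __init__(self, hidden, action_size):
        super().__init__()
        self.value_stream = nn.Linear(hidden, 1)            # hypothetical, mirrors advantage_stream
        self.advantage_stream = nn.Linear(hidden, action_size)

    def forward(self, x):
        value = self.value_stream(x)                         # (batch, 1)
        advantage = self.advantage_stream(x)                 # (batch, action_size)
        # Q(s,a) = V(s) + A(s,a) - mean_a A(s,a); subtracting the mean keeps V/A identifiable.
        return value + advantage - advantage.mean(dim=1, keepdim=True)

q = DuelingHead(hidden=128, action_size=3)(torch.randn(4, 128))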
@@ -641,6 +644,12 @@ class Agent:
         self.device = device
         self.memory = ReplayMemory(MEMORY_SIZE)
 
+        # Configure for RTX 4060 (8GB VRAM)
+        if device == "cuda":
+            torch.backends.cudnn.benchmark = True  # Optimize for fixed input sizes
+            logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+            logger.info(f"Available VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+
         # Q-Networks with configurable size
         self.policy_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(device)
         self.target_net = DQN(state_size, action_size, hidden_size, lstm_layers, attention_heads).to(device)
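The GPU branch above only runs when device == "cuda"; per the commit title, training also has to work on CPU. A minimal sketch of the usual device selection before constructing the agent (the Agent call is hypothetical, since its signature is not shown in this diff):

import torch

# Fall back to CPU when no CUDA device is present; the cudnn/VRAM setup above is then skipped.
device = "cuda" if torch.cuda.is_available() else "cpu"
# agent = Agent(state_size, action_size, device=device)  # hypothetical call; real signature not shown here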
@@ -653,12 +662,19 @@ class Agent:
 
         self.optimizer = optim.Adam(self.policy_net.parameters(), lr=LEARNING_RATE)
 
+        # Mixed precision training
+        self.scaler = amp.GradScaler()
+        self.use_amp = device == "cuda"  # Only use mixed precision on GPU
+
         self.epsilon = EPSILON_START
         self.steps_done = 0
 
         # TensorBoard logging
         self.writer = SummaryWriter(log_dir='runs/trading_agent')
 
+        # Create models directory if it doesn't exist
+        os.makedirs("models", exist_ok=True)
+
     def expand_model(self, new_state_size, new_hidden_size=512, new_lstm_layers=3, new_attention_heads=8):
         """Expand the model to handle more features or increase capacity"""
         logger.info(f"Expanding model: {self.state_size} → {new_state_size}, "
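Here the GradScaler is created unconditionally and gated later by use_amp. A common alternative, not what this commit does, is to pass the enabled flag so both scaling and autocast become no-ops on CPU; a self-contained sketch of that API usage (the Linear model and tensors are stand-ins):

import torch
import torch.cuda.amp as amp

model = torch.nn.Linear(10, 3)                        # stand-in for the policy net
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

scaler = amp.GradScaler(enabled=(device == "cuda"))   # scaling is a no-op on CPU
x = torch.randn(8, 10, device=device)
target = torch.randn(8, 3, device=device)

with amp.autocast(enabled=(device == "cuda")):        # autocast is a no-op on CPU
    loss = torch.nn.functional.smooth_l1_loss(model(x), target)

optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)                            # no-op when scaling is disabled
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
scaler.step(optimizer)
scaler.update()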
@@ -726,46 +742,79 @@ class Agent:
             return random.randrange(self.action_size)
 
     def learn(self):
+        """Learn from experience replay with mixed precision"""
         if len(self.memory) < BATCH_SIZE:
             return None
 
-        experiences = self.memory.sample(BATCH_SIZE)
-        batch = Experience(*zip(*experiences))
-
-        # Convert to tensors
-        state_batch = torch.FloatTensor(batch.state).to(self.device)
-        action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
-        reward_batch = torch.FloatTensor(batch.reward).to(self.device)
-        next_state_batch = torch.FloatTensor(batch.next_state).to(self.device)
-        done_batch = torch.FloatTensor(batch.done).to(self.device)
-
-        # Get Q values for chosen actions
-        q_values = self.policy_net(state_batch).gather(1, action_batch)
-
-        # Double DQN: use policy net to select actions, target net to evaluate
-        with torch.no_grad():
-            # Get actions from policy net
-            next_actions = self.policy_net(next_state_batch).max(1)[1].unsqueeze(1)
-            # Evaluate using target net
-            next_q_values = self.target_net(next_state_batch).gather(1, next_actions)
-            next_q_values = next_q_values.squeeze(1)
-
-        # Compute target Q values
-        expected_q_values = reward_batch + (GAMMA * next_q_values * (1 - done_batch))
-        expected_q_values = expected_q_values.unsqueeze(1)
-
-        # Compute loss (Huber loss for stability)
-        loss = F.smooth_l1_loss(q_values, expected_q_values)
-
-        # Optimize the model
-        self.optimizer.zero_grad()
-        loss.backward()
-        # Gradient clipping
-        for param in self.policy_net.parameters():
-            param.grad.data.clamp_(-1, 1)
-        self.optimizer.step()
-
-        return loss.item()
+        try:
+            # Sample batch from memory
+            experiences = self.memory.sample(BATCH_SIZE)
+
+            # Check if any experience has None values
+            for exp in experiences:
+                if exp.state is None or exp.next_state is None:
+                    return None
+
+            # Convert to tensors
+            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
+            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
+            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
+            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
+            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
+
+            # Use mixed precision for forward/backward passes
+            if self.use_amp:
+                with amp.autocast():
+                    # Compute Q values
+                    current_q_values = self.policy_net(states).gather(1, actions)
+
+                    # Compute next state values using target network
+                    with torch.no_grad():
+                        next_q_values = self.target_net(next_states).max(1)[0]
+                        target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
+
+                    # Reshape target values to match current_q_values
+                    target_q_values = target_q_values.unsqueeze(1)
+
+                    # Compute loss
+                    loss = F.smooth_l1_loss(current_q_values, target_q_values)
+
+                # Optimize with gradient scaling
+                self.optimizer.zero_grad()
+                self.scaler.scale(loss).backward()
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+            else:
+                # Standard precision training
+                # Compute Q values
+                current_q_values = self.policy_net(states).gather(1, actions)
+
+                # Compute next state values using target network
+                with torch.no_grad():
+                    next_q_values = self.target_net(next_states).max(1)[0]
+                    target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
+
+                # Reshape target values to match current_q_values
+                target_q_values = target_q_values.unsqueeze(1)
+
+                # Compute loss
+                loss = F.smooth_l1_loss(current_q_values, target_q_values)
+
+                # Optimize the model
+                self.optimizer.zero_grad()
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                self.optimizer.step()
+
+            return loss.item()
+
+        except Exception as e:
+            logger.error(f"Error during learning: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
 
     def update_target_network(self):
         self.target_net.load_state_dict(self.policy_net.state_dict())
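The loop that drives learn() and update_target_network() is not part of this diff. A rough, pseudocode-level sketch of how they are typically wired together; env, agent, select_action, memory.push, NUM_EPISODES and TARGET_UPDATE are all hypothetical names, not taken from this commit:

# Pseudocode-style sketch; every name below except learn()/update_target_network() is hypothetical.
for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    while not done:
        action = agent.select_action(state)               # epsilon-greedy policy
        next_state, reward, done = env.step(action)
        agent.memory.push(state, action, reward, next_state, done)
        loss = agent.learn()                              # returns None until BATCH_SIZE samples exist
        state = next_state
    if episode % TARGET_UPDATE == 0:
        agent.update_target_network()                     # hard sync of target net with policy net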