Work on order execution - we are forced to use limit orders over the API

Dobromir Popov
2025-07-14 13:36:07 +03:00
parent d7205a9745
commit f861559319
4 changed files with 387 additions and 182 deletions
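The constraint in the commit message, a venue API that accepts limit orders but not market orders, is commonly handled by submitting a marketable limit order priced at or just through the best quote. Below is a minimal sketch of that idea, assuming a ccxt-compatible client; the exchange, symbol, credentials, and sizing are illustrative placeholders and do not come from this repository.

import ccxt

# Hypothetical sketch only: exchange, keys, symbol, and amount are placeholders.
exchange = ccxt.binance({
    'apiKey': 'YOUR_API_KEY',
    'secret': 'YOUR_SECRET',
})

symbol = 'ETH/USDT'
amount = 0.05  # base-currency quantity

# Price the limit order just inside the current spread so it fills almost
# immediately while still being accepted by an API that rejects market orders.
ticker = exchange.fetch_ticker(symbol)
limit_price = ticker['bid'] * 1.0005  # a hair above best bid for a buy

order = exchange.create_order(symbol, 'limit', 'buy', amount, limit_price)
print(order['id'], order['status'])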

@@ -807,6 +807,17 @@ class DQNAgent:
         if isinstance(expected_dim, tuple):
             expected_dim = np.prod(expected_dim)
+        # Debug: Check what dimensions we're actually seeing
+        if sanitized_states:
+            actual_dims = [len(state) for state in sanitized_states[:5]]  # Check first 5
+            logger.debug(f"DQN State dimensions - Expected: {expected_dim}, Actual samples: {actual_dims}")
+            # If all states have a consistent dimension different from expected, use that
+            unique_dims = list(set(len(state) for state in sanitized_states))
+            if len(unique_dims) == 1 and unique_dims[0] != expected_dim:
+                logger.warning(f"All states have dimension {unique_dims[0]} but expected {expected_dim}. Using actual dimension.")
+                expected_dim = unique_dims[0]
         # Filter out states with wrong dimensions and fix them
         valid_states = []
         valid_next_states = []
@@ -1076,162 +1087,165 @@ class DQNAgent:
             # Zero gradients
             self.optimizer.zero_grad()
-            # Forward pass with amp autocasting
-            with torch.cuda.amp.autocast():
-                # Get current Q values and extrema predictions
-                current_q_values, current_extrema_pred, current_price_pred, hidden_features, current_advanced_pred = self.policy_net(states)
-                current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
-                # Get next Q values from target network
-                with torch.no_grad():
-                    next_q_values, next_extrema_pred, next_price_pred, next_hidden_features, next_advanced_pred = self.target_net(next_states)
-                    next_q_values = next_q_values.max(1)[0]
-                # Check for dimension mismatch and fix it
-                if rewards.shape[0] != next_q_values.shape[0]:
-                    # Log the shape mismatch for debugging
-                    logger.warning(f"Shape mismatch detected: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
-                    # Use the smaller size to prevent index errors
-                    min_size = min(rewards.shape[0], next_q_values.shape[0])
-                    rewards = rewards[:min_size]
-                    dones = dones[:min_size]
-                    next_q_values = next_q_values[:min_size]
-                    current_q_values = current_q_values[:min_size]
-                target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
-                # Compute Q-value loss (primary task)
-                q_loss = nn.MSELoss()(current_q_values, target_q_values)
-                # Initialize loss with q_loss
-                loss = q_loss
-                # Try to extract price from current and next states
-                try:
-                    # Extract price feature from sequence data (if available)
-                    if len(states.shape) == 3:  # [batch, seq, features]
-                        current_prices = states[:, -1, -1]  # Last timestep, last feature
-                        next_prices = next_states[:, -1, -1]
-                    else:  # [batch, features]
-                        current_prices = states[:, -1]  # Last feature
-                        next_prices = next_states[:, -1]
-                    # Calculate price change for different timeframes
-                    immediate_changes = (next_prices - current_prices) / current_prices
-                    # Get the actual batch size for this calculation
-                    actual_batch_size = states.shape[0]
-                    # Create price direction labels - simplified for training
-                    # 0 = down, 1 = sideways, 2 = up
-                    immediate_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1  # Default: sideways
-                    midterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
-                    longterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
-                    # Immediate term direction (1s, 1m)
-                    immediate_up = (immediate_changes > 0.0005)
-                    immediate_down = (immediate_changes < -0.0005)
-                    immediate_labels[immediate_up] = 2  # Up
-                    immediate_labels[immediate_down] = 0  # Down
-                    # For mid and long term, we can only approximate during training
-                    # In a real system, we'd need historical data to validate these
-                    # Here we'll use the immediate term with increasing thresholds as approximation
-                    # Mid-term (1h) - use slightly higher threshold
-                    midterm_up = (immediate_changes > 0.001)
-                    midterm_down = (immediate_changes < -0.001)
-                    midterm_labels[midterm_up] = 2  # Up
-                    midterm_labels[midterm_down] = 0  # Down
-                    # Long-term (1d) - use even higher threshold
-                    longterm_up = (immediate_changes > 0.002)
-                    longterm_down = (immediate_changes < -0.002)
-                    longterm_labels[longterm_up] = 2  # Up
-                    longterm_labels[longterm_down] = 0  # Down
-                    # Generate target values for price change regression
-                    # For simplicity, we'll use the immediate change and scaled versions for longer timeframes
-                    price_value_targets = torch.zeros((actual_batch_size, 4), device=self.device)
-                    price_value_targets[:, 0] = immediate_changes
-                    price_value_targets[:, 1] = immediate_changes * 2.0  # Approximate 1h change
-                    price_value_targets[:, 2] = immediate_changes * 4.0  # Approximate 1d change
-                    price_value_targets[:, 3] = immediate_changes * 6.0  # Approximate 1w change
-                    # Calculate loss for price direction prediction (classification)
-                    if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= actual_batch_size:
-                        # Slice predictions to match the adjusted batch size
-                        immediate_pred = current_price_pred['immediate'][:actual_batch_size]
-                        midterm_pred = current_price_pred['midterm'][:actual_batch_size]
-                        longterm_pred = current_price_pred['longterm'][:actual_batch_size]
-                        price_values_pred = current_price_pred['values'][:actual_batch_size]
-                        # Compute losses for each task
-                        immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
-                        midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
-                        longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
-                        # MSE loss for price value regression
-                        price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
-                        # Combine all price prediction losses
-                        price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
-                        # Create extrema labels (same as before)
-                        extrema_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 2  # Default: neither
-                        # Identify potential bottoms (significant negative change)
-                        bottoms = (immediate_changes < -0.003)
-                        extrema_labels[bottoms] = 0
-                        # Identify potential tops (significant positive change)
-                        tops = (immediate_changes > 0.003)
-                        extrema_labels[tops] = 1
-                        # Calculate extrema prediction loss
-                        if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= actual_batch_size:
-                            current_extrema_pred = current_extrema_pred[:actual_batch_size]
-                            extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
-                            # Combined loss with all components
-                            # Primary task: Q-value learning (RL objective)
-                            # Secondary tasks: extrema detection and price prediction (supervised objectives)
-                            loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
-                        # Log loss components occasionally
-                        if random.random() < 0.01:  # Log 1% of the time
-                            logger.info(
-                                f"Mixed precision losses: Q-loss={q_loss.item():.4f}, "
-                                f"Extrema-loss={extrema_loss.item():.4f}, "
-                                f"Price-loss={price_loss.item():.4f}"
-                            )
-                except Exception as e:
-                    # Fallback if price extraction fails
-                    logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
-                    # Just use Q-value loss
-                    loss = q_loss
-            # Backward pass with scaled gradients
-            self.scaler.scale(loss).backward()
-            # Gradient clipping on scaled gradients
-            self.scaler.unscale_(self.optimizer)
-            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
-            # Update with scaler
-            self.scaler.step(self.optimizer)
-            self.scaler.update()
-            # Update target network if needed
-            self.update_count += 1
-            if self.update_count % self.target_update == 0:
-                self.target_net.load_state_dict(self.policy_net.state_dict())
-            # Track and decay epsilon
-            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
-            return loss.item()
+            # Forward pass with amp autocasting
+            import warnings
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", FutureWarning)
+                with torch.cuda.amp.autocast():
+                    # Get current Q values and extrema predictions
+                    current_q_values, current_extrema_pred, current_price_pred, hidden_features, current_advanced_pred = self.policy_net(states)
+                    current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
+                    # Get next Q values from target network
+                    with torch.no_grad():
+                        next_q_values, next_extrema_pred, next_price_pred, next_hidden_features, next_advanced_pred = self.target_net(next_states)
+                        next_q_values = next_q_values.max(1)[0]
+                    # Check for dimension mismatch and fix it
+                    if rewards.shape[0] != next_q_values.shape[0]:
+                        # Log the shape mismatch for debugging
+                        logger.warning(f"Shape mismatch detected: rewards {rewards.shape}, next_q_values {next_q_values.shape}")
+                        # Use the smaller size to prevent index errors
+                        min_size = min(rewards.shape[0], next_q_values.shape[0])
+                        rewards = rewards[:min_size]
+                        dones = dones[:min_size]
+                        next_q_values = next_q_values[:min_size]
+                        current_q_values = current_q_values[:min_size]
+                    target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
+                    # Compute Q-value loss (primary task)
+                    q_loss = nn.MSELoss()(current_q_values, target_q_values)
+                    # Initialize loss with q_loss
+                    loss = q_loss
+                    # Try to extract price from current and next states
+                    try:
+                        # Extract price feature from sequence data (if available)
+                        if len(states.shape) == 3:  # [batch, seq, features]
+                            current_prices = states[:, -1, -1]  # Last timestep, last feature
+                            next_prices = next_states[:, -1, -1]
+                        else:  # [batch, features]
+                            current_prices = states[:, -1]  # Last feature
+                            next_prices = next_states[:, -1]
+                        # Calculate price change for different timeframes
+                        immediate_changes = (next_prices - current_prices) / current_prices
+                        # Get the actual batch size for this calculation
+                        actual_batch_size = states.shape[0]
+                        # Create price direction labels - simplified for training
+                        # 0 = down, 1 = sideways, 2 = up
+                        immediate_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1  # Default: sideways
+                        midterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
+                        longterm_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 1
+                        # Immediate term direction (1s, 1m)
+                        immediate_up = (immediate_changes > 0.0005)
+                        immediate_down = (immediate_changes < -0.0005)
+                        immediate_labels[immediate_up] = 2  # Up
+                        immediate_labels[immediate_down] = 0  # Down
+                        # For mid and long term, we can only approximate during training
+                        # In a real system, we'd need historical data to validate these
+                        # Here we'll use the immediate term with increasing thresholds as approximation
+                        # Mid-term (1h) - use slightly higher threshold
+                        midterm_up = (immediate_changes > 0.001)
+                        midterm_down = (immediate_changes < -0.001)
+                        midterm_labels[midterm_up] = 2  # Up
+                        midterm_labels[midterm_down] = 0  # Down
+                        # Long-term (1d) - use even higher threshold
+                        longterm_up = (immediate_changes > 0.002)
+                        longterm_down = (immediate_changes < -0.002)
+                        longterm_labels[longterm_up] = 2  # Up
+                        longterm_labels[longterm_down] = 0  # Down
+                        # Generate target values for price change regression
+                        # For simplicity, we'll use the immediate change and scaled versions for longer timeframes
+                        price_value_targets = torch.zeros((actual_batch_size, 4), device=self.device)
+                        price_value_targets[:, 0] = immediate_changes
+                        price_value_targets[:, 1] = immediate_changes * 2.0  # Approximate 1h change
+                        price_value_targets[:, 2] = immediate_changes * 4.0  # Approximate 1d change
+                        price_value_targets[:, 3] = immediate_changes * 6.0  # Approximate 1w change
+                        # Calculate loss for price direction prediction (classification)
+                        if len(current_price_pred['immediate'].shape) > 1 and current_price_pred['immediate'].shape[0] >= actual_batch_size:
+                            # Slice predictions to match the adjusted batch size
+                            immediate_pred = current_price_pred['immediate'][:actual_batch_size]
+                            midterm_pred = current_price_pred['midterm'][:actual_batch_size]
+                            longterm_pred = current_price_pred['longterm'][:actual_batch_size]
+                            price_values_pred = current_price_pred['values'][:actual_batch_size]
+                            # Compute losses for each task
+                            immediate_loss = nn.CrossEntropyLoss()(immediate_pred, immediate_labels)
+                            midterm_loss = nn.CrossEntropyLoss()(midterm_pred, midterm_labels)
+                            longterm_loss = nn.CrossEntropyLoss()(longterm_pred, longterm_labels)
+                            # MSE loss for price value regression
+                            price_value_loss = nn.MSELoss()(price_values_pred, price_value_targets)
+                            # Combine all price prediction losses
+                            price_loss = immediate_loss + 0.7 * midterm_loss + 0.5 * longterm_loss + 0.3 * price_value_loss
+                            # Create extrema labels (same as before)
+                            extrema_labels = torch.ones(actual_batch_size, dtype=torch.long, device=self.device) * 2  # Default: neither
+                            # Identify potential bottoms (significant negative change)
+                            bottoms = (immediate_changes < -0.003)
+                            extrema_labels[bottoms] = 0
+                            # Identify potential tops (significant positive change)
+                            tops = (immediate_changes > 0.003)
+                            extrema_labels[tops] = 1
+                            # Calculate extrema prediction loss
+                            if len(current_extrema_pred.shape) > 1 and current_extrema_pred.shape[0] >= actual_batch_size:
+                                current_extrema_pred = current_extrema_pred[:actual_batch_size]
+                                extrema_loss = nn.CrossEntropyLoss()(current_extrema_pred, extrema_labels)
+                                # Combined loss with all components
+                                # Primary task: Q-value learning (RL objective)
+                                # Secondary tasks: extrema detection and price prediction (supervised objectives)
+                                loss = q_loss + 0.3 * extrema_loss + 0.3 * price_loss
+                            # Log loss components occasionally
+                            if random.random() < 0.01:  # Log 1% of the time
+                                logger.info(
+                                    f"Mixed precision losses: Q-loss={q_loss.item():.4f}, "
+                                    f"Extrema-loss={extrema_loss.item():.4f}, "
+                                    f"Price-loss={price_loss.item():.4f}"
+                                )
+                    except Exception as e:
+                        # Fallback if price extraction fails
+                        logger.warning(f"Failed to calculate price prediction loss: {str(e)}. Using only Q-value loss.")
+                        # Just use Q-value loss
+                        loss = q_loss
+            # Backward pass with scaled gradients
+            self.scaler.scale(loss).backward()
+            # Gradient clipping on scaled gradients
+            self.scaler.unscale_(self.optimizer)
+            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+            # Update with scaler
+            self.scaler.step(self.optimizer)
+            self.scaler.update()
+            # Update target network if needed
+            self.update_count += 1
+            if self.update_count % self.target_update == 0:
+                self.target_net.load_state_dict(self.policy_net.state_dict())
+            # Track and decay epsilon
+            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
+            return loss.item()
         except Exception as e:
             logger.error(f"Error in mixed precision training: {str(e)}")
             logger.warning("Falling back to standard precision training")
@@ -1565,6 +1579,14 @@ class DQNAgent:
         try:
             # If state is already a numpy array, return it
             if isinstance(state, np.ndarray):
+                # Check for empty array
+                if state.size == 0:
+                    logger.warning("Received empty numpy array state. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
                 # Check for non-numeric data and handle it
                 if state.dtype == object:
                     # Convert object array to float array
@@ -1581,6 +1603,14 @@ class DQNAgent:
             # If state is a list or tuple, convert to array
             elif isinstance(state, (list, tuple)):
+                # Check for empty list/tuple
+                if len(state) == 0:
+                    logger.warning("Received empty list/tuple state. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
                 # Recursively sanitize each element
                 sanitized = []
                 for item in state:
@@ -1591,7 +1621,18 @@ class DQNAgent:
                         sanitized.append(sanitized_row)
                     else:
                         sanitized.append(self._extract_numeric_value(item))
-                return np.array(sanitized, dtype=np.float32)
+                result = np.array(sanitized, dtype=np.float32)
+                # Check if result is empty and provide fallback
+                if result.size == 0:
+                    logger.warning("Sanitized state resulted in empty array. Using fallback dimensions.")
+                    expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
+                    if isinstance(expected_size, tuple):
+                        expected_size = np.prod(expected_size)
+                    return np.zeros(int(expected_size), dtype=np.float32)
+                return result
             # If state is a dict, try to extract values
             elif isinstance(state, dict):
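All three guards added in this file share one fallback rule: read the expected size from state_size or state_dim, flatten tuple shapes with np.prod, and return a zero vector of that length. A standalone sketch of just that rule follows; the helper name is hypothetical, and the 403 default mirrors the code above.

import numpy as np

def fallback_state(expected_size=403):
    # Hypothetical helper mirroring the guards above: tuple shapes are
    # flattened with np.prod, then a zero vector of that size is returned.
    if isinstance(expected_size, tuple):
        expected_size = np.prod(expected_size)
    return np.zeros(int(expected_size), dtype=np.float32)

print(fallback_state().shape)         # (403,)
print(fallback_state((10, 5)).shape)  # (50,)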

@@ -373,6 +373,12 @@ class EnhancedCNN(nn.Module):
     def _check_rebuild_network(self, features):
         """Check if network needs to be rebuilt for different feature dimensions"""
+        # Prevent rebuilding with zero or invalid dimensions
+        if features <= 0:
+            logger.error(f"Invalid feature dimension: {features}. Cannot rebuild network with zero or negative dimensions.")
+            logger.error(f"Current feature_dim: {self.feature_dim}. Keeping existing network.")
+            return False
         if features != self.feature_dim:
             logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
             self.feature_dim = features
@@ -386,6 +392,20 @@
         """Forward pass through the ULTRA MASSIVE network"""
         batch_size = x.size(0)
+        # Validate input dimensions to prevent zero-element tensor issues
+        if x.numel() == 0:
+            logger.error(f"Forward pass received empty tensor with shape {x.shape}")
+            # Return default output to prevent crash
+            default_output = torch.zeros(batch_size, self.n_actions, device=x.device)
+            return default_output
+        # Check for zero feature dimensions
+        if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
+            logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
+            # Return default output to prevent crash
+            default_output = torch.zeros(batch_size, self.n_actions, device=x.device)
+            return default_output
         # Process different input shapes
         if len(x.shape) > 2:
             # Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
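As a quick sanity check of what these two guards catch, the snippet below (assuming only PyTorch) builds a batch with a zero-width feature dimension; both conditions evaluate True for it.

import torch

x = torch.zeros(4, 0, 7)                  # batch of 4 with a zero feature dimension
print(x.numel() == 0)                     # True - caught by the empty-tensor guard
print(any(d == 0 for d in x.shape[1:]))   # True - caught by the zero-dimension guard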