predict price direction

This commit is contained in:
Dobromir Popov
2025-07-27 23:20:47 +03:00
parent dfa18035f1
commit 39267697f3
4 changed files with 572 additions and 101 deletions

View File

@ -72,8 +72,10 @@ Based on the existing implementation in `core/data_provider.py`, we'll enhance i
- OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC - OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
- COB: for each 1s OHLCV we have +- 20 buckets of COB amounts in USD - COB: for each 1s OHLCV we have +- 20 buckets of COB amounts in USD
- 1,5,15 and 60s MA of the COB imbalance counting +- 5 COB buckets - 1,5,15 and 60s MA of the COB imbalance counting +- 5 COB buckets
- ***OUTPUTS***: suggested trade action (BUY/SELL) - ***OUTPUTS***:
- suggested trade action (BUY/SELL/HOLD). Paired with confidence
- immediate price movement direction vector (-1: vertical down, 1: vertical up, 0: horizontal) - linear; with its own confidence
# Standardized input for all models: # Standardized input for all models:
{ {
'primary_symbol': 'ETH/USDT', 'primary_symbol': 'ETH/USDT',

View File

@ -4,7 +4,7 @@ import torch.optim as optim
import numpy as np import numpy as np
from collections import deque from collections import deque
import random import random
from typing import Tuple, List from typing import Tuple, List, Dict, Any
import os import os
import sys import sys
import logging import logging
@ -84,8 +84,8 @@ class DQNNetwork(nn.Module):
nn.Linear(512, 4) # trending, ranging, volatile, mixed nn.Linear(512, 4) # trending, ranging, volatile, mixed
) )
# Price prediction head # Price direction prediction head - outputs direction and confidence
self.price_head = nn.Sequential( self.price_direction_head = nn.Sequential(
nn.Linear(2048, 1024), nn.Linear(2048, 1024),
nn.LayerNorm(1024), nn.LayerNorm(1024),
nn.ReLU(inplace=True), nn.ReLU(inplace=True),
@ -93,9 +93,14 @@ class DQNNetwork(nn.Module):
nn.Linear(1024, 512), nn.Linear(1024, 512),
nn.LayerNorm(512), nn.LayerNorm(512),
nn.ReLU(inplace=True), nn.ReLU(inplace=True),
nn.Linear(512, 3) # short, medium, long term price direction nn.Linear(512, 2) # [direction, confidence]
) )
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# Volatility prediction head # Volatility prediction head
self.volatility_head = nn.Sequential( self.volatility_head = nn.Sequential(
nn.Linear(2048, 1024), nn.Linear(2048, 1024),
@ -105,7 +110,7 @@ class DQNNetwork(nn.Module):
nn.Linear(1024, 256), nn.Linear(1024, 256),
nn.LayerNorm(256), nn.LayerNorm(256),
nn.ReLU(inplace=True), nn.ReLU(inplace=True),
nn.Linear(256, 1) # predicted volatility nn.Linear(256, 4) # predicted volatility for 4 timeframes
) )
# Main Q-value head (dueling architecture) # Main Q-value head (dueling architecture)
@ -162,7 +167,13 @@ class DQNNetwork(nn.Module):
# Multiple prediction heads # Multiple prediction heads
regime_pred = self.regime_head(features) regime_pred = self.regime_head(features)
price_pred = self.price_head(features) price_direction_raw = self.price_direction_head(features)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
volatility_pred = self.volatility_head(features) volatility_pred = self.volatility_head(features)
# Dueling Q-network # Dueling Q-network
@ -172,7 +183,7 @@ class DQNNetwork(nn.Module):
# Combine value and advantage for Q-values # Combine value and advantage for Q-values
q_values = value + advantage - advantage.mean(dim=1, keepdim=True) q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
return q_values, regime_pred, price_pred, volatility_pred, features return q_values, regime_pred, price_direction_pred, volatility_pred, features
def act(self, state, explore=True): def act(self, state, explore=True):
""" """
@ -196,7 +207,11 @@ class DQNNetwork(nn.Module):
state = state.unsqueeze(0) state = state.unsqueeze(0)
with torch.no_grad(): with torch.no_grad():
q_values, regime_pred, price_pred, volatility_pred, features = self.forward(state) q_values, regime_pred, price_direction_pred, volatility_pred, features = self.forward(state)
# Process price direction predictions
if price_direction_pred is not None:
self.process_price_direction_predictions(price_direction_pred)
# Get action probabilities using softmax # Get action probabilities using softmax
action_probs = F.softmax(q_values, dim=1) action_probs = F.softmax(q_values, dim=1)
@ -332,23 +347,10 @@ class DQNAgent:
self.recent_prices = deque(maxlen=20) self.recent_prices = deque(maxlen=20)
self.recent_rewards = deque(maxlen=100) self.recent_rewards = deque(maxlen=100)
# Price prediction tracking # Price direction tracking - stores direction and confidence
self.last_price_pred = { self.last_price_direction = {
'immediate': { 'direction': 0.0, # Single value between -1 and 1
'direction': 1, # Default to "sideways" 'confidence': 0.0 # Single value between 0 and 1
'confidence': 0.0,
'change': 0.0
},
'midterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'longterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
}
} }
# Store separate memory for price direction examples # Store separate memory for price direction examples
@ -521,25 +523,6 @@ class DQNAgent:
logger.error(f"Error saving DQN checkpoint: {e}") logger.error(f"Error saving DQN checkpoint: {e}")
return False return False
# Price prediction tracking
self.last_price_pred = {
'immediate': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'midterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'longterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
}
}
# Store separate memory for price direction examples # Store separate memory for price direction examples
self.price_movement_memory = [] # For storing examples of clear price movements self.price_movement_memory = [] # For storing examples of clear price movements
@ -811,6 +794,92 @@ class DQNAgent:
logger.error(f"Error in act_with_confidence: {e}") logger.error(f"Error in act_with_confidence: {e}")
# Return default action with low confidence # Return default action with low confidence
return 1, 0.1, [0.45, 0.55] # Default to HOLD action return 1, 0.1, [0.45, 0.55] # Default to HOLD action
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
    """
    Record the latest price direction prediction in a standardized dict.

    Only the first sample of the batch is read. The result is stored on
    ``self.last_price_direction`` and that stored dict is returned.

    Args:
        price_direction_pred: Tensor whose first row holds
            [direction, confidence] (shape (batch_size, 2)); an unbatched
            1-D tensor with the same layout is also accepted.

    Returns:
        Dict with 'direction' and 'confidence' keys. If the input is None,
        empty or cannot be read, the previously stored dict is returned
        unchanged.
    """
    try:
        if price_direction_pred is None or price_direction_pred.numel() == 0:
            return self.last_price_direction

        first_sample = price_direction_pred.detach()
        if first_sample.dim() == 1:
            # Unbatched [direction, confidence] -> treat as a batch of one
            first_sample = first_sample.unsqueeze(0)

        # Pull both scalars to the host in one transfer instead of two .item() calls
        direction_value, confidence_value = first_sample[0, :2].tolist()

        # Update last price direction
        self.last_price_direction = {
            'direction': float(direction_value),
            'confidence': float(confidence_value)
        }
        return self.last_price_direction
    except Exception as e:
        # Best-effort bookkeeping: never let a malformed prediction break action selection
        logger.error(f"Error processing price direction predictions: {e}")
        return self.last_price_direction
def get_price_direction_vector(self) -> Dict[str, float]:
    """
    Return the most recently stored price direction prediction.

    Returns:
        The dict held in ``self.last_price_direction`` (keys 'direction'
        and 'confidence'); the stored object itself is returned, not a copy.
    """
    latest_direction = self.last_price_direction
    return latest_direction
def get_price_direction_summary(self) -> Dict[str, Any]:
    """
    Summarize the last stored price direction prediction.

    Reads ``self.last_price_direction`` and derives a discrete label from
    the continuous direction value: above 0.1 is "UP", below -0.1 is
    "DOWN", anything in between is "SIDEWAYS".

    Returns:
        Dict with keys 'direction_value', 'confidence_value',
        'direction_label', 'discrete_direction' (1 / -1 / 0), 'strength'
        (absolute direction) and 'weighted_strength' (strength multiplied
        by confidence). If the stored prediction cannot be read, a neutral
        SIDEWAYS summary with zeros is returned and the error is logged.
    """
    try:
        direction_value = self.last_price_direction['direction']
        confidence_value = self.last_price_direction['confidence']

        # Convert to discrete direction
        if direction_value > 0.1:
            direction_label = "UP"
            discrete_direction = 1
        elif direction_value < -0.1:
            direction_label = "DOWN"
            discrete_direction = -1
        else:
            direction_label = "SIDEWAYS"
            discrete_direction = 0

        strength = abs(float(direction_value))
        return {
            'direction_value': float(direction_value),
            'confidence_value': float(confidence_value),
            'direction_label': direction_label,
            'discrete_direction': discrete_direction,
            'strength': strength,
            'weighted_strength': strength * float(confidence_value)
        }
    except Exception as e:
        logger.error(f"Error calculating price direction summary: {e}")
        return {
            'direction_value': 0.0,
            'confidence_value': 0.0,
            'direction_label': "SIDEWAYS",
            'discrete_direction': 0,
            'strength': 0.0,
            'weighted_strength': 0.0
        }
def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore): def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore):
""" """
@ -1032,11 +1101,8 @@ class DQNAgent:
logger.error(f"Error converting experiences to tensors: {e}") logger.error(f"Error converting experiences to tensors: {e}")
return 0.0 return 0.0
# Choose training method based on precision mode # Always use standard training to fix gradient issues
if self.use_mixed_precision: loss = self._replay_standard(states, actions, rewards, next_states, dones)
loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
else:
loss = self._replay_standard(states, actions, rewards, next_states, dones)
# Update epsilon # Update epsilon
if self.epsilon > self.epsilon_min: if self.epsilon > self.epsilon_min:
@ -1208,9 +1274,33 @@ class DQNAgent:
q_loss = self.criterion(current_q_values, target_q_values.detach()) q_loss = self.criterion(current_q_values, target_q_values.detach())
# Use only Q-loss for now to ensure clean gradients # Calculate auxiliary losses and add to Q-loss
total_loss = q_loss total_loss = q_loss
# Add auxiliary losses if available
try:
# Get additional predictions from forward pass
if isinstance(q_values_output, tuple) and len(q_values_output) >= 5:
current_regime_pred = q_values_output[1]
current_price_pred = q_values_output[2]
current_volatility_pred = q_values_output[3]
current_extrema_pred = current_regime_pred # Use regime as extrema proxy for now
# Price direction loss
if current_price_pred is not None and current_price_pred.shape[0] > 0:
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
if price_direction_loss is not None:
total_loss = total_loss + 0.2 * price_direction_loss
# Extrema loss
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
if extrema_loss is not None:
total_loss = total_loss + 0.1 * extrema_loss
except Exception as e:
logger.debug(f"Could not add auxiliary loss in standard training: {e}")
# Reset gradients # Reset gradients
self.optimizer.zero_grad() self.optimizer.zero_grad()
@ -1309,13 +1399,17 @@ class DQNAgent:
# Add auxiliary losses if available # Add auxiliary losses if available
try: try:
# Price direction loss
if current_price_pred is not None and current_price_pred.shape[0] > 0:
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
if price_direction_loss is not None:
loss = loss + 0.2 * price_direction_loss
# Extrema loss
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0: if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
# Simple extrema targets extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
with torch.no_grad(): if extrema_loss is not None:
extrema_targets = torch.ones(current_extrema_pred.shape[0], dtype=torch.long, device=current_extrema_pred.device) * 2 loss = loss + 0.1 * extrema_loss
extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
loss = loss + 0.1 * extrema_loss
except Exception as e: except Exception as e:
logger.debug(f"Could not add auxiliary loss in mixed precision: {e}") logger.debug(f"Could not add auxiliary loss in mixed precision: {e}")
@ -1649,6 +1743,95 @@ class DQNAgent:
'exit_threshold': self.exit_confidence_threshold 'exit_threshold': self.exit_confidence_threshold
} }
def _calculate_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """
    Calculate the auxiliary loss for price direction predictions.

    Targets are derived from the replayed experience rather than from
    market data: a sample with reward above 0.01 gets direction target
    +1 when the action is 0 (BUY) and -1 when the action is 1 (SELL);
    every other sample targets 0 (sideways). The confidence target is the
    absolute reward clamped to [0, 1].

    Args:
        price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
        rewards: Tensor with one reward per sample ([batch] or [batch, 1])
        actions: Tensor with one action index per sample ([batch] or [batch, 1])

    Returns:
        Scalar loss tensor (direction MSE + 0.3 * confidence MSE), or None
        when the prediction does not have exactly 2 columns or the loss
        cannot be computed.
    """
    try:
        if price_direction_pred.size(1) != 2:
            return None

        # Extract direction and confidence predictions
        direction_pred = price_direction_pred[:, 0]  # -1 to 1
        confidence_pred = price_direction_pred[:, 1]  # 0 to 1

        # Create targets based on rewards and actions
        with torch.no_grad():
            device = price_direction_pred.device
            # Flatten so [batch, 1] inputs cannot broadcast the MSE to [batch, batch]
            flat_rewards = rewards.reshape(-1).to(device)
            flat_actions = actions.reshape(-1).to(device)

            # Vectorized masks replace a per-sample Python loop (one device sync per element)
            profitable = flat_rewards > 0.01  # Positive reward threshold
            direction_targets = torch.zeros_like(direction_pred)
            direction_targets[profitable & (flat_actions == 0)] = 1.0   # BUY  -> UP
            direction_targets[profitable & (flat_actions == 1)] = -1.0  # SELL -> DOWN

            # Confidence targets: based on reward magnitude (higher reward = higher confidence)
            confidence_targets = flat_rewards.abs().clamp(0, 1).to(direction_pred.dtype)

        # Calculate losses for each component
        direction_loss = F.mse_loss(direction_pred, direction_targets)
        confidence_loss = F.mse_loss(confidence_pred, confidence_targets)

        # Combined loss (direction is more important than confidence)
        return direction_loss + 0.3 * confidence_loss
    except Exception as e:
        logger.debug(f"Error calculating price direction loss: {e}")
        return None
def _calculate_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """
    Calculate the auxiliary cross-entropy loss for extrema predictions.

    Targets are class indices 0 = bottom, 1 = top, 2 = neither. A sample
    with reward above 0.05 is labelled bottom when the action is 0 (BUY)
    and top when the action is 1 (SELL); everything else is "neither".
    Only the first three logit columns are used.

    Args:
        extrema_pred: Logits of shape [batch, C] with C >= 3
        rewards: Tensor with one reward per sample
        actions: Tensor with one action index per sample

    Returns:
        Scalar loss tensor, or None when fewer than three logit columns
        are available or the loss cannot be computed.
    """
    try:
        # The default target class is 2, so fewer than 3 logits can never be valid;
        # bail out before cross_entropy sees an out-of-range class index.
        if extrema_pred.dim() != 2 or extrema_pred.size(1) < 3:
            return None

        # Create targets based on reward patterns
        with torch.no_grad():
            device = extrema_pred.device
            flat_rewards = rewards.reshape(-1).to(device)
            flat_actions = actions.reshape(-1).to(device)

            # High positive reward suggests a good entry point
            # (potential bottom for BUY, top for SELL)
            strong = flat_rewards > 0.05
            extrema_targets = torch.full(
                (extrema_pred.size(0),), 2, dtype=torch.long, device=device
            )  # Default to "neither"
            extrema_targets[strong & (flat_actions == 0)] = 0  # Bottom
            extrema_targets[strong & (flat_actions == 1)] = 1  # Top

        # Calculate cross-entropy loss
        return F.cross_entropy(extrema_pred[:, :3], extrema_targets)
    except Exception as e:
        logger.debug(f"Error calculating extrema loss: {e}")
        return None
def get_enhanced_training_stats(self): def get_enhanced_training_stats(self):
"""Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)""" """Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)"""
return { return {

View File

@ -265,8 +265,9 @@ class EnhancedCNN(nn.Module):
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
) )
# ULTRA MASSIVE multi-timeframe price prediction heads # ULTRA MASSIVE price direction prediction head
self.price_pred_immediate = nn.Sequential( # Outputs single direction and confidence values
self.price_direction_head = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512 nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(), nn.ReLU(),
nn.Dropout(0.3), nn.Dropout(0.3),
@ -275,32 +276,13 @@ class EnhancedCNN(nn.Module):
nn.Dropout(0.3), nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128 nn.Linear(512, 256), # Increased from 128
nn.ReLU(), nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways nn.Linear(256, 2) # [direction, confidence]
) )
self.price_pred_midterm = nn.Sequential( # Direction activation (tanh for -1 to 1)
nn.Linear(1024, 1024), # Increased from 512 self.direction_activation = nn.Tanh()
nn.ReLU(), # Confidence activation (sigmoid for 0 to 1)
nn.Dropout(0.3), self.confidence_activation = nn.Sigmoid()
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
self.price_pred_longterm = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
# ULTRA MASSIVE value prediction with ensemble approaches # ULTRA MASSIVE value prediction with ensemble approaches
self.price_pred_value = nn.Sequential( self.price_pred_value = nn.Sequential(
@ -490,10 +472,14 @@ class EnhancedCNN(nn.Module):
# Extrema predictions (bottom/top/neither detection) # Extrema predictions (bottom/top/neither detection)
extrema_pred = self.extrema_head(features_refined) extrema_pred = self.extrema_head(features_refined)
# Multi-timeframe price movement predictions # Price direction predictions
price_immediate = self.price_pred_immediate(features_refined) price_direction_raw = self.price_direction_head(features_refined)
price_midterm = self.price_pred_midterm(features_refined)
price_longterm = self.price_pred_longterm(features_refined) # Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
price_values = self.price_pred_value(features_refined) price_values = self.price_pred_value(features_refined)
# Additional specialized predictions for enhanced accuracy # Additional specialized predictions for enhanced accuracy
@ -502,15 +488,14 @@ class EnhancedCNN(nn.Module):
market_regime_pred = self.market_regime_head(features_refined) market_regime_pred = self.market_regime_head(features_refined)
risk_pred = self.risk_head(features_refined) risk_pred = self.risk_head(features_refined)
# Package all price predictions into a single tensor (use immediate as primary) # Use the price direction prediction directly (already [batch, 2])
# For compatibility with DQN agent, we return price_immediate as the price prediction tensor price_direction_tensor = price_direction_pred
price_pred_tensor = price_immediate
# Package additional predictions into a single tensor (use volatility as primary) # Package additional predictions into a single tensor (use volatility as primary)
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
advanced_pred_tensor = volatility_pred advanced_pred_tensor = volatility_pred
return q_values, extrema_pred, price_pred_tensor, features_refined, advanced_pred_tensor return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
def act(self, state, explore=True) -> Tuple[int, float, List[float]]: def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
"""Enhanced action selection with ultra massive model predictions""" """Enhanced action selection with ultra massive model predictions"""
@ -528,7 +513,11 @@ class EnhancedCNN(nn.Module):
state_tensor = state_tensor.unsqueeze(0) state_tensor = state_tensor.unsqueeze(0)
with torch.no_grad(): with torch.no_grad():
q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor) q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
# Process price direction predictions
if price_direction_predictions is not None:
self.process_price_direction_predictions(price_direction_predictions)
# Apply softmax to get action probabilities # Apply softmax to get action probabilities
action_probs_tensor = torch.softmax(q_values, dim=1) action_probs_tensor = torch.softmax(q_values, dim=1)
@ -565,6 +554,100 @@ class EnhancedCNN(nn.Module):
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})") logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
return action_idx, confidence, action_probs return action_idx, confidence, action_probs
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
    """
    Convert a price direction prediction to a standardized dict and store it.

    Only the first sample of the batch is read. On success the result is
    kept on ``self.last_price_direction`` for later access.

    Args:
        price_direction_pred: Tensor whose first row holds
            [direction, confidence] (shape (batch_size, 2)); an unbatched
            1-D tensor with the same layout is also accepted.

    Returns:
        Dict with 'direction' and 'confidence' keys, or an empty dict when
        the input is None, empty or cannot be read (the stored value is
        left untouched in that case).
    """
    try:
        if price_direction_pred is None or price_direction_pred.numel() == 0:
            return {}

        first_sample = price_direction_pred.detach()
        if first_sample.dim() == 1:
            # Unbatched [direction, confidence] -> treat as a batch of one
            first_sample = first_sample.unsqueeze(0)

        # Pull both scalars to the host in one transfer instead of two .item() calls
        direction_value, confidence_value = first_sample[0, :2].tolist()

        processed_directions = {
            'direction': float(direction_value),
            'confidence': float(confidence_value)
        }

        # Store for later access
        self.last_price_direction = processed_directions

        return processed_directions
    except Exception as e:
        logger.error(f"Error processing price direction predictions: {e}")
        return {}
def get_price_direction_vector(self) -> Dict[str, float]:
    """
    Return the most recently stored price direction prediction.

    Returns:
        The dict held in ``self.last_price_direction`` (keys 'direction'
        and 'confidence'), or an empty dict if nothing has been stored yet.
    """
    try:
        return self.last_price_direction
    except AttributeError:
        # No prediction has been processed on this instance so far
        return {}
def get_price_direction_summary(self) -> Dict[str, Any]:
    """
    Summarize the last stored price direction prediction.

    The continuous direction value is mapped to a discrete label: above
    0.1 is "UP", below -0.1 is "DOWN", otherwise "SIDEWAYS".

    Returns:
        Dict with keys 'direction_value', 'confidence_value',
        'direction_label', 'discrete_direction' (1 / -1 / 0), 'strength'
        (absolute direction) and 'weighted_strength' (strength multiplied
        by confidence). A neutral SIDEWAYS summary with zeros is returned
        when nothing has been stored yet or when an error occurs.
    """
    try:
        stored = getattr(self, 'last_price_direction', {})
        if stored:
            raw_direction = stored['direction']
            raw_confidence = stored['confidence']

            # Start from the neutral label and override outside the dead band
            label, sign = "SIDEWAYS", 0
            if raw_direction > 0.1:
                label, sign = "UP", 1
            elif raw_direction < -0.1:
                label, sign = "DOWN", -1

            return {
                'direction_value': float(raw_direction),
                'confidence_value': float(raw_confidence),
                'direction_label': label,
                'discrete_direction': sign,
                'strength': abs(float(raw_direction)),
                'weighted_strength': abs(float(raw_direction)) * float(raw_confidence)
            }

        # Nothing recorded yet
        return {
            'direction_value': 0.0,
            'confidence_value': 0.0,
            'direction_label': "SIDEWAYS",
            'discrete_direction': 0,
            'strength': 0.0,
            'weighted_strength': 0.0
        }
    except Exception as e:
        logger.error(f"Error calculating price direction summary: {e}")
        return {
            'direction_value': 0.0,
            'confidence_value': 0.0,
            'direction_label': "SIDEWAYS",
            'discrete_direction': 0,
            'strength': 0.0,
            'weighted_strength': 0.0
        }
def save(self, path): def save(self, path):
"""Save model weights and architecture""" """Save model weights and architecture"""

View File

@ -719,6 +719,95 @@ class TradingOrchestrator:
except Exception as e: except Exception as e:
logger.error(f"Error initializing ML models: {e}") logger.error(f"Error initializing ML models: {e}")
def _calculate_cnn_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """
    Calculate the auxiliary price direction loss for the CNN model.

    Targets are derived from the training sample itself: a reward above
    0.01 yields direction target +1 when the action is 0 (BUY) and -1
    when the action is 1 (SELL); every other sample targets 0 (sideways).
    The confidence target is the absolute reward clamped to [0, 1].

    Args:
        price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
        rewards: Tensor with one reward per sample ([batch] or [batch, 1])
        actions: Tensor with one action index per sample ([batch] or [batch, 1])

    Returns:
        Scalar loss tensor (direction MSE + 0.3 * confidence MSE), or None
        when the prediction does not have exactly 2 columns or the loss
        cannot be computed.
    """
    try:
        if price_direction_pred.size(1) != 2:
            return None

        # Extract direction and confidence predictions
        direction_pred = price_direction_pred[:, 0]  # -1 to 1
        confidence_pred = price_direction_pred[:, 1]  # 0 to 1

        # Create targets based on rewards and actions
        with torch.no_grad():
            device = price_direction_pred.device
            # Flatten so [batch, 1] inputs cannot broadcast the MSE to [batch, batch]
            flat_rewards = rewards.reshape(-1).to(device)
            flat_actions = actions.reshape(-1).to(device)

            # Vectorized masks replace a per-sample Python loop (one device sync per element)
            profitable = flat_rewards > 0.01  # Positive reward threshold
            direction_targets = torch.zeros_like(direction_pred)
            direction_targets[profitable & (flat_actions == 0)] = 1.0   # BUY  -> UP
            direction_targets[profitable & (flat_actions == 1)] = -1.0  # SELL -> DOWN

            # Confidence targets: based on reward magnitude (higher reward = higher confidence)
            confidence_targets = flat_rewards.abs().clamp(0, 1).to(direction_pred.dtype)

        # Calculate losses for each component
        mse = nn.MSELoss()
        direction_loss = mse(direction_pred, direction_targets)
        confidence_loss = mse(confidence_pred, confidence_targets)

        # Combined loss (direction is more important than confidence)
        return direction_loss + 0.3 * confidence_loss
    except Exception as e:
        logger.debug(f"Error calculating CNN price direction loss: {e}")
        return None
def _calculate_cnn_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """
    Calculate the auxiliary extrema cross-entropy loss for the CNN model.

    Targets are class indices 0 = bottom, 1 = top, 2 = neither. A sample
    with reward above 0.05 is labelled bottom when the action is 0 (BUY)
    and top when the action is 1 (SELL); everything else is "neither".
    Only the first three logit columns are used.

    Args:
        extrema_pred: Logits of shape [batch, C] with C >= 3
        rewards: Tensor with one reward per sample
        actions: Tensor with one action index per sample

    Returns:
        Scalar loss tensor, or None when fewer than three logit columns
        are available or the loss cannot be computed.
    """
    try:
        # The default target class is 2, so fewer than 3 logits can never be valid;
        # bail out before the loss sees an out-of-range class index.
        if extrema_pred.dim() != 2 or extrema_pred.size(1) < 3:
            return None

        # Create targets based on reward patterns
        with torch.no_grad():
            device = extrema_pred.device
            flat_rewards = rewards.reshape(-1).to(device)
            flat_actions = actions.reshape(-1).to(device)

            # High positive reward suggests we're at a good entry point
            strong = flat_rewards > 0.05
            extrema_targets = torch.full(
                (extrema_pred.size(0),), 2, dtype=torch.long, device=device
            )  # Default to "neither"
            extrema_targets[strong & (flat_actions == 0)] = 0  # Bottom
            extrema_targets[strong & (flat_actions == 1)] = 1  # Top

        # Calculate cross-entropy loss
        return nn.CrossEntropyLoss()(extrema_pred[:, :3], extrema_targets)
    except Exception as e:
        logger.debug(f"Error calculating CNN extrema loss: {e}")
        return None
def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None): def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None):
"""Update model loss and potentially best loss""" """Update model loss and potentially best loss"""
if model_name in self.model_states: if model_name in self.model_states:
@ -1938,7 +2027,71 @@ class TradingOrchestrator:
# Evaluate the previous prediction and train the model immediately # Evaluate the previous prediction and train the model immediately
await self._evaluate_and_train_on_record(inference_record, current_price) await self._evaluate_and_train_on_record(inference_record, current_price)
logger.info(f"Completed immediate training for {model_name}") # Log predicted vs actual outcome
prediction = inference_record.get('prediction', {})
predicted_action = prediction.get('action', 'UNKNOWN')
predicted_confidence = prediction.get('confidence', 0.0)
# Calculate actual outcome
symbol = inference_record.get('symbol', 'ETH/USDT')
predicted_price = None
actual_price_change_pct = 0.0
# Try to get price direction vectors from metadata (new format)
if 'price_direction' in prediction and prediction['price_direction']:
try:
price_direction_data = prediction['price_direction']
# Process price direction data
if isinstance(price_direction_data, dict) and 'direction' in price_direction_data:
direction = price_direction_data['direction']
confidence = price_direction_data.get('confidence', 1.0)
# Convert direction to price change percentage
# Scale by confidence and direction strength
predicted_price_change_pct = direction * confidence * 0.02 # 2% max change
predicted_price = current_price * (1 + predicted_price_change_pct)
except Exception as e:
logger.debug(f"Error processing price direction data: {e}")
# Fallback to old price prediction format
elif 'price_prediction' in prediction and prediction['price_prediction']:
try:
price_prediction_data = prediction['price_prediction']
if isinstance(price_prediction_data, list) and len(price_prediction_data) > 0:
predicted_price_change_pct = float(price_prediction_data[0]) * 0.01
predicted_price = current_price * (1 + predicted_price_change_pct)
except Exception:
pass
# Calculate price change
if predicted_price is not None:
actual_price_change_pct = (current_price - predicted_price) / predicted_price * 100
price_outcome = f"Predicted: ${predicted_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
# Fall back to historical price comparison
historical_data = self.data_provider.get_historical_data(symbol, '1m', limit=10)
if historical_data is not None and not historical_data.empty:
historical_price = historical_data['close'].iloc[-1]
actual_price_change_pct = (current_price - historical_price) / historical_price * 100
price_outcome = f"Historical: ${historical_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
price_outcome = f"Actual: ${current_price:.2f}"
# Determine if prediction was correct based on action and price movement
was_correct = False
if predicted_action == 'BUY' and actual_price_change_pct > 0.1: # Price went up
was_correct = True
elif predicted_action == 'SELL' and actual_price_change_pct < -0.1: # Price went down
was_correct = True
elif predicted_action == 'HOLD' and abs(actual_price_change_pct) < 0.5: # Price stayed stable
was_correct = True
outcome_status = "✅ CORRECT" if was_correct else "❌ INCORRECT"
logger.info(f"Completed immediate training for {model_name} - {outcome_status}")
logger.info(f" Prediction: {predicted_action} ({predicted_confidence:.3f})")
logger.info(f" {price_outcome}")
logger.info(f" Outcome: {outcome_status}")
except Exception as e: except Exception as e:
logger.error(f"Error in immediate training for {model_name}: {e}") logger.error(f"Error in immediate training for {model_name}: {e}")
@ -2412,12 +2565,33 @@ class TradingOrchestrator:
self.cnn_optimizer.zero_grad() self.cnn_optimizer.zero_grad()
# Forward pass # Forward pass
q_values, extrema_pred, price_pred, features_refined, advanced_pred = self.cnn_model(features_tensor) q_values, extrema_pred, price_direction_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
# Calculate loss # Calculate primary Q-value loss
q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1) q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
target_q = reward_tensor # Simplified target target_q = reward_tensor # Simplified target
loss = nn.MSELoss()(q_values_selected, target_q) q_loss = nn.MSELoss()(q_values_selected, target_q)
# Calculate auxiliary losses for price direction and extrema
total_loss = q_loss
# Price direction loss
if price_direction_pred is not None and price_direction_pred.shape[0] > 0:
price_direction_loss = self._calculate_cnn_price_direction_loss(
price_direction_pred, reward_tensor, action_tensor
)
if price_direction_loss is not None:
total_loss = total_loss + 0.2 * price_direction_loss
# Extrema loss
if extrema_pred is not None and extrema_pred.shape[0] > 0:
extrema_loss = self._calculate_cnn_extrema_loss(
extrema_pred, reward_tensor, action_tensor
)
if extrema_loss is not None:
total_loss = total_loss + 0.1 * extrema_loss
loss = total_loss
# Backward pass # Backward pass
training_start_time = time.time() training_start_time = time.time()
@ -2640,9 +2814,17 @@ class TradingOrchestrator:
'HOLD': float(action_probs[0, 2].item()) 'HOLD': float(action_probs[0, 2].item())
} }
# Extract price predictions if available # Extract price direction predictions if available
price_prediction = None price_direction_data = None
if price_pred is not None: if price_pred is not None:
# Process price direction predictions
if hasattr(model.model, 'process_price_direction_predictions'):
try:
price_direction_data = model.model.process_price_direction_predictions(price_pred)
except Exception as e:
logger.debug(f"Error processing CNN price direction: {e}")
# Fallback to old format for compatibility
price_prediction = price_pred.squeeze(0).cpu().numpy().tolist() price_prediction = price_pred.squeeze(0).cpu().numpy().tolist()
prediction = Prediction( prediction = Prediction(
@ -2656,6 +2838,7 @@ class TradingOrchestrator:
'feature_size': len(base_data.get_feature_vector()), 'feature_size': len(base_data.get_feature_vector()),
'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'], 'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'],
'price_prediction': price_prediction, 'price_prediction': price_prediction,
'price_direction': price_direction_data,
'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None 'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None
} }
) )
@ -2694,6 +2877,14 @@ class TradingOrchestrator:
action_names = ['BUY', 'SELL', 'HOLD'] # Note: enhanced_cnn uses this order action_names = ['BUY', 'SELL', 'HOLD'] # Note: enhanced_cnn uses this order
best_action = action_names[action_idx] best_action = action_names[action_idx]
# Get price direction vectors from CNN model if available
price_direction_data = None
if hasattr(model.model, 'get_price_direction_vector'):
try:
price_direction_data = model.model.get_price_direction_vector()
except Exception as e:
logger.debug(f"Error getting price direction from CNN: {e}")
pred = Prediction( pred = Prediction(
action=best_action, action=best_action,
confidence=float(confidence), confidence=float(confidence),
@ -2708,7 +2899,8 @@ class TradingOrchestrator:
metadata={ metadata={
'feature_vector_size': len(feature_vector), 'feature_vector_size': len(feature_vector),
'unified_input': True, 'unified_input': True,
'fallback_method': 'direct_model_inference' 'fallback_method': 'direct_model_inference',
'price_direction': price_direction_data
} }
) )
predictions.append(pred) predictions.append(pred)
@ -2811,6 +3003,14 @@ class TradingOrchestrator:
if q_values_for_capture: if q_values_for_capture:
logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.") logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.")
# Get price direction vectors from DQN model if available
price_direction_data = None
if hasattr(model.model, 'get_price_direction_vector'):
try:
price_direction_data = model.model.get_price_direction_vector()
except Exception as e:
logger.debug(f"Error getting price direction from DQN: {e}")
prediction = Prediction( prediction = Prediction(
action=action, action=action,
confidence=float(confidence), confidence=float(confidence),
@ -2818,7 +3018,10 @@ class TradingOrchestrator:
timeframe='mixed', # RL uses mixed timeframes timeframe='mixed', # RL uses mixed timeframes
timestamp=datetime.now(), timestamp=datetime.now(),
model_name=model.name, model_name=model.name,
metadata={'state_size': len(state)} metadata={
'state_size': len(state),
'price_direction': price_direction_data
}
) )
# Capture DQN prediction for dashboard visualization # Capture DQN prediction for dashboard visualization