predict price direction

Dobromir Popov
2025-07-27 23:20:47 +03:00
parent dfa18035f1
commit 39267697f3
4 changed files with 572 additions and 101 deletions

View File

@ -72,8 +72,10 @@ Based on the existing implementation in `core/data_provider.py`, we'll enhance i
- OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
- COB: for each 1s OHLCV frame, ±20 buckets of COB amounts in USD
- 1, 5, 15 and 60s MAs of the COB imbalance, computed over ±5 COB buckets
- ***OUTPUTS***: suggested trade action (BUY/SELL)
- ***OUTPUTS***:
- suggested trade action (BUY/SELL/HOLD), paired with a confidence
- immediate price movement direction vector (-1: vertical down, 1: vertical up, 0: horizontal) - linear, with its own confidence (see the sketch below)
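A hypothetical sketch of how these two outputs might be packaged together (the field names are illustrative, not part of the spec):

# Illustrative output shape only - field names are assumptions, not the spec's
model_output = {
    'action': 'BUY',               # one of BUY / SELL / HOLD
    'action_confidence': 0.72,     # 0 to 1
    'direction': 0.45,             # -1 (down) .. 1 (up), linear
    'direction_confidence': 0.60,  # 0 to 1
}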
# Standardized input for all models:
{
'primary_symbol': 'ETH/USDT',

View File

@ -4,7 +4,7 @@ import torch.optim as optim
import numpy as np
from collections import deque
import random
from typing import Tuple, List
from typing import Tuple, List, Dict, Any
import os
import sys
import logging
@ -84,8 +84,8 @@ class DQNNetwork(nn.Module):
nn.Linear(512, 4) # trending, ranging, volatile, mixed
)
# Price prediction head
self.price_head = nn.Sequential(
# Price direction prediction head - outputs direction and confidence
self.price_direction_head = nn.Sequential(
nn.Linear(2048, 1024),
nn.LayerNorm(1024),
nn.ReLU(inplace=True),
@ -93,9 +93,14 @@ class DQNNetwork(nn.Module):
nn.Linear(1024, 512),
nn.LayerNorm(512),
nn.ReLU(inplace=True),
nn.Linear(512, 3) # short, medium, long term price direction
nn.Linear(512, 2) # [direction, confidence]
)
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# Volatility prediction head
self.volatility_head = nn.Sequential(
nn.Linear(2048, 1024),
@ -105,7 +110,7 @@ class DQNNetwork(nn.Module):
nn.Linear(1024, 256),
nn.LayerNorm(256),
nn.ReLU(inplace=True),
nn.Linear(256, 1) # predicted volatility
nn.Linear(256, 4) # predicted volatility for 4 timeframes
)
# Main Q-value head (dueling architecture)
@ -162,7 +167,13 @@ class DQNNetwork(nn.Module):
# Multiple prediction heads
regime_pred = self.regime_head(features)
price_pred = self.price_head(features)
price_direction_raw = self.price_direction_head(features)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
volatility_pred = self.volatility_head(features)
# Dueling Q-network
@ -172,7 +183,7 @@ class DQNNetwork(nn.Module):
# Combine value and advantage for Q-values
q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
return q_values, regime_pred, price_pred, volatility_pred, features
return q_values, regime_pred, price_direction_pred, volatility_pred, features
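The new head is self-contained enough to sanity-check in isolation: a two-unit output with tanh bounding the direction and sigmoid bounding the confidence. A minimal sketch with the same layer widths as the diff (the class name and smoke test are made up):

import torch
import torch.nn as nn

class DirectionConfidenceHead(nn.Module):
    """Two-unit head: tanh-bounded direction, sigmoid-bounded confidence."""
    def __init__(self, in_features: int = 2048):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.LayerNorm(1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.LayerNorm(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 2),  # [direction, confidence] before activation
        )

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        raw = self.net(features)
        direction = torch.tanh(raw[:, 0:1])       # -1 to 1
        confidence = torch.sigmoid(raw[:, 1:2])   # 0 to 1
        return torch.cat([direction, confidence], dim=1)  # [batch, 2]

head = DirectionConfidenceHead()
out = head(torch.randn(4, 2048))
assert out.shape == (4, 2)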
def act(self, state, explore=True):
"""
@ -196,7 +207,11 @@ class DQNNetwork(nn.Module):
state = state.unsqueeze(0)
with torch.no_grad():
q_values, regime_pred, price_pred, volatility_pred, features = self.forward(state)
q_values, regime_pred, price_direction_pred, volatility_pred, features = self.forward(state)
# Process price direction predictions
if price_direction_pred is not None:
self.process_price_direction_predictions(price_direction_pred)
# Get action probabilities using softmax
action_probs = F.softmax(q_values, dim=1)
@ -332,23 +347,10 @@ class DQNAgent:
self.recent_prices = deque(maxlen=20)
self.recent_rewards = deque(maxlen=100)
# Price prediction tracking
self.last_price_pred = {
'immediate': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'midterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'longterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
}
# Price direction tracking - stores direction and confidence
self.last_price_direction = {
'direction': 0.0, # Single value between -1 and 1
'confidence': 0.0 # Single value between 0 and 1
}
# Store separate memory for price direction examples
@ -521,25 +523,6 @@ class DQNAgent:
logger.error(f"Error saving DQN checkpoint: {e}")
return False
# Price prediction tracking
self.last_price_pred = {
'immediate': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'midterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
},
'longterm': {
'direction': 1, # Default to "sideways"
'confidence': 0.0,
'change': 0.0
}
}
# Store separate memory for price direction examples
self.price_movement_memory = [] # For storing examples of clear price movements
@ -811,6 +794,92 @@ class DQNAgent:
logger.error(f"Error in act_with_confidence: {e}")
# Return default action with low confidence
return 1, 0.1, [0.45, 0.55] # Default to HOLD action
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
"""
Process price direction predictions and convert to standardized format
Args:
price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
try:
if price_direction_pred is None or price_direction_pred.numel() == 0:
return self.last_price_direction
# Extract direction and confidence values
direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1
confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1
# Update last price direction
self.last_price_direction = {
'direction': direction_value,
'confidence': confidence_value
}
return self.last_price_direction
except Exception as e:
logger.error(f"Error processing price direction predictions: {e}")
return self.last_price_direction
def get_price_direction_vector(self) -> Dict[str, float]:
"""
Get the current price direction and confidence
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
return self.last_price_direction
def get_price_direction_summary(self) -> Dict[str, Any]:
"""
Get a summary of price direction prediction
Returns:
Dict containing direction and confidence information
"""
try:
direction_value = self.last_price_direction['direction']
confidence_value = self.last_price_direction['confidence']
# Convert to discrete direction
if direction_value > 0.1:
direction_label = "UP"
discrete_direction = 1
elif direction_value < -0.1:
direction_label = "DOWN"
discrete_direction = -1
else:
direction_label = "SIDEWAYS"
discrete_direction = 0
return {
'direction_value': float(direction_value),
'confidence_value': float(confidence_value),
'direction_label': direction_label,
'discrete_direction': discrete_direction,
'strength': abs(float(direction_value)),
'weighted_strength': abs(float(direction_value)) * float(confidence_value)
}
except Exception as e:
logger.error(f"Error calculating price direction summary: {e}")
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
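Together these helpers give a simple read path after inference; a hedged usage sketch (assumes an already-initialized DQNAgent and a valid state, both elided here):

# Assumes `agent` is an initialized DQNAgent and `state` a valid input - both elided
action, confidence, probs = agent.act_with_confidence(state)
vector = agent.get_price_direction_vector()    # {'direction': -1..1, 'confidence': 0..1}
summary = agent.get_price_direction_summary()  # adds label, discrete direction, strengths
print(summary['direction_label'], round(summary['weighted_strength'], 3))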
def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore):
"""
@ -1032,11 +1101,8 @@ class DQNAgent:
logger.error(f"Error converting experiences to tensors: {e}")
return 0.0
# Choose training method based on precision mode
if self.use_mixed_precision:
loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
else:
loss = self._replay_standard(states, actions, rewards, next_states, dones)
# Always use standard training to fix gradient issues
loss = self._replay_standard(states, actions, rewards, next_states, dones)
# Update epsilon
if self.epsilon > self.epsilon_min:
@ -1208,9 +1274,33 @@ class DQNAgent:
q_loss = self.criterion(current_q_values, target_q_values.detach())
# Use only Q-loss for now to ensure clean gradients
# Calculate auxiliary losses and add to Q-loss
total_loss = q_loss
# Add auxiliary losses if available
try:
# Get additional predictions from forward pass
if isinstance(q_values_output, tuple) and len(q_values_output) >= 5:
current_regime_pred = q_values_output[1]
current_price_pred = q_values_output[2]
current_volatility_pred = q_values_output[3]
current_extrema_pred = current_regime_pred # Use regime as extrema proxy for now
# Price direction loss
if current_price_pred is not None and current_price_pred.shape[0] > 0:
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
if price_direction_loss is not None:
total_loss = total_loss + 0.2 * price_direction_loss
# Extrema loss
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
if extrema_loss is not None:
total_loss = total_loss + 0.1 * extrema_loss
except Exception as e:
logger.debug(f"Could not add auxiliary loss in standard training: {e}")
# Reset gradients
self.optimizer.zero_grad()
@ -1309,13 +1399,17 @@ class DQNAgent:
# Add auxiliary losses if available
try:
# Price direction loss
if current_price_pred is not None and current_price_pred.shape[0] > 0:
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
if price_direction_loss is not None:
loss = loss + 0.2 * price_direction_loss
# Extrema loss
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
# Simple extrema targets
with torch.no_grad():
extrema_targets = torch.ones(current_extrema_pred.shape[0], dtype=torch.long, device=current_extrema_pred.device) * 2
extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
loss = loss + 0.1 * extrema_loss
extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
if extrema_loss is not None:
loss = loss + 0.1 * extrema_loss
except Exception as e:
logger.debug(f"Could not add auxiliary loss in mixed precision: {e}")
@ -1649,6 +1743,95 @@ class DQNAgent:
'exit_threshold': self.exit_confidence_threshold
}
def _calculate_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
"""
Calculate loss for price direction predictions
Args:
price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
rewards: Tensor of shape [batch] containing rewards
actions: Tensor of shape [batch] containing actions
Returns:
Price direction loss tensor
"""
try:
if price_direction_pred.size(1) != 2:
return None
batch_size = price_direction_pred.size(0)
# Extract direction and confidence predictions
direction_pred = price_direction_pred[:, 0] # -1 to 1
confidence_pred = price_direction_pred[:, 1] # 0 to 1
# Create targets based on rewards and actions
with torch.no_grad():
# Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise
direction_targets = torch.zeros(batch_size, device=price_direction_pred.device)
for i in range(batch_size):
if rewards[i] > 0.01: # Positive reward threshold
if actions[i] == 0: # BUY action
direction_targets[i] = 1.0 # UP
elif actions[i] == 1: # SELL action
direction_targets[i] = -1.0 # DOWN
# else: targets remain 0 (sideways)
# Confidence targets: based on reward magnitude (higher reward = higher confidence)
confidence_targets = torch.abs(rewards).clamp(0, 1)
# Calculate losses for each component
direction_loss = F.mse_loss(direction_pred, direction_targets)
confidence_loss = F.mse_loss(confidence_pred, confidence_targets)
# Combined loss (direction is more important than confidence)
total_loss = direction_loss + 0.3 * confidence_loss
return total_loss
except Exception as e:
logger.debug(f"Error calculating price direction loss: {e}")
return None
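The per-sample Python loop above can be replaced with tensor ops; a minimal vectorized sketch under the same conventions (action 0 = BUY, 1 = SELL, reward threshold 0.01; the function name is made up):

import torch

def price_direction_targets(rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """Vectorized form of the loop: +1 for rewarded BUYs, -1 for rewarded SELLs, else 0."""
    rewarded = rewards > 0.01
    up = (rewarded & (actions == 0)).float()    # BUY with positive reward -> UP
    down = (rewarded & (actions == 1)).float()  # SELL with positive reward -> DOWN
    return up - down                            # values in {-1, 0, 1}

rewards = torch.tensor([0.5, -0.2, 0.3, 0.0])
actions = torch.tensor([0, 0, 1, 2])
print(price_direction_targets(rewards, actions))  # tensor([ 1.,  0., -1.,  0.])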
def _calculate_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
"""
Calculate loss for extrema predictions
Args:
extrema_pred: Extrema predictions
rewards: Tensor containing rewards
actions: Tensor containing actions
Returns:
Extrema loss tensor
"""
try:
batch_size = extrema_pred.size(0)
# Create targets based on reward patterns
with torch.no_grad():
extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither"
for i in range(batch_size):
# High positive reward suggests we're at a good entry point (potential bottom for BUY, top for SELL)
if rewards[i] > 0.05:
if actions[i] == 0: # BUY action
extrema_targets[i] = 0 # Bottom
elif actions[i] == 1: # SELL action
extrema_targets[i] = 1 # Top
# Calculate cross-entropy loss
if extrema_pred.size(1) >= 3:
extrema_loss = F.cross_entropy(extrema_pred[:, :3], extrema_targets)
else:
extrema_loss = F.cross_entropy(extrema_pred, extrema_targets)
return extrema_loss
except Exception as e:
logger.debug(f"Error calculating extrema loss: {e}")
return None
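The extrema targets lend themselves to the same treatment; a sketch assuming classes 0 = bottom, 1 = top, 2 = neither and the 0.05 reward threshold used above (function name again made up):

import torch

def extrema_class_targets(rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """Rewarded BUYs -> bottom (0), rewarded SELLs -> top (1), everything else -> neither (2)."""
    targets = torch.full_like(actions, 2, dtype=torch.long)
    rewarded = rewards > 0.05
    targets[rewarded & (actions == 0)] = 0  # bottom
    targets[rewarded & (actions == 1)] = 1  # top
    return targets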
def get_enhanced_training_stats(self):
"""Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)"""
return {

View File

@ -265,8 +265,9 @@ class EnhancedCNN(nn.Module):
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
)
# ULTRA MASSIVE multi-timeframe price prediction heads
self.price_pred_immediate = nn.Sequential(
# ULTRA MASSIVE price direction prediction head
# Outputs a single direction value and a confidence value
self.price_direction_head = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
@ -275,32 +276,13 @@ class EnhancedCNN(nn.Module):
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
nn.Linear(256, 2) # [direction, confidence]
)
self.price_pred_midterm = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
self.price_pred_longterm = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# ULTRA MASSIVE value prediction with ensemble approaches
self.price_pred_value = nn.Sequential(
@ -490,10 +472,14 @@ class EnhancedCNN(nn.Module):
# Extrema predictions (bottom/top/neither detection)
extrema_pred = self.extrema_head(features_refined)
# Multi-timeframe price movement predictions
price_immediate = self.price_pred_immediate(features_refined)
price_midterm = self.price_pred_midterm(features_refined)
price_longterm = self.price_pred_longterm(features_refined)
# Price direction predictions
price_direction_raw = self.price_direction_head(features_refined)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
price_values = self.price_pred_value(features_refined)
# Additional specialized predictions for enhanced accuracy
@ -502,15 +488,14 @@ class EnhancedCNN(nn.Module):
market_regime_pred = self.market_regime_head(features_refined)
risk_pred = self.risk_head(features_refined)
# Package all price predictions into a single tensor (use immediate as primary)
# For compatibility with DQN agent, we return price_immediate as the price prediction tensor
price_pred_tensor = price_immediate
# Use the price direction prediction directly (already [batch, 2])
price_direction_tensor = price_direction_pred
# Package additional predictions into a single tensor (use volatility as primary)
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
advanced_pred_tensor = volatility_pred
return q_values, extrema_pred, price_pred_tensor, features_refined, advanced_pred_tensor
return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
"""Enhanced action selection with ultra massive model predictions"""
@ -528,7 +513,11 @@ class EnhancedCNN(nn.Module):
state_tensor = state_tensor.unsqueeze(0)
with torch.no_grad():
q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)
q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
# Process price direction predictions
if price_direction_predictions is not None:
self.process_price_direction_predictions(price_direction_predictions)
# Apply softmax to get action probabilities
action_probs_tensor = torch.softmax(q_values, dim=1)
@ -565,6 +554,100 @@ class EnhancedCNN(nn.Module):
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
return action_idx, confidence, action_probs
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
"""
Process price direction predictions and convert to standardized format
Args:
price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
try:
if price_direction_pred is None or price_direction_pred.numel() == 0:
return {}
# Extract direction and confidence values
direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1
confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1
processed_directions = {
'direction': direction_value,
'confidence': confidence_value
}
# Store for later access
self.last_price_direction = processed_directions
return processed_directions
except Exception as e:
logger.error(f"Error processing price direction predictions: {e}")
return {}
def get_price_direction_vector(self) -> Dict[str, float]:
"""
Get the current price direction and confidence
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
return getattr(self, 'last_price_direction', {})
def get_price_direction_summary(self) -> Dict[str, Any]:
"""
Get a summary of price direction prediction
Returns:
Dict containing direction and confidence information
"""
try:
last_direction = getattr(self, 'last_price_direction', {})
if not last_direction:
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
direction_value = last_direction['direction']
confidence_value = last_direction['confidence']
# Convert to discrete direction
if direction_value > 0.1:
direction_label = "UP"
discrete_direction = 1
elif direction_value < -0.1:
direction_label = "DOWN"
discrete_direction = -1
else:
direction_label = "SIDEWAYS"
discrete_direction = 0
return {
'direction_value': float(direction_value),
'confidence_value': float(confidence_value),
'direction_label': direction_label,
'discrete_direction': discrete_direction,
'strength': abs(float(direction_value)),
'weighted_strength': abs(float(direction_value)) * float(confidence_value)
}
except Exception as e:
logger.error(f"Error calculating price direction summary: {e}")
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
def save(self, path):
"""Save model weights and architecture"""

View File

@ -719,6 +719,95 @@ class TradingOrchestrator:
except Exception as e:
logger.error(f"Error initializing ML models: {e}")
def _calculate_cnn_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
"""
Calculate price direction loss for CNN model
Args:
price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
rewards: Tensor of shape [batch] containing rewards
actions: Tensor of shape [batch] containing actions
Returns:
Price direction loss tensor
"""
try:
if price_direction_pred.size(1) != 2:
return None
batch_size = price_direction_pred.size(0)
# Extract direction and confidence predictions
direction_pred = price_direction_pred[:, 0] # -1 to 1
confidence_pred = price_direction_pred[:, 1] # 0 to 1
# Create targets based on rewards and actions
with torch.no_grad():
# Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise
direction_targets = torch.zeros(batch_size, device=price_direction_pred.device)
for i in range(batch_size):
if rewards[i] > 0.01: # Positive reward threshold
if actions[i] == 0: # BUY action
direction_targets[i] = 1.0 # UP
elif actions[i] == 1: # SELL action
direction_targets[i] = -1.0 # DOWN
# else: targets remain 0 (sideways)
# Confidence targets: based on reward magnitude (higher reward = higher confidence)
confidence_targets = torch.abs(rewards).clamp(0, 1)
# Calculate losses for each component
direction_loss = nn.MSELoss()(direction_pred, direction_targets)
confidence_loss = nn.MSELoss()(confidence_pred, confidence_targets)
# Combined loss (direction is more important than confidence)
total_loss = direction_loss + 0.3 * confidence_loss
return total_loss
except Exception as e:
logger.debug(f"Error calculating CNN price direction loss: {e}")
return None
def _calculate_cnn_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
"""
Calculate extrema loss for CNN model
Args:
extrema_pred: Extrema predictions
rewards: Tensor containing rewards
actions: Tensor containing actions
Returns:
Extrema loss tensor
"""
try:
batch_size = extrema_pred.size(0)
# Create targets based on reward patterns
with torch.no_grad():
extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither"
for i in range(batch_size):
# High positive reward suggests we're at a good entry point
if rewards[i] > 0.05:
if actions[i] == 0: # BUY action
extrema_targets[i] = 0 # Bottom
elif actions[i] == 1: # SELL action
extrema_targets[i] = 1 # Top
# Calculate cross-entropy loss
if extrema_pred.size(1) >= 3:
extrema_loss = nn.CrossEntropyLoss()(extrema_pred[:, :3], extrema_targets)
else:
extrema_loss = nn.CrossEntropyLoss()(extrema_pred, extrema_targets)
return extrema_loss
except Exception as e:
logger.debug(f"Error calculating CNN extrema loss: {e}")
return None
def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None):
"""Update model loss and potentially best loss"""
if model_name in self.model_states:
@ -1938,7 +2027,71 @@ class TradingOrchestrator:
# Evaluate the previous prediction and train the model immediately
await self._evaluate_and_train_on_record(inference_record, current_price)
logger.info(f"Completed immediate training for {model_name}")
# Log predicted vs actual outcome
prediction = inference_record.get('prediction', {})
predicted_action = prediction.get('action', 'UNKNOWN')
predicted_confidence = prediction.get('confidence', 0.0)
# Calculate actual outcome
symbol = inference_record.get('symbol', 'ETH/USDT')
predicted_price = None
actual_price_change_pct = 0.0
# Try to get price direction vectors from metadata (new format)
if 'price_direction' in prediction and prediction['price_direction']:
try:
price_direction_data = prediction['price_direction']
# Process price direction data
if isinstance(price_direction_data, dict) and 'direction' in price_direction_data:
direction = price_direction_data['direction']
confidence = price_direction_data.get('confidence', 1.0)
# Convert direction to price change percentage
# Scale by confidence and direction strength
predicted_price_change_pct = direction * confidence * 0.02 # 2% max change
predicted_price = current_price * (1 + predicted_price_change_pct)
except Exception as e:
logger.debug(f"Error processing price direction data: {e}")
# Fallback to old price prediction format
elif 'price_prediction' in prediction and prediction['price_prediction']:
try:
price_prediction_data = prediction['price_prediction']
if isinstance(price_prediction_data, list) and len(price_prediction_data) > 0:
predicted_price_change_pct = float(price_prediction_data[0]) * 0.01
predicted_price = current_price * (1 + predicted_price_change_pct)
except Exception:
pass
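# Worked example of the direction-to-price scaling above (illustrative values):
#   direction = 0.5, confidence = 0.8  ->  0.5 * 0.8 * 0.02 = 0.008 (+0.8%)
#   predicted_price = current_price * 1.008; the 0.02 factor caps the implied move at +/-2%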
# Calculate price change
if predicted_price is not None:
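# Note: with a predicted_price this measures how far the realized price deviated
# from the prediction, not the raw market move since prediction time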
actual_price_change_pct = (current_price - predicted_price) / predicted_price * 100
price_outcome = f"Predicted: ${predicted_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
# Fall back to historical price comparison
historical_data = self.data_provider.get_historical_data(symbol, '1m', limit=10)
if historical_data is not None and not historical_data.empty:
historical_price = historical_data['close'].iloc[-1]
actual_price_change_pct = (current_price - historical_price) / historical_price * 100
price_outcome = f"Historical: ${historical_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
price_outcome = f"Actual: ${current_price:.2f}"
# Determine if prediction was correct based on action and price movement
was_correct = False
if predicted_action == 'BUY' and actual_price_change_pct > 0.1: # Price went up
was_correct = True
elif predicted_action == 'SELL' and actual_price_change_pct < -0.1: # Price went down
was_correct = True
elif predicted_action == 'HOLD' and abs(actual_price_change_pct) < 0.5: # Price stayed stable
was_correct = True
outcome_status = "✅ CORRECT" if was_correct else "❌ INCORRECT"
logger.info(f"Completed immediate training for {model_name} - {outcome_status}")
logger.info(f" Prediction: {predicted_action} ({predicted_confidence:.3f})")
logger.info(f" {price_outcome}")
logger.info(f" Outcome: {outcome_status}")
except Exception as e:
logger.error(f"Error in immediate training for {model_name}: {e}")
@ -2412,12 +2565,33 @@ class TradingOrchestrator:
self.cnn_optimizer.zero_grad()
# Forward pass
q_values, extrema_pred, price_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
q_values, extrema_pred, price_direction_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
# Calculate loss
# Calculate primary Q-value loss
q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
target_q = reward_tensor # Simplified target
loss = nn.MSELoss()(q_values_selected, target_q)
q_loss = nn.MSELoss()(q_values_selected, target_q)
# Calculate auxiliary losses for price direction and extrema
total_loss = q_loss
# Price direction loss
if price_direction_pred is not None and price_direction_pred.shape[0] > 0:
price_direction_loss = self._calculate_cnn_price_direction_loss(
price_direction_pred, reward_tensor, action_tensor
)
if price_direction_loss is not None:
total_loss = total_loss + 0.2 * price_direction_loss
# Extrema loss
if extrema_pred is not None and extrema_pred.shape[0] > 0:
extrema_loss = self._calculate_cnn_extrema_loss(
extrema_pred, reward_tensor, action_tensor
)
if extrema_loss is not None:
total_loss = total_loss + 0.1 * extrema_loss
loss = total_loss
# Backward pass
training_start_time = time.time()
@ -2640,9 +2814,17 @@ class TradingOrchestrator:
'HOLD': float(action_probs[0, 2].item())
}
# Extract price predictions if available
price_prediction = None
# Extract price direction predictions if available
price_direction_data = None
if price_pred is not None:
# Process price direction predictions
if hasattr(model.model, 'process_price_direction_predictions'):
try:
price_direction_data = model.model.process_price_direction_predictions(price_pred)
except Exception as e:
logger.debug(f"Error processing CNN price direction: {e}")
# Fallback to old format for compatibility
price_prediction = price_pred.squeeze(0).cpu().numpy().tolist()
prediction = Prediction(
@ -2656,6 +2838,7 @@ class TradingOrchestrator:
'feature_size': len(base_data.get_feature_vector()),
'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'],
'price_prediction': price_prediction,
'price_direction': price_direction_data,
'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None
}
)
@ -2694,6 +2877,14 @@ class TradingOrchestrator:
action_names = ['BUY', 'SELL', 'HOLD'] # Note: enhanced_cnn uses this order
best_action = action_names[action_idx]
# Get price direction vectors from CNN model if available
price_direction_data = None
if hasattr(model.model, 'get_price_direction_vector'):
try:
price_direction_data = model.model.get_price_direction_vector()
except Exception as e:
logger.debug(f"Error getting price direction from CNN: {e}")
pred = Prediction(
action=best_action,
confidence=float(confidence),
@ -2708,7 +2899,8 @@ class TradingOrchestrator:
metadata={
'feature_vector_size': len(feature_vector),
'unified_input': True,
'fallback_method': 'direct_model_inference'
'fallback_method': 'direct_model_inference',
'price_direction': price_direction_data
}
)
predictions.append(pred)
@ -2811,6 +3003,14 @@ class TradingOrchestrator:
if q_values_for_capture:
logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.")
# Get price direction vectors from DQN model if available
price_direction_data = None
if hasattr(model.model, 'get_price_direction_vector'):
try:
price_direction_data = model.model.get_price_direction_vector()
except Exception as e:
logger.debug(f"Error getting price direction from DQN: {e}")
prediction = Prediction(
action=action,
confidence=float(confidence),
@ -2818,7 +3018,10 @@ class TradingOrchestrator:
timeframe='mixed', # RL uses mixed timeframes
timestamp=datetime.now(),
model_name=model.name,
metadata={'state_size': len(state)}
metadata={
'state_size': len(state),
'price_direction': price_direction_data
}
)
# Capture DQN prediction for dashboard visualization