predict price direction
This commit is contained in:
@ -72,8 +72,10 @@ Based on the existing implementation in `core/data_provider.py`, we'll enhance i
|
||||
- OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
|
||||
- COB: for each 1s OHLCV frame we have ±20 buckets of COB amounts in USD
|
||||
- 1,5,15 and 60s MA of the COB imbalance counting +- 5 COB buckets
|
||||
- ***OUTPUTS***: suggested trade action (BUY/SELL)
|
||||
|
||||
- ***OUTPUTS***:
|
||||
- suggested trade action (BUY/SELL/HOLD). Paired with confidence
|
||||
- immediate price movement direction vector (-1: vertical down, 1: vertical up, 0: horizontal) - linear; with its own confidence
|
||||
|
||||
# Standardized input for all models:
|
||||
{
|
||||
'primary_symbol': 'ETH/USDT',
|
||||
|
@ -4,7 +4,7 @@ import torch.optim as optim
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
import random
|
||||
from typing import Tuple, List
|
||||
from typing import Tuple, List, Dict, Any
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
@ -84,8 +84,8 @@ class DQNNetwork(nn.Module):
|
||||
nn.Linear(512, 4) # trending, ranging, volatile, mixed
|
||||
)
|
||||
|
||||
# Price prediction head
|
||||
self.price_head = nn.Sequential(
|
||||
# Price direction prediction head - outputs direction and confidence
|
||||
self.price_direction_head = nn.Sequential(
|
||||
nn.Linear(2048, 1024),
|
||||
nn.LayerNorm(1024),
|
||||
nn.ReLU(inplace=True),
|
||||
@ -93,9 +93,14 @@ class DQNNetwork(nn.Module):
|
||||
nn.Linear(1024, 512),
|
||||
nn.LayerNorm(512),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(512, 3) # short, medium, long term price direction
|
||||
nn.Linear(512, 2) # [direction, confidence]
|
||||
)
|
||||
|
||||
# Direction activation (tanh for -1 to 1)
|
||||
self.direction_activation = nn.Tanh()
|
||||
# Confidence activation (sigmoid for 0 to 1)
|
||||
self.confidence_activation = nn.Sigmoid()
|
||||
|
||||
# Volatility prediction head
|
||||
self.volatility_head = nn.Sequential(
|
||||
nn.Linear(2048, 1024),
|
||||
@ -105,7 +110,7 @@ class DQNNetwork(nn.Module):
|
||||
nn.Linear(1024, 256),
|
||||
nn.LayerNorm(256),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(256, 1) # predicted volatility
|
||||
nn.Linear(256, 4) # predicted volatility for 4 timeframes
|
||||
)
|
||||
|
||||
# Main Q-value head (dueling architecture)
|
||||
@ -162,7 +167,13 @@ class DQNNetwork(nn.Module):
|
||||
|
||||
# Multiple prediction heads
|
||||
regime_pred = self.regime_head(features)
|
||||
price_pred = self.price_head(features)
|
||||
price_direction_raw = self.price_direction_head(features)
|
||||
|
||||
# Apply separate activations to direction and confidence
|
||||
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
|
||||
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
|
||||
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
|
||||
|
||||
volatility_pred = self.volatility_head(features)
|
||||
|
||||
# Dueling Q-network
|
||||
@ -172,7 +183,7 @@ class DQNNetwork(nn.Module):
|
||||
# Combine value and advantage for Q-values
|
||||
q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
|
||||
|
||||
return q_values, regime_pred, price_pred, volatility_pred, features
|
||||
return q_values, regime_pred, price_direction_pred, volatility_pred, features
|
||||
|
||||
def act(self, state, explore=True):
|
||||
"""
|
||||
@ -196,7 +207,11 @@ class DQNNetwork(nn.Module):
|
||||
state = state.unsqueeze(0)
|
||||
|
||||
with torch.no_grad():
|
||||
q_values, regime_pred, price_pred, volatility_pred, features = self.forward(state)
|
||||
q_values, regime_pred, price_direction_pred, volatility_pred, features = self.forward(state)
|
||||
|
||||
# Process price direction predictions
|
||||
if price_direction_pred is not None:
|
||||
self.process_price_direction_predictions(price_direction_pred)
|
||||
|
||||
# Get action probabilities using softmax
|
||||
action_probs = F.softmax(q_values, dim=1)
|
||||
@ -332,23 +347,10 @@ class DQNAgent:
|
||||
self.recent_prices = deque(maxlen=20)
|
||||
self.recent_rewards = deque(maxlen=100)
|
||||
|
||||
# Price prediction tracking
|
||||
self.last_price_pred = {
|
||||
'immediate': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'midterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'longterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
}
|
||||
# Price direction tracking - stores direction and confidence
|
||||
self.last_price_direction = {
|
||||
'direction': 0.0, # Single value between -1 and 1
|
||||
'confidence': 0.0 # Single value between 0 and 1
|
||||
}
|
||||
|
||||
# Store separate memory for price direction examples
|
||||
@ -521,25 +523,6 @@ class DQNAgent:
|
||||
logger.error(f"Error saving DQN checkpoint: {e}")
|
||||
return False
|
||||
|
||||
# Price prediction tracking
|
||||
self.last_price_pred = {
|
||||
'immediate': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'midterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'longterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# Store separate memory for price direction examples
|
||||
self.price_movement_memory = [] # For storing examples of clear price movements
|
||||
|
||||
@ -811,6 +794,92 @@ class DQNAgent:
|
||||
logger.error(f"Error in act_with_confidence: {e}")
|
||||
# Return default action with low confidence
|
||||
return 1, 0.1, [0.45, 0.55] # Default to HOLD action
|
||||
|
||||
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
    """
    Record the latest price direction prediction in a standardized format.

    Only the first sample of the prediction is read. On success the result
    replaces ``self.last_price_direction``; on any failure the previously
    stored value is returned unchanged.

    Args:
        price_direction_pred: Tensor containing [direction, confidence], either
            batched with shape (batch_size, 2) or a single un-batched vector of
            shape (2,). ``None`` or an empty tensor is treated as "no new
            prediction".

    Returns:
        Dict with keys 'direction' and 'confidence' (documented by the model
        head as -1..1 and 0..1 respectively; the values are not re-validated
        here).
    """
    if price_direction_pred is None or price_direction_pred.numel() == 0:
        return self.last_price_direction

    try:
        sample = price_direction_pred.detach()
        # A lone (2,) vector is a valid single prediction; give it a batch axis
        # so the indexing below works for both layouts.
        if sample.dim() == 1:
            sample = sample.unsqueeze(0)

        direction_value = float(sample[0, 0].item())    # -1 to 1
        confidence_value = float(sample[0, 1].item())   # 0 to 1

        # Store a fresh dict so references handed out earlier are not mutated.
        self.last_price_direction = {
            'direction': direction_value,
            'confidence': confidence_value
        }
        return self.last_price_direction

    except Exception as e:
        # Best effort: this runs on the inference path, so a malformed tensor
        # must not break action selection.
        logger.error(f"Error processing price direction predictions: {e}")
        return self.last_price_direction
|
||||
|
||||
def get_price_direction_vector(self) -> Dict[str, float]:
    """
    Get the current price direction and confidence.

    Returns:
        A copy of ``self.last_price_direction`` (keys 'direction' and
        'confidence'). A copy is returned so callers cannot alter the stored
        prediction by mutating the result.
    """
    return dict(self.last_price_direction)
|
||||
|
||||
def get_price_direction_summary(self, *, sideways_threshold: float = 0.1) -> Dict[str, Any]:
    """
    Get a summary of the last price direction prediction.

    Args:
        sideways_threshold: Half-width of the band around zero that is
            reported as "SIDEWAYS". Directions strictly above it are "UP",
            strictly below its negative are "DOWN". Defaults to 0.1.

    Returns:
        Dict with keys:
            'direction_value': stored direction as float
            'confidence_value': stored confidence as float
            'direction_label': "UP", "DOWN" or "SIDEWAYS"
            'discrete_direction': 1, -1 or 0 matching the label
            'strength': absolute value of the direction
            'weighted_strength': strength multiplied by confidence
        If the stored prediction cannot be read, a neutral summary (all
        zeros, "SIDEWAYS") is returned and the error is logged.
    """
    neutral_summary = {
        'direction_value': 0.0,
        'confidence_value': 0.0,
        'direction_label': "SIDEWAYS",
        'discrete_direction': 0,
        'strength': 0.0,
        'weighted_strength': 0.0
    }

    try:
        direction_value = float(self.last_price_direction['direction'])
        confidence_value = float(self.last_price_direction['confidence'])
    except Exception as e:
        logger.error(f"Error calculating price direction summary: {e}")
        return neutral_summary

    # Convert the continuous direction to a discrete label.
    if direction_value > sideways_threshold:
        direction_label, discrete_direction = "UP", 1
    elif direction_value < -sideways_threshold:
        direction_label, discrete_direction = "DOWN", -1
    else:
        direction_label, discrete_direction = "SIDEWAYS", 0

    strength = abs(direction_value)
    return {
        'direction_value': direction_value,
        'confidence_value': confidence_value,
        'direction_label': direction_label,
        'discrete_direction': discrete_direction,
        'strength': strength,
        'weighted_strength': strength * confidence_value
    }
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in act_with_confidence: {e}")
|
||||
# Return default action with low confidence
|
||||
return 1, 0.1, [0.45, 0.55] # Default to HOLD action
|
||||
|
||||
def _determine_action_with_position_management(self, sell_conf, buy_conf, current_price, market_context, explore):
|
||||
"""
|
||||
@ -1032,11 +1101,8 @@ class DQNAgent:
|
||||
logger.error(f"Error converting experiences to tensors: {e}")
|
||||
return 0.0
|
||||
|
||||
# Choose training method based on precision mode
|
||||
if self.use_mixed_precision:
|
||||
loss = self._replay_mixed_precision(states, actions, rewards, next_states, dones)
|
||||
else:
|
||||
loss = self._replay_standard(states, actions, rewards, next_states, dones)
|
||||
# Always use standard training to fix gradient issues
|
||||
loss = self._replay_standard(states, actions, rewards, next_states, dones)
|
||||
|
||||
# Update epsilon
|
||||
if self.epsilon > self.epsilon_min:
|
||||
@ -1208,9 +1274,33 @@ class DQNAgent:
|
||||
|
||||
q_loss = self.criterion(current_q_values, target_q_values.detach())
|
||||
|
||||
# Use only Q-loss for now to ensure clean gradients
|
||||
# Calculate auxiliary losses and add to Q-loss
|
||||
total_loss = q_loss
|
||||
|
||||
# Add auxiliary losses if available
|
||||
try:
|
||||
# Get additional predictions from forward pass
|
||||
if isinstance(q_values_output, tuple) and len(q_values_output) >= 5:
|
||||
current_regime_pred = q_values_output[1]
|
||||
current_price_pred = q_values_output[2]
|
||||
current_volatility_pred = q_values_output[3]
|
||||
current_extrema_pred = current_regime_pred # Use regime as extrema proxy for now
|
||||
|
||||
# Price direction loss
|
||||
if current_price_pred is not None and current_price_pred.shape[0] > 0:
|
||||
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
|
||||
if price_direction_loss is not None:
|
||||
total_loss = total_loss + 0.2 * price_direction_loss
|
||||
|
||||
# Extrema loss
|
||||
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
|
||||
extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
|
||||
if extrema_loss is not None:
|
||||
total_loss = total_loss + 0.1 * extrema_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not add auxiliary loss in standard training: {e}")
|
||||
|
||||
# Reset gradients
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
@ -1309,13 +1399,17 @@ class DQNAgent:
|
||||
|
||||
# Add auxiliary losses if available
|
||||
try:
|
||||
# Price direction loss
|
||||
if current_price_pred is not None and current_price_pred.shape[0] > 0:
|
||||
price_direction_loss = self._calculate_price_direction_loss(current_price_pred, rewards, actions)
|
||||
if price_direction_loss is not None:
|
||||
loss = loss + 0.2 * price_direction_loss
|
||||
|
||||
# Extrema loss
|
||||
if current_extrema_pred is not None and current_extrema_pred.shape[0] > 0:
|
||||
# Simple extrema targets
|
||||
with torch.no_grad():
|
||||
extrema_targets = torch.ones(current_extrema_pred.shape[0], dtype=torch.long, device=current_extrema_pred.device) * 2
|
||||
|
||||
extrema_loss = F.cross_entropy(current_extrema_pred, extrema_targets)
|
||||
loss = loss + 0.1 * extrema_loss
|
||||
extrema_loss = self._calculate_extrema_loss(current_extrema_pred, rewards, actions)
|
||||
if extrema_loss is not None:
|
||||
loss = loss + 0.1 * extrema_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not add auxiliary loss in mixed precision: {e}")
|
||||
@ -1649,6 +1743,95 @@ class DQNAgent:
|
||||
'exit_threshold': self.exit_confidence_threshold
|
||||
}
|
||||
|
||||
def _calculate_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Calculate loss for price direction predictions
|
||||
|
||||
Args:
|
||||
price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
|
||||
rewards: Tensor of shape [batch] containing rewards
|
||||
actions: Tensor of shape [batch] containing actions
|
||||
|
||||
Returns:
|
||||
Price direction loss tensor
|
||||
"""
|
||||
try:
|
||||
if price_direction_pred.size(1) != 2:
|
||||
return None
|
||||
|
||||
batch_size = price_direction_pred.size(0)
|
||||
|
||||
# Extract direction and confidence predictions
|
||||
direction_pred = price_direction_pred[:, 0] # -1 to 1
|
||||
confidence_pred = price_direction_pred[:, 1] # 0 to 1
|
||||
|
||||
# Create targets based on rewards and actions
|
||||
with torch.no_grad():
|
||||
# Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise
|
||||
direction_targets = torch.zeros(batch_size, device=price_direction_pred.device)
|
||||
for i in range(batch_size):
|
||||
if rewards[i] > 0.01: # Positive reward threshold
|
||||
if actions[i] == 0: # BUY action
|
||||
direction_targets[i] = 1.0 # UP
|
||||
elif actions[i] == 1: # SELL action
|
||||
direction_targets[i] = -1.0 # DOWN
|
||||
# else: targets remain 0 (sideways)
|
||||
|
||||
# Confidence targets: based on reward magnitude (higher reward = higher confidence)
|
||||
confidence_targets = torch.abs(rewards).clamp(0, 1)
|
||||
|
||||
# Calculate losses for each component
|
||||
direction_loss = F.mse_loss(direction_pred, direction_targets)
|
||||
confidence_loss = F.mse_loss(confidence_pred, confidence_targets)
|
||||
|
||||
# Combined loss (direction is more important than confidence)
|
||||
total_loss = direction_loss + 0.3 * confidence_loss
|
||||
|
||||
return total_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error calculating price direction loss: {e}")
|
||||
return None
|
||||
|
||||
def _calculate_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Calculate loss for extrema predictions
|
||||
|
||||
Args:
|
||||
extrema_pred: Extrema predictions
|
||||
rewards: Tensor containing rewards
|
||||
actions: Tensor containing actions
|
||||
|
||||
Returns:
|
||||
Extrema loss tensor
|
||||
"""
|
||||
try:
|
||||
batch_size = extrema_pred.size(0)
|
||||
|
||||
# Create targets based on reward patterns
|
||||
with torch.no_grad():
|
||||
extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither"
|
||||
|
||||
for i in range(batch_size):
|
||||
# High positive reward suggests we're at a good entry point (potential bottom for BUY, top for SELL)
|
||||
if rewards[i] > 0.05:
|
||||
if actions[i] == 0: # BUY action
|
||||
extrema_targets[i] = 0 # Bottom
|
||||
elif actions[i] == 1: # SELL action
|
||||
extrema_targets[i] = 1 # Top
|
||||
|
||||
# Calculate cross-entropy loss
|
||||
if extrema_pred.size(1) >= 3:
|
||||
extrema_loss = F.cross_entropy(extrema_pred[:, :3], extrema_targets)
|
||||
else:
|
||||
extrema_loss = F.cross_entropy(extrema_pred, extrema_targets)
|
||||
|
||||
return extrema_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error calculating extrema loss: {e}")
|
||||
return None
|
||||
|
||||
def get_enhanced_training_stats(self):
|
||||
"""Get enhanced RL training statistics with detailed metrics (from EnhancedDQNAgent)"""
|
||||
return {
|
||||
|
@ -265,8 +265,9 @@ class EnhancedCNN(nn.Module):
|
||||
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
|
||||
)
|
||||
|
||||
# ULTRA MASSIVE multi-timeframe price prediction heads
|
||||
self.price_pred_immediate = nn.Sequential(
|
||||
# ULTRA MASSIVE price direction prediction head
|
||||
# Outputs single direction and confidence values
|
||||
self.price_direction_head = nn.Sequential(
|
||||
nn.Linear(1024, 1024), # Increased from 512
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.3),
|
||||
@ -275,32 +276,13 @@ class EnhancedCNN(nn.Module):
|
||||
nn.Dropout(0.3),
|
||||
nn.Linear(512, 256), # Increased from 128
|
||||
nn.ReLU(),
|
||||
nn.Linear(256, 3) # Up, Down, Sideways
|
||||
nn.Linear(256, 2) # [direction, confidence]
|
||||
)
|
||||
|
||||
self.price_pred_midterm = nn.Sequential(
|
||||
nn.Linear(1024, 1024), # Increased from 512
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.3),
|
||||
nn.Linear(1024, 512), # Increased from 256
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.3),
|
||||
nn.Linear(512, 256), # Increased from 128
|
||||
nn.ReLU(),
|
||||
nn.Linear(256, 3) # Up, Down, Sideways
|
||||
)
|
||||
|
||||
self.price_pred_longterm = nn.Sequential(
|
||||
nn.Linear(1024, 1024), # Increased from 512
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.3),
|
||||
nn.Linear(1024, 512), # Increased from 256
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.3),
|
||||
nn.Linear(512, 256), # Increased from 128
|
||||
nn.ReLU(),
|
||||
nn.Linear(256, 3) # Up, Down, Sideways
|
||||
)
|
||||
# Direction activation (tanh for -1 to 1)
|
||||
self.direction_activation = nn.Tanh()
|
||||
# Confidence activation (sigmoid for 0 to 1)
|
||||
self.confidence_activation = nn.Sigmoid()
|
||||
|
||||
# ULTRA MASSIVE value prediction with ensemble approaches
|
||||
self.price_pred_value = nn.Sequential(
|
||||
@ -490,10 +472,14 @@ class EnhancedCNN(nn.Module):
|
||||
# Extrema predictions (bottom/top/neither detection)
|
||||
extrema_pred = self.extrema_head(features_refined)
|
||||
|
||||
# Multi-timeframe price movement predictions
|
||||
price_immediate = self.price_pred_immediate(features_refined)
|
||||
price_midterm = self.price_pred_midterm(features_refined)
|
||||
price_longterm = self.price_pred_longterm(features_refined)
|
||||
# Price direction predictions
|
||||
price_direction_raw = self.price_direction_head(features_refined)
|
||||
|
||||
# Apply separate activations to direction and confidence
|
||||
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
|
||||
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
|
||||
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
|
||||
|
||||
price_values = self.price_pred_value(features_refined)
|
||||
|
||||
# Additional specialized predictions for enhanced accuracy
|
||||
@ -502,15 +488,14 @@ class EnhancedCNN(nn.Module):
|
||||
market_regime_pred = self.market_regime_head(features_refined)
|
||||
risk_pred = self.risk_head(features_refined)
|
||||
|
||||
# Package all price predictions into a single tensor (use immediate as primary)
|
||||
# For compatibility with DQN agent, we return price_immediate as the price prediction tensor
|
||||
price_pred_tensor = price_immediate
|
||||
# Use the price direction prediction directly (already [batch, 2])
|
||||
price_direction_tensor = price_direction_pred
|
||||
|
||||
# Package additional predictions into a single tensor (use volatility as primary)
|
||||
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
|
||||
advanced_pred_tensor = volatility_pred
|
||||
|
||||
return q_values, extrema_pred, price_pred_tensor, features_refined, advanced_pred_tensor
|
||||
return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
|
||||
|
||||
def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
|
||||
"""Enhanced action selection with ultra massive model predictions"""
|
||||
@ -528,7 +513,11 @@ class EnhancedCNN(nn.Module):
|
||||
state_tensor = state_tensor.unsqueeze(0)
|
||||
|
||||
with torch.no_grad():
|
||||
q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)
|
||||
q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
|
||||
|
||||
# Process price direction predictions
|
||||
if price_direction_predictions is not None:
|
||||
self.process_price_direction_predictions(price_direction_predictions)
|
||||
|
||||
# Apply softmax to get action probabilities
|
||||
action_probs_tensor = torch.softmax(q_values, dim=1)
|
||||
@ -565,6 +554,100 @@ class EnhancedCNN(nn.Module):
|
||||
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
|
||||
|
||||
return action_idx, confidence, action_probs
|
||||
|
||||
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
    """
    Process price direction predictions and convert to standardized format.

    Only the first sample of the prediction is read. On success the result is
    also stored on ``self.last_price_direction`` for later access.

    Args:
        price_direction_pred: Tensor containing [direction, confidence], either
            batched with shape (batch_size, 2) or a single un-batched vector of
            shape (2,).

    Returns:
        Dict with keys 'direction' and 'confidence' (documented by the model
        head as -1..1 and 0..1 respectively), or an empty dict when the input
        is ``None``, empty, or cannot be read.
    """
    if price_direction_pred is None or price_direction_pred.numel() == 0:
        return {}

    try:
        sample = price_direction_pred.detach()
        # A lone (2,) vector is a valid single prediction; give it a batch axis
        # so the indexing below works for both layouts.
        if sample.dim() == 1:
            sample = sample.unsqueeze(0)

        processed_directions = {
            'direction': float(sample[0, 0].item()),   # -1 to 1
            'confidence': float(sample[0, 1].item())   # 0 to 1
        }

        # Store for later access
        self.last_price_direction = processed_directions
        return processed_directions

    except Exception as e:
        # Best effort: a malformed tensor must not break action selection.
        logger.error(f"Error processing price direction predictions: {e}")
        return {}
|
||||
|
||||
def get_price_direction_vector(self) -> Dict[str, float]:
    """
    Get the current price direction and confidence.

    Returns:
        A copy of the last stored prediction (keys 'direction' and
        'confidence'), or an empty dict if no prediction has been stored yet.
        A copy is returned so callers cannot alter the stored prediction by
        mutating the result.
    """
    return dict(getattr(self, 'last_price_direction', {}))
|
||||
|
||||
def get_price_direction_summary(self, *, sideways_threshold: float = 0.1) -> Dict[str, Any]:
    """
    Get a summary of the last price direction prediction.

    Args:
        sideways_threshold: Half-width of the band around zero that is
            reported as "SIDEWAYS". Directions strictly above it are "UP",
            strictly below its negative are "DOWN". Defaults to 0.1.

    Returns:
        Dict with keys:
            'direction_value': stored direction as float
            'confidence_value': stored confidence as float
            'direction_label': "UP", "DOWN" or "SIDEWAYS"
            'discrete_direction': 1, -1 or 0 matching the label
            'strength': absolute value of the direction
            'weighted_strength': strength multiplied by confidence
        A neutral summary (all zeros, "SIDEWAYS") is returned when no
        prediction has been stored yet or the stored one cannot be read.
    """
    neutral_summary = {
        'direction_value': 0.0,
        'confidence_value': 0.0,
        'direction_label': "SIDEWAYS",
        'discrete_direction': 0,
        'strength': 0.0,
        'weighted_strength': 0.0
    }

    # No prediction stored yet is a normal state, not an error.
    last_direction = getattr(self, 'last_price_direction', {})
    if not last_direction:
        return neutral_summary

    try:
        direction_value = float(last_direction['direction'])
        confidence_value = float(last_direction['confidence'])
    except Exception as e:
        logger.error(f"Error calculating price direction summary: {e}")
        return neutral_summary

    # Convert the continuous direction to a discrete label.
    if direction_value > sideways_threshold:
        direction_label, discrete_direction = "UP", 1
    elif direction_value < -sideways_threshold:
        direction_label, discrete_direction = "DOWN", -1
    else:
        direction_label, discrete_direction = "SIDEWAYS", 0

    strength = abs(direction_value)
    return {
        'direction_value': direction_value,
        'confidence_value': confidence_value,
        'direction_label': direction_label,
        'discrete_direction': discrete_direction,
        'strength': strength,
        'weighted_strength': strength * confidence_value
    }
|
||||
|
||||
def save(self, path):
|
||||
"""Save model weights and architecture"""
|
||||
|
@ -719,6 +719,95 @@ class TradingOrchestrator:
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing ML models: {e}")
|
||||
|
||||
def _calculate_cnn_price_direction_loss(self, price_direction_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Calculate price direction loss for CNN model
|
||||
|
||||
Args:
|
||||
price_direction_pred: Tensor of shape [batch, 2] containing [direction, confidence]
|
||||
rewards: Tensor of shape [batch] containing rewards
|
||||
actions: Tensor of shape [batch] containing actions
|
||||
|
||||
Returns:
|
||||
Price direction loss tensor
|
||||
"""
|
||||
try:
|
||||
if price_direction_pred.size(1) != 2:
|
||||
return None
|
||||
|
||||
batch_size = price_direction_pred.size(0)
|
||||
|
||||
# Extract direction and confidence predictions
|
||||
direction_pred = price_direction_pred[:, 0] # -1 to 1
|
||||
confidence_pred = price_direction_pred[:, 1] # 0 to 1
|
||||
|
||||
# Create targets based on rewards and actions
|
||||
with torch.no_grad():
|
||||
# Direction targets: 1 if reward > 0 and action is BUY, -1 if reward > 0 and action is SELL, 0 otherwise
|
||||
direction_targets = torch.zeros(batch_size, device=price_direction_pred.device)
|
||||
for i in range(batch_size):
|
||||
if rewards[i] > 0.01: # Positive reward threshold
|
||||
if actions[i] == 0: # BUY action
|
||||
direction_targets[i] = 1.0 # UP
|
||||
elif actions[i] == 1: # SELL action
|
||||
direction_targets[i] = -1.0 # DOWN
|
||||
# else: targets remain 0 (sideways)
|
||||
|
||||
# Confidence targets: based on reward magnitude (higher reward = higher confidence)
|
||||
confidence_targets = torch.abs(rewards).clamp(0, 1)
|
||||
|
||||
# Calculate losses for each component
|
||||
direction_loss = nn.MSELoss()(direction_pred, direction_targets)
|
||||
confidence_loss = nn.MSELoss()(confidence_pred, confidence_targets)
|
||||
|
||||
# Combined loss (direction is more important than confidence)
|
||||
total_loss = direction_loss + 0.3 * confidence_loss
|
||||
|
||||
return total_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error calculating CNN price direction loss: {e}")
|
||||
return None
|
||||
|
||||
def _calculate_cnn_extrema_loss(self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Calculate extrema loss for CNN model
|
||||
|
||||
Args:
|
||||
extrema_pred: Extrema predictions
|
||||
rewards: Tensor containing rewards
|
||||
actions: Tensor containing actions
|
||||
|
||||
Returns:
|
||||
Extrema loss tensor
|
||||
"""
|
||||
try:
|
||||
batch_size = extrema_pred.size(0)
|
||||
|
||||
# Create targets based on reward patterns
|
||||
with torch.no_grad():
|
||||
extrema_targets = torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device) * 2 # Default to "neither"
|
||||
|
||||
for i in range(batch_size):
|
||||
# High positive reward suggests we're at a good entry point
|
||||
if rewards[i] > 0.05:
|
||||
if actions[i] == 0: # BUY action
|
||||
extrema_targets[i] = 0 # Bottom
|
||||
elif actions[i] == 1: # SELL action
|
||||
extrema_targets[i] = 1 # Top
|
||||
|
||||
# Calculate cross-entropy loss
|
||||
if extrema_pred.size(1) >= 3:
|
||||
extrema_loss = nn.CrossEntropyLoss()(extrema_pred[:, :3], extrema_targets)
|
||||
else:
|
||||
extrema_loss = nn.CrossEntropyLoss()(extrema_pred, extrema_targets)
|
||||
|
||||
return extrema_loss
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error calculating CNN extrema loss: {e}")
|
||||
return None
|
||||
|
||||
def update_model_loss(self, model_name: str, current_loss: float, best_loss: Optional[float] = None):
|
||||
"""Update model loss and potentially best loss"""
|
||||
if model_name in self.model_states:
|
||||
@ -1938,7 +2027,71 @@ class TradingOrchestrator:
|
||||
# Evaluate the previous prediction and train the model immediately
|
||||
await self._evaluate_and_train_on_record(inference_record, current_price)
|
||||
|
||||
logger.info(f"Completed immediate training for {model_name}")
|
||||
# Log predicted vs actual outcome
|
||||
prediction = inference_record.get('prediction', {})
|
||||
predicted_action = prediction.get('action', 'UNKNOWN')
|
||||
predicted_confidence = prediction.get('confidence', 0.0)
|
||||
|
||||
# Calculate actual outcome
|
||||
symbol = inference_record.get('symbol', 'ETH/USDT')
|
||||
predicted_price = None
|
||||
actual_price_change_pct = 0.0
|
||||
|
||||
# Try to get price direction vectors from metadata (new format)
|
||||
if 'price_direction' in prediction and prediction['price_direction']:
|
||||
try:
|
||||
price_direction_data = prediction['price_direction']
|
||||
# Process price direction data
|
||||
if isinstance(price_direction_data, dict) and 'direction' in price_direction_data:
|
||||
direction = price_direction_data['direction']
|
||||
confidence = price_direction_data.get('confidence', 1.0)
|
||||
|
||||
# Convert direction to price change percentage
|
||||
# Scale by confidence and direction strength
|
||||
predicted_price_change_pct = direction * confidence * 0.02 # 2% max change
|
||||
predicted_price = current_price * (1 + predicted_price_change_pct)
|
||||
except Exception as e:
|
||||
logger.debug(f"Error processing price direction data: {e}")
|
||||
|
||||
# Fallback to old price prediction format
|
||||
elif 'price_prediction' in prediction and prediction['price_prediction']:
|
||||
try:
|
||||
price_prediction_data = prediction['price_prediction']
|
||||
if isinstance(price_prediction_data, list) and len(price_prediction_data) > 0:
|
||||
predicted_price_change_pct = float(price_prediction_data[0]) * 0.01
|
||||
predicted_price = current_price * (1 + predicted_price_change_pct)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Calculate price change
|
||||
if predicted_price is not None:
|
||||
actual_price_change_pct = (current_price - predicted_price) / predicted_price * 100
|
||||
price_outcome = f"Predicted: ${predicted_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
|
||||
else:
|
||||
# Fall back to historical price comparison
|
||||
historical_data = self.data_provider.get_historical_data(symbol, '1m', limit=10)
|
||||
if historical_data is not None and not historical_data.empty:
|
||||
historical_price = historical_data['close'].iloc[-1]
|
||||
actual_price_change_pct = (current_price - historical_price) / historical_price * 100
|
||||
price_outcome = f"Historical: ${historical_price:.2f} -> Actual: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
|
||||
else:
|
||||
price_outcome = f"Actual: ${current_price:.2f}"
|
||||
|
||||
# Determine if prediction was correct based on action and price movement
|
||||
was_correct = False
|
||||
if predicted_action == 'BUY' and actual_price_change_pct > 0.1: # Price went up
|
||||
was_correct = True
|
||||
elif predicted_action == 'SELL' and actual_price_change_pct < -0.1: # Price went down
|
||||
was_correct = True
|
||||
elif predicted_action == 'HOLD' and abs(actual_price_change_pct) < 0.5: # Price stayed stable
|
||||
was_correct = True
|
||||
|
||||
outcome_status = "✅ CORRECT" if was_correct else "❌ INCORRECT"
|
||||
|
||||
logger.info(f"Completed immediate training for {model_name} - {outcome_status}")
|
||||
logger.info(f" Prediction: {predicted_action} ({predicted_confidence:.3f})")
|
||||
logger.info(f" {price_outcome}")
|
||||
logger.info(f" Outcome: {outcome_status}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in immediate training for {model_name}: {e}")
|
||||
@ -2412,12 +2565,33 @@ class TradingOrchestrator:
|
||||
self.cnn_optimizer.zero_grad()
|
||||
|
||||
# Forward pass
|
||||
q_values, extrema_pred, price_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
|
||||
q_values, extrema_pred, price_direction_pred, features_refined, advanced_pred = self.cnn_model(features_tensor)
|
||||
|
||||
# Calculate loss
|
||||
# Calculate primary Q-value loss
|
||||
q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
|
||||
target_q = reward_tensor # Simplified target
|
||||
loss = nn.MSELoss()(q_values_selected, target_q)
|
||||
q_loss = nn.MSELoss()(q_values_selected, target_q)
|
||||
|
||||
# Calculate auxiliary losses for price direction and extrema
|
||||
total_loss = q_loss
|
||||
|
||||
# Price direction loss
|
||||
if price_direction_pred is not None and price_direction_pred.shape[0] > 0:
|
||||
price_direction_loss = self._calculate_cnn_price_direction_loss(
|
||||
price_direction_pred, reward_tensor, action_tensor
|
||||
)
|
||||
if price_direction_loss is not None:
|
||||
total_loss = total_loss + 0.2 * price_direction_loss
|
||||
|
||||
# Extrema loss
|
||||
if extrema_pred is not None and extrema_pred.shape[0] > 0:
|
||||
extrema_loss = self._calculate_cnn_extrema_loss(
|
||||
extrema_pred, reward_tensor, action_tensor
|
||||
)
|
||||
if extrema_loss is not None:
|
||||
total_loss = total_loss + 0.1 * extrema_loss
|
||||
|
||||
loss = total_loss
|
||||
|
||||
# Backward pass
|
||||
training_start_time = time.time()
|
||||
@ -2640,9 +2814,17 @@ class TradingOrchestrator:
|
||||
'HOLD': float(action_probs[0, 2].item())
|
||||
}
|
||||
|
||||
# Extract price predictions if available
|
||||
price_prediction = None
|
||||
# Extract price direction predictions if available
|
||||
price_direction_data = None
|
||||
if price_pred is not None:
|
||||
# Process price direction predictions
|
||||
if hasattr(model.model, 'process_price_direction_predictions'):
|
||||
try:
|
||||
price_direction_data = model.model.process_price_direction_predictions(price_pred)
|
||||
except Exception as e:
|
||||
logger.debug(f"Error processing CNN price direction: {e}")
|
||||
|
||||
# Fallback to old format for compatibility
|
||||
price_prediction = price_pred.squeeze(0).cpu().numpy().tolist()
|
||||
|
||||
prediction = Prediction(
|
||||
@ -2656,6 +2838,7 @@ class TradingOrchestrator:
|
||||
'feature_size': len(base_data.get_feature_vector()),
|
||||
'data_sources': ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d', 'btc', 'cob', 'indicators'],
|
||||
'price_prediction': price_prediction,
|
||||
'price_direction': price_direction_data,
|
||||
'extrema_prediction': extrema_pred.squeeze(0).cpu().numpy().tolist() if extrema_pred is not None else None
|
||||
}
|
||||
)
|
||||
@ -2694,6 +2877,14 @@ class TradingOrchestrator:
|
||||
action_names = ['BUY', 'SELL', 'HOLD'] # Note: enhanced_cnn uses this order
|
||||
best_action = action_names[action_idx]
|
||||
|
||||
# Get price direction vectors from CNN model if available
|
||||
price_direction_data = None
|
||||
if hasattr(model.model, 'get_price_direction_vector'):
|
||||
try:
|
||||
price_direction_data = model.model.get_price_direction_vector()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error getting price direction from CNN: {e}")
|
||||
|
||||
pred = Prediction(
|
||||
action=best_action,
|
||||
confidence=float(confidence),
|
||||
@ -2708,7 +2899,8 @@ class TradingOrchestrator:
|
||||
metadata={
|
||||
'feature_vector_size': len(feature_vector),
|
||||
'unified_input': True,
|
||||
'fallback_method': 'direct_model_inference'
|
||||
'fallback_method': 'direct_model_inference',
|
||||
'price_direction': price_direction_data
|
||||
}
|
||||
)
|
||||
predictions.append(pred)
|
||||
@ -2811,6 +3003,14 @@ class TradingOrchestrator:
|
||||
if q_values_for_capture:
|
||||
logger.warning(f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities.")
|
||||
|
||||
# Get price direction vectors from DQN model if available
|
||||
price_direction_data = None
|
||||
if hasattr(model.model, 'get_price_direction_vector'):
|
||||
try:
|
||||
price_direction_data = model.model.get_price_direction_vector()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error getting price direction from DQN: {e}")
|
||||
|
||||
prediction = Prediction(
|
||||
action=action,
|
||||
confidence=float(confidence),
|
||||
@ -2818,7 +3018,10 @@ class TradingOrchestrator:
|
||||
timeframe='mixed', # RL uses mixed timeframes
|
||||
timestamp=datetime.now(),
|
||||
model_name=model.name,
|
||||
metadata={'state_size': len(state)}
|
||||
metadata={
|
||||
'state_size': len(state),
|
||||
'price_direction': price_direction_data
|
||||
}
|
||||
)
|
||||
|
||||
# Capture DQN prediction for dashboard visualization
|
||||
|
Reference in New Issue
Block a user