inference_enabled, cleanup
This commit is contained in:
@ -3369,12 +3369,17 @@ class TradingOrchestrator:
|
||||
)
|
||||
logger.info(f" Outcome: {outcome_status}")
|
||||
|
||||
# Add performance summary
|
||||
# Add comprehensive performance summary
|
||||
if model_name in self.model_performance:
|
||||
perf = self.model_performance[model_name]
|
||||
logger.info(
|
||||
f" Performance: {perf['accuracy']:.1%} ({perf['correct']}/{perf['total']})"
|
||||
f" Performance: {perf['directional_accuracy']:.1%} directional ({perf['directional_correct']}/{perf['total']}) | "
|
||||
f"{perf['accuracy']:.1%} profitable ({perf['correct']}/{perf['total']})"
|
||||
)
|
||||
if perf["pivot_attempted"] > 0:
|
||||
logger.info(
|
||||
f" Pivot Detection: {perf['pivot_accuracy']:.1%} ({perf['pivot_detected']}/{perf['pivot_attempted']})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in immediate training for {model_name}: {e}")
|
||||
@ -3453,32 +3458,62 @@ class TradingOrchestrator:
|
||||
predicted_price_vector=predicted_price_vector,
|
||||
)
|
||||
|
||||
# Update model performance tracking
|
||||
# Initialize enhanced model performance tracking
|
||||
if model_name not in self.model_performance:
|
||||
self.model_performance[model_name] = {
|
||||
"correct": 0,
|
||||
"correct": 0, # Profitability accuracy (backwards compatible)
|
||||
"total": 0,
|
||||
"accuracy": 0.0,
|
||||
"accuracy": 0.0, # Profitability accuracy (backwards compatible)
|
||||
"directional_correct": 0, # NEW: Directional accuracy
|
||||
"directional_accuracy": 0.0, # NEW: Directional accuracy %
|
||||
"pivot_detected": 0, # NEW: Successful pivot detections
|
||||
"pivot_attempted": 0, # NEW: Total pivot attempts
|
||||
"pivot_accuracy": 0.0, # NEW: Pivot detection accuracy
|
||||
"price_predictions": {"total": 0, "accurate": 0, "avg_error": 0.0},
|
||||
}
|
||||
|
||||
# Ensure all new keys exist (for existing models)
|
||||
perf = self.model_performance[model_name]
|
||||
if "directional_correct" not in perf:
|
||||
perf["directional_correct"] = 0
|
||||
perf["directional_accuracy"] = 0.0
|
||||
perf["pivot_detected"] = 0
|
||||
perf["pivot_attempted"] = 0
|
||||
perf["pivot_accuracy"] = 0.0
|
||||
|
||||
# Ensure price_predictions key exists
|
||||
if "price_predictions" not in self.model_performance[model_name]:
|
||||
self.model_performance[model_name]["price_predictions"] = {
|
||||
"total": 0,
|
||||
"accurate": 0,
|
||||
"avg_error": 0.0,
|
||||
}
|
||||
if "price_predictions" not in perf:
|
||||
perf["price_predictions"] = {"total": 0, "accurate": 0, "avg_error": 0.0}
|
||||
|
||||
self.model_performance[model_name]["total"] += 1
|
||||
if was_correct:
|
||||
self.model_performance[model_name]["correct"] += 1
|
||||
|
||||
self.model_performance[model_name]["accuracy"] = (
|
||||
self.model_performance[model_name]["correct"]
|
||||
/ self.model_performance[model_name]["total"]
|
||||
# Calculate directional accuracy separately
|
||||
directional_correct = (
|
||||
(predicted_action == "BUY" and price_change_pct > 0) or
|
||||
(predicted_action == "SELL" and price_change_pct < 0) or
|
||||
(predicted_action == "HOLD" and abs(price_change_pct) < 0.05)
|
||||
)
|
||||
|
||||
# Update all accuracy metrics
|
||||
perf["total"] += 1
|
||||
if was_correct: # Profitability accuracy
|
||||
perf["correct"] += 1
|
||||
if directional_correct:
|
||||
perf["directional_correct"] += 1
|
||||
|
||||
# Update pivot detection tracking
|
||||
is_significant_move = abs(price_change_pct) > 0.08 # 0.08% threshold for "significant"
|
||||
if predicted_action in ["BUY", "SELL"] and is_significant_move:
|
||||
perf["pivot_attempted"] += 1
|
||||
if directional_correct:
|
||||
perf["pivot_detected"] += 1
|
||||
|
||||
# Calculate all accuracy percentages
|
||||
perf["accuracy"] = perf["correct"] / perf["total"] # Profitability accuracy
|
||||
perf["directional_accuracy"] = perf["directional_correct"] / perf["total"] # Directional accuracy
|
||||
if perf["pivot_attempted"] > 0:
|
||||
perf["pivot_accuracy"] = perf["pivot_detected"] / perf["pivot_attempted"] # Pivot accuracy
|
||||
else:
|
||||
perf["pivot_accuracy"] = 0.0
|
||||
|
||||
# Track price prediction accuracy if available
|
||||
if inference_price is not None:
|
||||
price_prediction_stats = self.model_performance[model_name][
|
||||
@ -3504,7 +3539,8 @@ class TradingOrchestrator:
|
||||
f"({price_prediction_stats['avg_error']:.2f}% avg error)"
|
||||
)
|
||||
|
||||
# Enhanced logging for training evaluation
|
||||
# Enhanced logging with new accuracy metrics
|
||||
perf = self.model_performance[model_name]
|
||||
logger.info(f"Training evaluation for {model_name}:")
|
||||
logger.info(
|
||||
f" Action: {predicted_action} | Confidence: {prediction_confidence:.3f}"
|
||||
@ -3512,10 +3548,15 @@ class TradingOrchestrator:
|
||||
logger.info(
|
||||
f" Price change: {price_change_pct:+.3f}% | Time: {time_diff_seconds:.1f}s"
|
||||
)
|
||||
logger.info(f" Reward: {reward:.4f} | Correct: {was_correct}")
|
||||
logger.info(f" Reward: {reward:.4f} | Profitable: {was_correct} | Directional: {directional_correct}")
|
||||
logger.info(
|
||||
f" Accuracy: {self.model_performance[model_name]['accuracy']:.1%} ({self.model_performance[model_name]['correct']}/{self.model_performance[model_name]['total']})"
|
||||
f" Profitability: {perf['accuracy']:.1%} ({perf['correct']}/{perf['total']}) | "
|
||||
f"Directional: {perf['directional_accuracy']:.1%} ({perf['directional_correct']}/{perf['total']})"
|
||||
)
|
||||
if perf["pivot_attempted"] > 0:
|
||||
logger.info(
|
||||
f" Pivot Detection: {perf['pivot_accuracy']:.1%} ({perf['pivot_detected']}/{perf['pivot_attempted']})"
|
||||
)
|
||||
|
||||
# Train the specific model based on sophisticated outcome
|
||||
await self._train_model_on_outcome(
|
||||
@ -3549,6 +3590,45 @@ class TradingOrchestrator:
|
||||
except Exception as e:
|
||||
logger.error(f"Error evaluating and training on record: {e}")
|
||||
|
||||
def _is_pivot_point(self, price_change_pct: float, prediction_confidence: float, time_diff_minutes: float) -> tuple[bool, str, float]:
    """
    Classify a price move as a pivot point and score its strength.

    The magnitude of the price change is matched against four fixed tiers.
    When a tier matches, its base strength is scaled by a time multiplier
    (faster moves score higher) and a confidence multiplier.

    Args:
        price_change_pct: Price change in percent; only its absolute value
            is used, so the sign (direction) does not affect the result.
        prediction_confidence: Model confidence, expected in 0.0-1.0.
            Out-of-range values are clamped so the confidence multiplier
            really stays within 0.5-2.0 and can never turn the strength
            negative.
        time_diff_minutes: Elapsed time in minutes, as supplied by the caller.

    Returns:
        tuple: (is_pivot, pivot_type, pivot_strength). For moves below the
        smallest tier this is (False, "NO_PIVOT", 0.1); the 0.1 is a fixed
        value that is not scaled by time or confidence.
    """
    # (minimum |change| in percent, label, base strength) - largest tier
    # first so the first match is the strongest applicable one.
    pivot_tiers = (
        (1.2, "MASSIVE_PIVOT", 10.0),   # 1.2%  - massive pivot
        (0.6, "MAJOR_PIVOT", 5.0),      # 0.6%  - major pivot
        (0.25, "MEDIUM_PIVOT", 2.5),    # 0.25% - significant pivot
        (0.08, "MINOR_PIVOT", 1.2),     # 0.08% - small but tradeable pivot
    )

    abs_change = abs(price_change_pct)

    # Time-based multiplier (faster pivots are more valuable)
    if time_diff_minutes < 2.0:  # Very fast pivot
        time_multiplier = 2.0
    elif time_diff_minutes < 5.0:  # Fast pivot
        time_multiplier = 1.5
    elif time_diff_minutes > 15.0:  # Slow pivot - less valuable
        time_multiplier = 0.7
    else:  # 5 to 15 minutes - neutral
        time_multiplier = 1.0

    # Confidence multiplier (high confidence pivots are more valuable).
    # Clamp first: without it a confidence > 1 exceeds the intended 2.0 cap
    # and a sufficiently negative confidence flips the sign of the strength.
    bounded_confidence = min(max(prediction_confidence, 0.0), 1.0)
    confidence_multiplier = 0.5 + (bounded_confidence * 1.5)  # 0.5 to 2.0

    for threshold, pivot_type, base_strength in pivot_tiers:
        if abs_change >= threshold:
            return True, pivot_type, base_strength * time_multiplier * confidence_multiplier

    return False, "NO_PIVOT", 0.1  # Very small reward for noise
|
||||
|
||||
def _calculate_sophisticated_reward(
|
||||
self,
|
||||
predicted_action: str,
|
||||
@ -3562,11 +3642,19 @@ class TradingOrchestrator:
|
||||
predicted_price_vector: dict = None,
|
||||
) -> tuple[float, bool]:
|
||||
"""
|
||||
Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
|
||||
Now considers position status and current P&L when evaluating decisions
|
||||
NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise
|
||||
PRICE VECTOR BONUS: Rewards accurate price direction and magnitude predictions
|
||||
|
||||
PIVOT-POINT FOCUSED REWARD SYSTEM
|
||||
|
||||
This system heavily rewards models for correctly identifying pivot points -
|
||||
the actual profitable trading opportunities in the market. Small movements
|
||||
are treated as noise and given minimal rewards.
|
||||
|
||||
Key Features:
|
||||
- Separate directional accuracy vs profitability accuracy tracking
|
||||
- Heavy rewards for successful pivot point detection
|
||||
- Minimal penalties for noise (small movements)
|
||||
- Time-weighted rewards (faster detection = better)
|
||||
- Confidence-weighted rewards (higher confidence = better)
|
||||
|
||||
Args:
|
||||
predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
|
||||
prediction_confidence: Model's confidence in the prediction (0.0 to 1.0)
|
||||
@ -3579,21 +3667,36 @@ class TradingOrchestrator:
|
||||
predicted_price_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
|
||||
|
||||
Returns:
|
||||
tuple: (reward, was_correct)
|
||||
tuple: (reward, directional_correct, profitability_correct, pivot_detected)
|
||||
"""
|
||||
try:
|
||||
# NOISE REDUCTION: Treat low-confidence signals as HOLD
|
||||
confidence_threshold = 0.6 # Only consider BUY/SELL if confidence > 60%
|
||||
if prediction_confidence < confidence_threshold:
|
||||
predicted_action = "HOLD"
|
||||
logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")
|
||||
# Store original action for directional accuracy tracking
|
||||
original_action = predicted_action
|
||||
|
||||
# FEE-AWARE THRESHOLDS: Account for trading fees (0.05-0.06% per trade, ~0.12% round trip)
|
||||
fee_cost = 0.12 # 0.12% round trip fee cost
|
||||
movement_threshold = 0.15 # Minimum movement to be profitable after fees
|
||||
strong_movement_threshold = 0.5 # Strong movements - good profit potential
|
||||
rapid_movement_threshold = 1.0 # Rapid movements - excellent profit potential
|
||||
massive_movement_threshold = 2.0 # Massive movements - extraordinary profit potential
|
||||
# PIVOT POINT DETECTION
|
||||
is_pivot, pivot_type, pivot_strength = self._is_pivot_point(
|
||||
price_change_pct, prediction_confidence, time_diff_minutes
|
||||
)
|
||||
|
||||
# DIRECTIONAL ACCURACY (simple direction prediction)
|
||||
directional_correct = False
|
||||
if predicted_action == "BUY" and price_change_pct > 0:
|
||||
directional_correct = True
|
||||
elif predicted_action == "SELL" and price_change_pct < 0:
|
||||
directional_correct = True
|
||||
elif predicted_action == "HOLD" and abs(price_change_pct) < 0.05: # Very small movement
|
||||
directional_correct = True
|
||||
|
||||
# PROFITABILITY ACCURACY (fee-aware profitable trades)
|
||||
fee_cost = 0.10 # 0.10% round trip fee cost (realistic for most exchanges)
|
||||
profitability_correct = False
|
||||
|
||||
if predicted_action == "BUY" and price_change_pct > fee_cost:
|
||||
profitability_correct = True
|
||||
elif predicted_action == "SELL" and price_change_pct < -fee_cost:
|
||||
profitability_correct = True
|
||||
elif predicted_action == "HOLD" and abs(price_change_pct) < fee_cost:
|
||||
profitability_correct = True
|
||||
|
||||
# Determine current position status if not provided
|
||||
if has_position is None and symbol:
|
||||
@ -3604,210 +3707,104 @@ class TradingOrchestrator:
|
||||
elif has_position is None:
|
||||
has_position = False
|
||||
|
||||
# Determine if prediction was directionally correct
|
||||
was_correct = False
|
||||
directional_accuracy = 0.0
|
||||
|
||||
if predicted_action == "BUY":
|
||||
# BUY signals need to overcome fee costs for profitability
|
||||
was_correct = price_change_pct > movement_threshold
|
||||
# PIVOT POINT REWARD CALCULATION
|
||||
base_reward = 0.0
|
||||
pivot_bonus = 0.0
|
||||
|
||||
# For backwards compatibility, use profitability_correct as the main "was_correct"
|
||||
was_correct = profitability_correct
|
||||
|
||||
# MASSIVE REWARDS FOR SUCCESSFUL PIVOT POINT DETECTION
|
||||
if is_pivot and directional_correct:
|
||||
# Base pivot reward
|
||||
base_reward = pivot_strength
|
||||
|
||||
# ENHANCED FEE-AWARE REWARD STRUCTURE
|
||||
if price_change_pct > massive_movement_threshold:
|
||||
# Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
|
||||
directional_accuracy = price_change_pct * 5.0 # 5x multiplier for massive moves
|
||||
if prediction_confidence > 0.8:
|
||||
directional_accuracy *= 2.0 # Additional 2x for high confidence (10x total)
|
||||
elif price_change_pct > rapid_movement_threshold:
|
||||
# Rapid movements (1%+) - EXCELLENT rewards for high confidence
|
||||
directional_accuracy = price_change_pct * 3.0 # 3x multiplier for rapid moves
|
||||
if prediction_confidence > 0.7:
|
||||
directional_accuracy *= 1.5 # Additional 1.5x for good confidence (4.5x total)
|
||||
elif price_change_pct > strong_movement_threshold:
|
||||
# Strong movements (0.5%+) - GOOD rewards
|
||||
directional_accuracy = price_change_pct * 2.0 # 2x multiplier for strong moves
|
||||
else:
|
||||
# Small movements - minimal rewards (fees eat most profit)
|
||||
directional_accuracy = max(0, (price_change_pct - fee_cost)) * 0.5 # Penalty for fee cost
|
||||
# EXTRAORDINARY bonuses for successful pivot predictions
|
||||
if pivot_type == "MASSIVE_PIVOT":
|
||||
pivot_bonus = 50.0 * prediction_confidence # Up to 50x reward!
|
||||
logger.info(f"MASSIVE PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
|
||||
elif pivot_type == "MAJOR_PIVOT":
|
||||
pivot_bonus = 20.0 * prediction_confidence # Up to 20x reward!
|
||||
logger.info(f"MAJOR PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
|
||||
elif pivot_type == "MEDIUM_PIVOT":
|
||||
pivot_bonus = 8.0 * prediction_confidence # Up to 8x reward!
|
||||
logger.info(f"MEDIUM PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
|
||||
elif pivot_type == "MINOR_PIVOT":
|
||||
pivot_bonus = 3.0 * prediction_confidence # Up to 3x reward!
|
||||
logger.info(f"MINOR PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
|
||||
|
||||
elif predicted_action == "SELL":
|
||||
# SELL signals need to overcome fee costs for profitability
|
||||
was_correct = price_change_pct < -movement_threshold
|
||||
# Additional time-based bonus for early detection
|
||||
if time_diff_minutes < 1.0:
|
||||
time_bonus = pivot_bonus * 0.5 # 50% bonus for very fast detection
|
||||
pivot_bonus += time_bonus
|
||||
logger.info(f"EARLY DETECTION BONUS: Detected {pivot_type} in {time_diff_minutes:.1f}m = +{time_bonus:.1f} bonus")
|
||||
|
||||
base_reward += pivot_bonus
|
||||
|
||||
elif is_pivot and not directional_correct:
|
||||
# MODERATE penalty for missing pivot points (still valuable to learn from)
|
||||
base_reward = -pivot_strength * 0.3 # Small penalty to encourage learning
|
||||
logger.debug(f"MISSED PIVOT: {pivot_type} missed, small penalty = {base_reward:.2f}")
|
||||
|
||||
elif not is_pivot and directional_correct:
|
||||
# Small reward for correct direction on non-pivots (noise)
|
||||
base_reward = 0.2 * prediction_confidence
|
||||
logger.debug(f"NOISE CORRECT: Correct direction on noise movement = {base_reward:.2f}")
|
||||
|
||||
# ENHANCED FEE-AWARE REWARD STRUCTURE (symmetric to BUY)
|
||||
abs_change = abs(price_change_pct)
|
||||
if abs_change > massive_movement_threshold:
|
||||
# Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
|
||||
directional_accuracy = abs_change * 5.0 # 5x multiplier for massive moves
|
||||
if prediction_confidence > 0.8:
|
||||
directional_accuracy *= 2.0 # Additional 2x for high confidence (10x total)
|
||||
elif abs_change > rapid_movement_threshold:
|
||||
# Rapid movements (1%+) - EXCELLENT rewards for high confidence
|
||||
directional_accuracy = abs_change * 3.0 # 3x multiplier for rapid moves
|
||||
if prediction_confidence > 0.7:
|
||||
directional_accuracy *= 1.5 # Additional 1.5x for good confidence (4.5x total)
|
||||
elif abs_change > strong_movement_threshold:
|
||||
# Strong movements (0.5%+) - GOOD rewards
|
||||
directional_accuracy = abs_change * 2.0 # 2x multiplier for strong moves
|
||||
else:
|
||||
# Small movements - minimal rewards (fees eat most profit)
|
||||
directional_accuracy = max(0, (abs_change - fee_cost)) * 0.5 # Penalty for fee cost
|
||||
|
||||
elif predicted_action == "HOLD":
|
||||
# HOLD evaluation with noise reduction - smaller rewards to reduce training noise
|
||||
if has_position:
|
||||
# If we have a position, HOLD evaluation depends on P&L and price movement
|
||||
if current_position_pnl > 0: # Currently profitable position
|
||||
# Holding a profitable position is good if price continues favorably
|
||||
if price_change_pct > 0: # Price went up while holding profitable position - excellent
|
||||
was_correct = True
|
||||
directional_accuracy = price_change_pct * 0.8 # Reduced from 1.5 to reduce noise
|
||||
elif abs(price_change_pct) < movement_threshold: # Price stable - good
|
||||
was_correct = True
|
||||
directional_accuracy = movement_threshold * 0.5 # Reduced reward to reduce noise
|
||||
else: # Price dropped while holding profitable position - still okay but less reward
|
||||
was_correct = True
|
||||
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
|
||||
elif current_position_pnl < 0: # Currently losing position
|
||||
# Holding a losing position is generally bad - should consider closing
|
||||
if price_change_pct > movement_threshold: # Price recovered - good hold
|
||||
was_correct = True
|
||||
directional_accuracy = price_change_pct * 0.6 # Reduced reward
|
||||
else: # Price continued down or stayed flat - bad hold
|
||||
was_correct = False
|
||||
# Penalty proportional to loss magnitude
|
||||
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3 # Reduced penalty
|
||||
else: # Breakeven position
|
||||
# Standard HOLD evaluation for breakeven positions
|
||||
if abs(price_change_pct) < movement_threshold: # Price stable - good
|
||||
was_correct = True
|
||||
directional_accuracy = movement_threshold * 0.4 # Reduced reward
|
||||
else: # Price moved significantly - missed opportunity
|
||||
was_correct = False
|
||||
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
|
||||
else:
|
||||
# If we don't have a position, HOLD is correct if price stayed relatively stable
|
||||
was_correct = abs(price_change_pct) < movement_threshold
|
||||
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4 # Reduced reward
|
||||
|
||||
# Calculate FEE-AWARE magnitude-based multiplier (aggressive rewards for profitable movements)
|
||||
abs_movement = abs(price_change_pct)
|
||||
if abs_movement > massive_movement_threshold:
|
||||
magnitude_multiplier = min(abs_movement / 1.0, 8.0) # Up to 8x for massive moves (8% = 8x)
|
||||
elif abs_movement > rapid_movement_threshold:
|
||||
magnitude_multiplier = min(abs_movement / 1.5, 4.0) # Up to 4x for rapid moves (6% = 4x)
|
||||
elif abs_movement > strong_movement_threshold:
|
||||
magnitude_multiplier = min(abs_movement / 2.0, 2.0) # Up to 2x for strong moves (4% = 2x)
|
||||
else:
|
||||
# Small movements get minimal multiplier due to fees
|
||||
magnitude_multiplier = max(0.1, (abs_movement - fee_cost) / 2.0) # Penalty for fee cost
|
||||
|
||||
# Calculate confidence-based reward adjustment
|
||||
if was_correct:
|
||||
# Reward confident correct predictions more, penalize unconfident correct predictions less
|
||||
confidence_multiplier = 0.5 + (
|
||||
prediction_confidence * 1.5
|
||||
) # Range: 0.5 to 2.0
|
||||
base_reward = (
|
||||
directional_accuracy * magnitude_multiplier * confidence_multiplier
|
||||
# Very small penalty for wrong direction on noise (don't overtrain on noise)
|
||||
base_reward = -0.1 * prediction_confidence
|
||||
logger.debug(f"NOISE INCORRECT: Wrong direction on noise movement = {base_reward:.2f}")
|
||||
|
||||
# POSITION-AWARE ADJUSTMENTS
|
||||
if has_position:
|
||||
# Adjust rewards based on current position status
|
||||
if current_position_pnl > 0.5: # Profitable position
|
||||
if predicted_action == "HOLD" and price_change_pct > 0:
|
||||
base_reward += 0.5 # Bonus for holding profitable position during uptrend
|
||||
logger.debug(f"POSITION BONUS: Holding profitable position during uptrend = +0.5")
|
||||
elif current_position_pnl < -0.5: # Losing position
|
||||
if predicted_action in ["BUY", "SELL"] and directional_correct:
|
||||
base_reward += 0.3 # Bonus for taking action to exit losing position
|
||||
logger.debug(f"EXIT BONUS: Taking action on losing position = +0.3")
|
||||
|
||||
# PRICE VECTOR BONUS (if available)
|
||||
if predicted_price_vector and isinstance(predicted_price_vector, dict):
|
||||
vector_bonus = self._calculate_price_vector_bonus(
|
||||
predicted_price_vector, price_change_pct, abs(price_change_pct), prediction_confidence
|
||||
)
|
||||
if vector_bonus > 0:
|
||||
base_reward += vector_bonus
|
||||
logger.debug(f"PRICE VECTOR BONUS: +{vector_bonus:.3f}")
|
||||
|
||||
# ENHANCED HIGH-CONFIDENCE BONUSES for profitable movements
|
||||
abs_movement = abs(price_change_pct)
|
||||
|
||||
# Extraordinary confidence bonus for massive movements
|
||||
if prediction_confidence > 0.9 and abs_movement > massive_movement_threshold:
|
||||
base_reward *= 3.0 # 300% bonus for ultra-confident massive moves
|
||||
logger.info(f"ULTRA CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 3x reward")
|
||||
|
||||
# Excellent confidence bonus for rapid movements
|
||||
elif prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
|
||||
base_reward *= 2.0 # 200% bonus for very confident rapid moves
|
||||
logger.info(f"HIGH CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 2x reward")
|
||||
|
||||
# Good confidence bonus for strong movements
|
||||
elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
|
||||
base_reward *= 1.5 # 150% bonus for confident strong moves
|
||||
logger.info(f"CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 1.5x reward")
|
||||
|
||||
# Rapid movement detection bonus (speed matters for fees)
|
||||
if time_diff_minutes < 5.0 and abs_movement > rapid_movement_threshold:
|
||||
base_reward *= 1.3 # 30% bonus for rapid detection of big moves
|
||||
logger.info(f"RAPID DETECTION BONUS: {abs_movement:.2f}% movement in {time_diff_minutes:.1f}m = 1.3x reward")
|
||||
|
||||
# PRICE VECTOR ACCURACY BONUS - Reward models for accurate price direction/magnitude predictions
|
||||
if predicted_price_vector and isinstance(predicted_price_vector, dict):
|
||||
vector_bonus = self._calculate_price_vector_bonus(
|
||||
predicted_price_vector, price_change_pct, abs_movement, prediction_confidence
|
||||
)
|
||||
if vector_bonus > 0:
|
||||
base_reward += vector_bonus
|
||||
logger.info(f"PRICE VECTOR BONUS: +{vector_bonus:.3f} for accurate direction/magnitude prediction")
|
||||
|
||||
else:
|
||||
# ENHANCED PENALTY SYSTEM: Discourage fee-losing trades
|
||||
abs_movement = abs(price_change_pct)
|
||||
|
||||
# Penalize incorrect predictions more severely if they were confident
|
||||
confidence_penalty = 0.5 + (prediction_confidence * 1.5) # Higher confidence = higher penalty
|
||||
base_penalty = abs_movement * confidence_penalty
|
||||
|
||||
# SEVERE penalties for confident wrong predictions on big moves
|
||||
if prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
|
||||
base_penalty *= 5.0 # 5x penalty for very confident wrong on big moves
|
||||
logger.warning(f"SEVERE PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 5x penalty")
|
||||
elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
|
||||
base_penalty *= 3.0 # 3x penalty for confident wrong on strong moves
|
||||
logger.warning(f"HIGH PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 3x penalty")
|
||||
elif prediction_confidence > 0.8:
|
||||
base_penalty *= 2.0 # 2x penalty for overconfident wrong predictions
|
||||
|
||||
# ADDITIONAL penalty for predictions that would lose money to fees
|
||||
if abs_movement < fee_cost and prediction_confidence > 0.5:
|
||||
fee_loss_penalty = (fee_cost - abs_movement) * 2.0 # Penalty for fee-losing trades
|
||||
base_penalty += fee_loss_penalty
|
||||
logger.warning(f"FEE LOSS PENALTY: {abs_movement:.2f}% movement < {fee_cost:.2f}% fees = +{fee_loss_penalty:.3f} penalty")
|
||||
|
||||
base_reward = -base_penalty
|
||||
|
||||
# Time decay factor (predictions should be evaluated quickly)
|
||||
time_decay = max(
|
||||
0.1, 1.0 - (time_diff_minutes / 60.0)
|
||||
) # Decay over 1 hour, min 10%
|
||||
|
||||
# Final reward calculation
|
||||
# Time decay factor (pivot detection should be fast)
|
||||
time_decay = max(0.3, 1.0 - (time_diff_minutes / 30.0)) # Decay over 30 minutes, min 30%
|
||||
|
||||
# Apply time decay
|
||||
final_reward = base_reward * time_decay
|
||||
|
||||
# Bonus for accurate price predictions
|
||||
if (
|
||||
has_price_prediction and abs(price_change_pct) < 1.0
|
||||
): # Accurate price prediction
|
||||
final_reward *= 1.2 # 20% bonus for accurate price predictions
|
||||
logger.debug(
|
||||
f"Applied price prediction accuracy bonus: {final_reward:.3f}"
|
||||
)
|
||||
|
||||
# Clamp reward to reasonable range
|
||||
final_reward = max(-5.0, min(5.0, final_reward))
|
||||
|
||||
|
||||
# Clamp reward to reasonable range (higher range for pivot bonuses)
|
||||
final_reward = max(-10.0, min(100.0, final_reward))
|
||||
|
||||
# Log detailed accuracy information
|
||||
logger.debug(
|
||||
f"REWARD CALCULATION: action={predicted_action}, confidence={prediction_confidence:.3f}, "
|
||||
f"price_change={price_change_pct:.3f}%, pivot={is_pivot}/{pivot_type}, "
|
||||
f"directional_correct={directional_correct}, profitability_correct={profitability_correct}, "
|
||||
f"reward={final_reward:.3f}"
|
||||
)
|
||||
|
||||
return final_reward, was_correct
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating sophisticated reward: {e}")
|
||||
# Fallback to simple reward with position awareness
|
||||
has_position = self._has_open_position(symbol) if symbol else False
|
||||
|
||||
if predicted_action == "HOLD" and has_position:
|
||||
# If holding a position, HOLD is correct if price didn't drop significantly
|
||||
simple_correct = price_change_pct > -0.2 # Allow small losses while holding
|
||||
else:
|
||||
# Standard evaluation for other cases
|
||||
simple_correct = (
|
||||
(predicted_action == "BUY" and price_change_pct > 0.1)
|
||||
or (predicted_action == "SELL" and price_change_pct < -0.1)
|
||||
or (predicted_action == "HOLD" and abs(price_change_pct) < 0.1)
|
||||
)
|
||||
return (1.0 if simple_correct else -0.5, simple_correct)
|
||||
# Fallback to simple directional accuracy
|
||||
simple_correct = (
|
||||
(predicted_action == "BUY" and price_change_pct > 0) or
|
||||
(predicted_action == "SELL" and price_change_pct < 0) or
|
||||
(predicted_action == "HOLD" and abs(price_change_pct) < 0.05)
|
||||
)
|
||||
return (1.0 if simple_correct else -0.1, simple_correct)
|
||||
|
||||
def _calculate_price_vector_bonus(
|
||||
self,
|
||||
@ -4334,6 +4331,25 @@ class TradingOrchestrator:
|
||||
|
||||
# Create training sample from record
|
||||
model_input = record.get("model_input")
|
||||
|
||||
# If model_input is None, try to generate fresh state for training
|
||||
if model_input is None:
|
||||
logger.debug(f"No stored model input for {model_name}, generating fresh state")
|
||||
try:
|
||||
# Generate fresh input state for training
|
||||
if hasattr(self, 'data_provider') and self.data_provider:
|
||||
# Use data provider to generate current market state
|
||||
fresh_state = self._generate_fresh_state_fallback(model_name)
|
||||
if fresh_state is not None and len(fresh_state) > 0:
|
||||
model_input = fresh_state
|
||||
logger.debug(f"Generated fresh training state for {model_name}: shape={fresh_state.shape if hasattr(fresh_state, 'shape') else len(fresh_state)}")
|
||||
else:
|
||||
logger.warning(f"Failed to generate fresh state for {model_name}")
|
||||
else:
|
||||
logger.warning(f"No data provider available for generating fresh state for {model_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error generating fresh state for {model_name}: {e}")
|
||||
|
||||
if model_input is not None:
|
||||
# Convert to tensor and ensure device placement
|
||||
device = next(self.cnn_model.parameters()).device
|
||||
@ -4432,7 +4448,71 @@ class TradingOrchestrator:
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"No model input available for CNN training")
|
||||
logger.warning(f"No model input available for CNN training for {model_name}. This prevents the model from learning.")
|
||||
|
||||
# Try one more time to generate training data from current market conditions
|
||||
try:
|
||||
if hasattr(self, 'data_provider') and self.data_provider:
|
||||
# Create minimal training sample from current market data
|
||||
symbol = record.get("symbol", "ETH/USDT")
|
||||
current_price = self._get_current_price(symbol)
|
||||
|
||||
# Get variables from function scope
|
||||
actual_action = prediction["action"]
|
||||
pred_confidence = prediction.get("confidence", 0.5)
|
||||
|
||||
# Create a basic feature vector (this is a fallback)
|
||||
basic_features = np.array([
|
||||
current_price / 10000.0, # Normalized price
|
||||
pred_confidence, # Model confidence
|
||||
reward, # Current reward
|
||||
1.0 if actual_action == "BUY" else 0.0,
|
||||
1.0 if actual_action == "SELL" else 0.0,
|
||||
1.0 if actual_action == "HOLD" else 0.0
|
||||
], dtype=np.float32)
|
||||
|
||||
# Pad to expected size if needed
|
||||
expected_size = 512 # Adjust based on your model's expected input size
|
||||
if len(basic_features) < expected_size:
|
||||
padding = np.zeros(expected_size - len(basic_features), dtype=np.float32)
|
||||
basic_features = np.concatenate([basic_features, padding])
|
||||
|
||||
logger.info(f"Created fallback training features for {model_name}: shape={basic_features.shape}")
|
||||
|
||||
# Now perform training with the fallback features
|
||||
device = next(self.cnn_model.parameters()).device
|
||||
features_tensor = torch.tensor(basic_features, dtype=torch.float32, device=device).unsqueeze(0)
|
||||
|
||||
# Convert action to index
|
||||
actions = ["BUY", "SELL", "HOLD"]
|
||||
action_idx = actions.index(actual_action) if actual_action in actions else 2
|
||||
action_tensor = torch.tensor([action_idx], dtype=torch.long, device=device)
|
||||
reward_tensor = torch.tensor([reward], dtype=torch.float32, device=device)
|
||||
|
||||
# Perform minimal training step
|
||||
self.cnn_model.train()
|
||||
self.cnn_optimizer.zero_grad()
|
||||
|
||||
# Forward pass
|
||||
q_values, _, _, _, _ = self.cnn_model(features_tensor)
|
||||
|
||||
# Calculate basic loss
|
||||
q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
|
||||
loss = nn.MSELoss()(q_values_selected, reward_tensor)
|
||||
|
||||
# Backward pass
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(self.cnn_model.parameters(), max_norm=1.0)
|
||||
self.cnn_optimizer.step()
|
||||
|
||||
logger.info(f"Fallback CNN training completed for {model_name}: loss={loss.item():.4f}")
|
||||
return True
|
||||
|
||||
except Exception as fallback_error:
|
||||
logger.error(f"Fallback CNN training failed for {model_name}: {fallback_error}")
|
||||
|
||||
# If we reach here, even fallback training failed
|
||||
logger.error(f"All CNN training methods failed for {model_name}. Model will not learn from this prediction.")
|
||||
return False
|
||||
|
||||
# Try model interface training methods
|
||||
|
Reference in New Issue
Block a user