data normalizations
@@ -16,6 +16,7 @@ import logging
import time
import threading
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple, Union
from dataclasses import dataclass, field
@@ -2364,12 +2365,262 @@ class TradingOrchestrator:
            logger.info("Initializing ExtremaTrainer with historical context...")
            self.extrema_trainer.initialize_context_data()

            # CRITICAL: Initialize ALL models with historical data
            self._initialize_models_with_historical_data(loaded_data)

            logger.info(f"🎯 Historical data loading complete: {total_candles} total candles loaded")
            logger.info(f"📊 Available datasets: {list(loaded_data.keys())}")

        except Exception as e:
            logger.error(f"Error in historical data loading: {e}")
    def _initialize_models_with_historical_data(self, loaded_data: Dict[str, Any]):
        """Initialize all NN models with historical data and multi-symbol support."""
        try:
            logger.info("Initializing models with historical data and multi-symbol support...")

            # Prepare multi-symbol feature matrices
            symbol_features = self._prepare_multi_symbol_features(loaded_data)

            # Initialize CNN with multi-symbol data
            if hasattr(self, 'cnn_model') and self.cnn_model:
                logger.info("Initializing CNN with multi-symbol historical features...")
                self._initialize_cnn_with_data(symbol_features)

            # Initialize DQN with multi-symbol states
            if hasattr(self, 'rl_agent') and self.rl_agent:
                logger.info("Initializing DQN with multi-symbol state vectors...")
                self._initialize_dqn_with_data(symbol_features)

            # Initialize Transformer with sequence data
            if hasattr(self, 'transformer_model') and self.transformer_model:
                logger.info("Initializing Transformer with multi-symbol sequences...")
                self._initialize_transformer_with_data(symbol_features)

            # Initialize Decision Fusion with comprehensive features
            if hasattr(self, 'decision_fusion') and self.decision_fusion:
                logger.info("Initializing Decision Fusion with multi-symbol features...")
                self._initialize_decision_with_data(symbol_features)

            logger.info("✅ All models initialized with historical multi-symbol data")

        except Exception as e:
            logger.error(f"Error initializing models with historical data: {e}")
    def _prepare_multi_symbol_features(self, loaded_data: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare normalized multi-symbol feature matrices."""
        try:
            symbol_features = {
                'ETH/USDT': {'1m': None, '1h': None, '1d': None},
                'BTC/USDT': {'1m': None}
            }

            # Process each symbol's data with symbol-specific normalization
            for data_key, df in loaded_data.items():
                if df is None or df.empty:
                    continue

                # Extract symbol and timeframe from keys like 'ETH/USDT_1m'
                if data_key.endswith('_1m'):
                    symbol = data_key.replace('_1m', '')
                    timeframe = '1m'
                elif data_key.endswith('_1h'):
                    symbol = data_key.replace('_1h', '')
                    timeframe = '1h'
                elif data_key.endswith('_1d'):
                    symbol = data_key.replace('_1d', '')
                    timeframe = '1d'
                else:
                    continue

                # Apply symbol-grouped normalization
                normalized_df = self._apply_symbol_grouped_normalization(df, symbol)

                # Guard against symbols outside the predefined map (avoids a KeyError)
                if normalized_df is not None and symbol in symbol_features:
                    symbol_features[symbol][timeframe] = normalized_df
                    logger.debug(f"Prepared normalized features for {symbol} {timeframe}")

            return symbol_features

        except Exception as e:
            logger.error(f"Error preparing multi-symbol features: {e}")
            return {}
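The key parsing above implies that loaded_data is keyed by "<symbol>_<timeframe>" strings. A minimal illustration of that expected shape (the candle values below are made up, and the same frame is reused for every key purely for brevity):

import pandas as pd

candles = pd.DataFrame({
    'open': [2990.0, 3000.0], 'high': [3005.0, 3012.0],
    'low': [2980.0, 2995.0], 'close': [3000.0, 3008.0],
    'volume': [150.0, 90.0],
})

# Keys follow the '<symbol>_<timeframe>' convention parsed above
loaded_data = {
    'ETH/USDT_1m': candles, 'ETH/USDT_1h': candles,
    'ETH/USDT_1d': candles, 'BTC/USDT_1m': candles,
}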
    def _apply_symbol_grouped_normalization(self, df: pd.DataFrame, symbol: str) -> pd.DataFrame:
        """Apply symbol-grouped normalization with consistent ranges across timeframes."""
        try:
            df_norm = df.copy()

            # Get symbol-specific price ranges for consistent normalization
            symbol_price_ranges = {
                'ETH/USDT': {'min': 1000, 'max': 5000},    # ETH price range
                'BTC/USDT': {'min': 90000, 'max': 120000}  # BTC price range
            }

            if symbol in symbol_price_ranges:
                price_range = symbol_price_ranges[symbol]
                range_size = price_range['max'] - price_range['min']

                # Normalize price columns to a [0, 1] range specific to the symbol
                price_cols = ['open', 'high', 'low', 'close']
                for col in price_cols:
                    if col in df_norm.columns:
                        df_norm[col] = (df_norm[col] - price_range['min']) / range_size
                        df_norm[col] = np.clip(df_norm[col], 0, 1)  # Ensure [0, 1] range

                # Normalize volume to [0, 1] using a log scale
                if 'volume' in df_norm.columns:
                    df_norm['volume'] = np.log1p(df_norm['volume'])
                    vol_max = df_norm['volume'].max()
                    if vol_max > 0:
                        df_norm['volume'] = df_norm['volume'] / vol_max

                logger.debug(f"Applied symbol-grouped normalization for {symbol}")

            # Fill any NaN values
            df_norm = df_norm.fillna(0)

            return df_norm

        except Exception as e:
            logger.error(f"Error in symbol-grouped normalization for {symbol}: {e}")
            return df
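As a worked trace of the fixed-range scheme above (a sketch using the hard-coded ETH range of 1000 to 5000): a close of 3000 maps to (3000 - 1000) / 4000 = 0.5, and volume is log-compressed before being divided by its own maximum:

import numpy as np
import pandas as pd

df = pd.DataFrame({'close': [3000.0], 'volume': [1200.0]})
print((df['close'] - 1000) / (5000 - 1000))  # 0.5
v = np.log1p(df['volume'])
print(v / v.max())                           # 1.0 (a single row is its own max)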
    def _initialize_cnn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize CNN with a multi-symbol feature matrix."""
        try:
            # Create combined feature matrix: [ETH_1m, ETH_1h, ETH_1d, BTC_1m]
            combined_features = []

            # ETH features (1m, 1h, 1d)
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use the last 60 candles for CNN input
                    recent_data = eth_data.tail(60)
                    features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                    combined_features.append(features.flatten())

            # BTC features (1m)
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                recent_data = btc_data.tail(60)
                features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                combined_features.append(features.flatten())

            if combined_features:
                # Concatenate all features
                full_features = np.concatenate(combined_features)
                logger.info(f"CNN initialized with {len(full_features)} multi-symbol features")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['cnn'] = full_features

        except Exception as e:
            logger.error(f"Error initializing CNN with historical data: {e}")
    def _initialize_dqn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize DQN with multi-symbol state vectors."""
        try:
            # Create a comprehensive state vector combining all symbols and timeframes
            state_components = []

            for symbol in ['ETH/USDT', 'BTC/USDT']:
                timeframes = ['1m', '1h', '1d'] if symbol == 'ETH/USDT' else ['1m']

                for timeframe in timeframes:
                    data = symbol_features.get(symbol, {}).get(timeframe)
                    if data is not None and not data.empty:
                        # Extract key features for the state
                        latest = data.iloc[-1]
                        state_features = [
                            latest['close'],   # Current price
                            latest['volume'],  # Current volume
                            data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,  # Price change
                        ]
                        state_components.extend(state_features)

            if state_components:
                # Pad or truncate to the expected DQN state size
                target_size = 100  # DQN expects a 100-dimensional state
                if len(state_components) < target_size:
                    state_components.extend([0] * (target_size - len(state_components)))
                else:
                    state_components = state_components[:target_size]

                state_vector = np.array(state_components, dtype=np.float32)
                logger.info(f"DQN initialized with {len(state_vector)} dimensional multi-symbol state")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['dqn'] = state_vector

        except Exception as e:
            logger.error(f"Error initializing DQN with historical data: {e}")
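Assuming all four datasets are present, the loop above yields 4 x 3 = 12 raw components, which are then zero-padded to the fixed 100-dimensional state. A self-contained sketch of that padding step (the feature values are hypothetical):

import numpy as np

raw = [0.52, 0.31, 0.004] * 4                     # 4 symbol/timeframe blocks x 3 features each
target_size = 100                                 # fixed DQN state size
padded = raw + [0.0] * (target_size - len(raw))   # zero-pad (12 -> 100)
state = np.array(padded[:target_size], dtype=np.float32)
assert state.shape == (100,)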
    def _initialize_transformer_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Transformer with multi-symbol sequence data."""
        try:
            # Prepare sequence data for the transformer
            sequences = []

            # ETH sequences
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use the last 150 points as a sequence
                    sequence = eth_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                    sequences.append(sequence)

            # BTC sequence
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                sequence = btc_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                sequences.append(sequence)

            if sequences:
                logger.info(f"Transformer initialized with {len(sequences)} multi-symbol sequences")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['transformer'] = sequences

        except Exception as e:
            logger.error(f"Error initializing Transformer with historical data: {e}")
    def _initialize_decision_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Decision Fusion with comprehensive multi-symbol features."""
        try:
            # Aggregate all available features for decision fusion
            all_features = {}

            for symbol in symbol_features:
                for timeframe in symbol_features[symbol]:
                    data = symbol_features[symbol][timeframe]
                    if data is not None and not data.empty:
                        key = f"{symbol}_{timeframe}"
                        all_features[key] = {
                            'latest_price': data['close'].iloc[-1],
                            'volume': data['volume'].iloc[-1],
                            'price_change': data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,
                            'volatility': data['close'].std() if len(data) > 1 else 0
                        }

            if all_features:
                logger.info(f"Decision Fusion initialized with {len(all_features)} symbol-timeframe combinations")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['decision'] = all_features

        except Exception as e:
            logger.error(f"Error initializing Decision Fusion with historical data: {e}")
    def get_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 300) -> List:
        """Get OHLCV data for a symbol with the specified timeframe and limit."""
        try:
core/reward_calculator.py (new file, 177 lines)
@@ -0,0 +1,177 @@
"""
Improved Reward Function for RL Trading Agent

This module provides a more sophisticated reward function for the RL trading
agent, incorporating realistic trading fees, penalties for excessive trading,
and rewards for successfully holding positions.
"""

import numpy as np
from datetime import datetime, timedelta
from collections import deque
import logging

logger = logging.getLogger(__name__)
class RewardCalculator:
    """Reward shaping for the RL trading agent: fee-aware P&L, frequency penalties, and risk adjustment."""

    def __init__(self, base_fee_rate=0.001, reward_scaling=10.0, risk_aversion=0.1):
        self.base_fee_rate = base_fee_rate
        self.reward_scaling = reward_scaling
        self.risk_aversion = risk_aversion
        self.trade_pnls = []
        self.returns = []
        self.trade_timestamps = []
        self.frequency_threshold = 10  # Trades per minute threshold for penalty
        self.max_frequency_penalty = 0.05
    def record_pnl(self, pnl):
        """Record P&L for risk-adjustment calculations."""
        self.trade_pnls.append(pnl)
        if len(self.trade_pnls) > 100:
            self.trade_pnls.pop(0)

    def record_trade(self, action):
        """Record a trade action for frequency-penalty calculations."""
        from time import time
        self.trade_timestamps.append(time())
        if len(self.trade_timestamps) > 100:
            self.trade_timestamps.pop(0)
    def _calculate_frequency_penalty(self):
        """Calculate a penalty for high-frequency trading."""
        if len(self.trade_timestamps) < 2:
            return 0.0
        time_span = self.trade_timestamps[-1] - self.trade_timestamps[0]
        if time_span <= 0:
            return 0.0
        trades_per_minute = (len(self.trade_timestamps) / time_span) * 60
        if trades_per_minute > self.frequency_threshold:
            return min(self.max_frequency_penalty, (trades_per_minute - self.frequency_threshold) * 0.001)
        return 0.0
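For intuition, a quick trace of the penalty arithmetic (assuming the RewardCalculator class above): 21 trades spread over 60 seconds is 21 trades per minute, 11 over the threshold of 10, so the penalty is min(0.05, 11 * 0.001) = 0.011:

import time

calc = RewardCalculator()
now = time.time()
calc.trade_timestamps = [now - 60 + i * 3 for i in range(21)]  # 21 trades over 60 s
print(calc._calculate_frequency_penalty())  # min(0.05, (21 - 10) * 0.001) ≈ 0.011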
    def _calculate_risk_adjustment(self, reward):
        """Adjust rewards based on risk (a simple Sharpe-ratio scaling)."""
        if len(self.trade_pnls) < 5:
            return reward
        pnl_array = np.array(self.trade_pnls)
        mean_return = np.mean(pnl_array)
        std_return = np.std(pnl_array)
        if std_return == 0:
            return reward
        sharpe = mean_return / std_return
        adjustment_factor = np.clip(1.0 + 0.5 * sharpe, 0.5, 2.0)
        return reward * adjustment_factor
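A numeric check of the Sharpe-style scaling (again assuming the class above): ten recorded P&Ls with mean 0.01 and standard deviation 0.02 give a ratio of 0.5, so rewards are multiplied by clip(1 + 0.5 * 0.5, 0.5, 2.0) = 1.25:

calc = RewardCalculator()
calc.trade_pnls = [0.03, -0.01] * 5          # mean 0.01, std 0.02
print(calc._calculate_risk_adjustment(1.0))  # 1.0 * 1.25 ≈ 1.25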
    def _calculate_holding_reward(self, position_held_time, price_change):
        """Calculate the reward for holding a position."""
        base_holding_reward = 0.0005 * (position_held_time / 60.0)
        if price_change > 0:
            return base_holding_reward * 2
        elif price_change < 0:
            return base_holding_reward * 0.5
        return base_holding_reward
    def calculate_basic_reward(self, pnl, confidence):
        """Calculate a basic training reward from P&L and confidence."""
        try:
            # Reward based on net P&L after fees, with confidence alignment
            base_reward = pnl
            # Stronger penalty for confident wrong decisions
            if pnl < 0 and confidence >= 0.6:
                confidence_adjustment = -confidence * 3.0
            elif pnl > 0 and confidence >= 0.6:
                confidence_adjustment = confidence * 1.0
            else:
                confidence_adjustment = 0.0
            final_reward = base_reward + confidence_adjustment
            # Reduce tanh compression so small P&L changes are not flattened
            normalized_reward = np.tanh(final_reward / 2.5)
            logger.debug(f"Basic reward calculation: P&L={pnl:.4f}, confidence={confidence:.2f}, reward={normalized_reward:.4f}")
            return float(normalized_reward)
        except Exception as e:
            logger.error(f"Error calculating basic reward: {e}")
            return 0.0
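To make the confidence shaping concrete, a worked trace (not part of the commit): a losing trade with pnl = -0.5 at confidence 0.8 is adjusted by -0.8 * 3.0 = -2.4, giving tanh(-2.9 / 2.5) ≈ -0.82, while the same loss below the 0.6 confidence cutoff stays at tanh(-0.5 / 2.5) ≈ -0.20:

calc = RewardCalculator()
print(calc.calculate_basic_reward(pnl=-0.5, confidence=0.8))  # ≈ -0.82
print(calc.calculate_basic_reward(pnl=-0.5, confidence=0.5))  # ≈ -0.20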
    def calculate_enhanced_reward(self, action, price_change, position_held_time=0,
                                  volatility=None, is_profitable=False, confidence=0.0,
                                  predicted_change=0.0, actual_change=0.0,
                                  current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate an enhanced reward for trading actions."""
        fee = self.base_fee_rate
        frequency_penalty = self._calculate_frequency_penalty()
        if action == 0:  # Buy
            reward = -fee - frequency_penalty
        elif action == 1:  # Sell
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)  # Entry and exit fees
            reward = net_profit * self.reward_scaling
            reward -= frequency_penalty
            self.record_pnl(net_profit)
        else:  # Hold
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
            else:
                reward = -0.0001

        # Bonus or penalty for directional prediction accuracy
        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        if volatility is not None:
            reward -= abs(volatility) * 100
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion
        self.record_trade(action)
        return reward
    def calculate_prediction_reward(self, symbol, predicted_direction, actual_direction,
                                    confidence, predicted_change, actual_change,
                                    current_pnl=0.0, position_duration=0.0):
        """Calculate a reward for prediction accuracy."""
        reward = 0.0
        if predicted_direction == actual_direction:
            reward += 1.0 * confidence
        else:
            reward -= 0.5
        if predicted_direction == actual_direction and abs(predicted_change) > 0.001:
            reward += abs(actual_change) * 5.0
        if predicted_direction != actual_direction and abs(predicted_change) > 0.001:
            reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1
        # Dynamic adjustment based on recent P&L (loss-cutting incentive)
        if hasattr(self, 'pnl_history') and symbol in self.pnl_history and self.pnl_history[symbol]:
            latest_pnl_entry = self.pnl_history[symbol][-1]
            latest_pnl_value = latest_pnl_entry.get('pnl', 0.0) if isinstance(latest_pnl_entry, dict) else 0.0
            if latest_pnl_value < 0 and position_duration > 60:
                reward -= abs(latest_pnl_value) * 0.2
            pnl_values = [entry.get('pnl', 0.0) for entry in self.pnl_history[symbol] if isinstance(entry, dict)]
            best_pnl = max(pnl_values) if pnl_values else 0.0
            if best_pnl < 0.0:
                reward -= 0.1
        return reward
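A short trace of the prediction path (a sketch; note the pnl_history branch only fires if a caller has attached that attribute, since the class never sets it itself): a correct direction call at confidence 0.7 with a 0.5% predicted and actual move earns 1.0 * 0.7 + 0.005 * 5.0 = 0.725:

calc = RewardCalculator()
r = calc.calculate_prediction_reward(
    symbol='ETH/USDT', predicted_direction=1, actual_direction=1,
    confidence=0.7, predicted_change=0.005, actual_change=0.005,
)
print(r)  # 0.7 + 0.025 = 0.725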
# Example usage:
if __name__ == "__main__":
    # Create a calculator instance
    reward_calc = RewardCalculator()

    # Example reward for a buy action
    buy_reward = reward_calc.calculate_enhanced_reward(action=0, price_change=0)
    print(f"Buy action reward: {buy_reward:.5f}")

    # Record a trade for frequency tracking
    reward_calc.record_trade(0)

    # Wait briefly and trade again to exercise the frequency penalty
    import time
    time.sleep(0.1)

    # Example reward for a sell action with profit
    sell_reward = reward_calc.calculate_enhanced_reward(action=1, price_change=0.015, position_held_time=60)
    print(f"Sell action reward (with profit): {sell_reward:.5f}")

    # Example reward for a hold action on a profitable position
    hold_reward = reward_calc.calculate_enhanced_reward(action=2, price_change=0.01, position_held_time=30, is_profitable=True)
    print(f"Hold action reward (profitable): {hold_reward:.5f}")

    # Example reward for a hold action on an unprofitable position
    hold_reward_neg = reward_calc.calculate_enhanced_reward(action=2, price_change=-0.01, position_held_time=30, is_profitable=False)
    print(f"Hold action reward (unprofitable): {hold_reward_neg:.5f}")