data normalizations

Dobromir Popov
2025-09-02 18:51:49 +03:00
parent 1c013f2806
commit 6dcb82c184
4 changed files with 251 additions and 322 deletions


@@ -16,6 +16,7 @@ import logging
import time
import threading
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple, Union
from dataclasses import dataclass, field
@@ -2364,12 +2365,262 @@ class TradingOrchestrator:
logger.info("Initializing ExtremaTrainer with historical context...")
self.extrema_trainer.initialize_context_data()
# CRITICAL: Initialize ALL models with historical data
self._initialize_models_with_historical_data(loaded_data)
logger.info(f"🎯 Historical data loading complete: {total_candles} total candles loaded")
logger.info(f"📊 Available datasets: {list(loaded_data.keys())}")
except Exception as e:
logger.error(f"Error in historical data loading: {e}")
    def _initialize_models_with_historical_data(self, loaded_data: Dict[str, Any]):
        """Initialize all NN models with historical data and multi-symbol support"""
        try:
            logger.info("Initializing models with historical data and multi-symbol support...")

            # Prepare multi-symbol feature matrices
            symbol_features = self._prepare_multi_symbol_features(loaded_data)

            # Initialize CNN with multi-symbol data
            if hasattr(self, 'cnn_model') and self.cnn_model:
                logger.info("Initializing CNN with multi-symbol historical features...")
                self._initialize_cnn_with_data(symbol_features)

            # Initialize DQN with multi-symbol states
            if hasattr(self, 'rl_agent') and self.rl_agent:
                logger.info("Initializing DQN with multi-symbol state vectors...")
                self._initialize_dqn_with_data(symbol_features)

            # Initialize Transformer with sequence data
            if hasattr(self, 'transformer_model') and self.transformer_model:
                logger.info("Initializing Transformer with multi-symbol sequences...")
                self._initialize_transformer_with_data(symbol_features)

            # Initialize Decision Fusion with comprehensive features
            if hasattr(self, 'decision_fusion') and self.decision_fusion:
                logger.info("Initializing Decision Fusion with multi-symbol features...")
                self._initialize_decision_with_data(symbol_features)

            logger.info("✅ All models initialized with historical multi-symbol data")
        except Exception as e:
            logger.error(f"Error initializing models with historical data: {e}")
    def _prepare_multi_symbol_features(self, loaded_data: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare normalized multi-symbol feature matrices"""
        try:
            symbol_features = {
                'ETH/USDT': {'1m': None, '1h': None, '1d': None},
                'BTC/USDT': {'1m': None}
            }

            # Process each symbol's data with symbol-specific normalization
            for data_key, df in loaded_data.items():
                if df is None or df.empty:
                    continue

                # Extract symbol and timeframe
                if '_1m' in data_key:
                    symbol = data_key.replace('_1m', '')
                    timeframe = '1m'
                elif '_1h' in data_key:
                    symbol = data_key.replace('_1h', '')
                    timeframe = '1h'
                elif '_1d' in data_key:
                    symbol = data_key.replace('_1d', '')
                    timeframe = '1d'
                else:
                    continue

                # Apply symbol-grouped normalization
                normalized_df = self._apply_symbol_grouped_normalization(df, symbol)
                if normalized_df is not None:
                    symbol_features[symbol][timeframe] = normalized_df
                    logger.debug(f"Prepared normalized features for {symbol} {timeframe}")

            return symbol_features
        except Exception as e:
            logger.error(f"Error preparing multi-symbol features: {e}")
            return {}
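    # Note (illustrative, not part of the original diff): the key parsing above assumes
    # loaded_data is keyed as '<symbol>_<timeframe>', e.g. 'ETH/USDT_1m' or 'BTC/USDT_1m',
    # so stripping the '_1m' suffix recovers the symbol name used in symbol_features.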
    def _apply_symbol_grouped_normalization(self, df: pd.DataFrame, symbol: str) -> pd.DataFrame:
        """Apply symbol-grouped normalization with consistent ranges across timeframes"""
        try:
            df_norm = df.copy()

            # Get symbol-specific price ranges for consistent normalization
            symbol_price_ranges = {
                'ETH/USDT': {'min': 1000, 'max': 5000},     # ETH price range
                'BTC/USDT': {'min': 90000, 'max': 120000}   # BTC price range
            }

            if symbol in symbol_price_ranges:
                price_range = symbol_price_ranges[symbol]
                range_size = price_range['max'] - price_range['min']

                # Normalize price columns to [0, 1] range specific to symbol
                price_cols = ['open', 'high', 'low', 'close']
                for col in price_cols:
                    if col in df_norm.columns:
                        df_norm[col] = (df_norm[col] - price_range['min']) / range_size
                        df_norm[col] = np.clip(df_norm[col], 0, 1)  # Ensure [0, 1] range

            # Normalize volume to [0, 1] using log scale
            if 'volume' in df_norm.columns:
                df_norm['volume'] = np.log1p(df_norm['volume'])
                vol_max = df_norm['volume'].max()
                if vol_max > 0:
                    df_norm['volume'] = df_norm['volume'] / vol_max

            logger.debug(f"Applied symbol-grouped normalization for {symbol}")

            # Fill any NaN values
            df_norm = df_norm.fillna(0)
            return df_norm
        except Exception as e:
            logger.error(f"Error in symbol-grouped normalization for {symbol}: {e}")
            return df
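    # Worked example of the min-max mapping above (hypothetical prices, for illustration only):
    #   ETH close 3000   -> (3000 - 1000) / (5000 - 1000)       = 0.50
    #   BTC close 105000 -> (105000 - 90000) / (120000 - 90000) = 0.50
    # Both symbols land on a comparable [0, 1] scale, which keeps the normalization
    # consistent across all timeframes of the same symbol.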
    def _initialize_cnn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize CNN with multi-symbol feature matrix"""
        try:
            # Create combined feature matrix: [ETH_1m, ETH_1h, ETH_1d, BTC_1m]
            combined_features = []

            # ETH features (1m, 1h, 1d)
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use last 60 candles for CNN input
                    recent_data = eth_data.tail(60)
                    features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                    combined_features.append(features.flatten())

            # BTC features (1m)
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                recent_data = btc_data.tail(60)
                features = recent_data[['open', 'high', 'low', 'close', 'volume']].values
                combined_features.append(features.flatten())

            if combined_features:
                # Concatenate all features
                full_features = np.concatenate(combined_features)
                logger.info(f"CNN initialized with {len(full_features)} multi-symbol features")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['cnn'] = full_features
        except Exception as e:
            logger.error(f"Error initializing CNN with historical data: {e}")
    def _initialize_dqn_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize DQN with multi-symbol state vectors"""
        try:
            # Create comprehensive state vector combining all symbols and timeframes
            state_components = []

            for symbol in ['ETH/USDT', 'BTC/USDT']:
                timeframes = ['1m', '1h', '1d'] if symbol == 'ETH/USDT' else ['1m']
                for timeframe in timeframes:
                    data = symbol_features.get(symbol, {}).get(timeframe)
                    if data is not None and not data.empty:
                        # Extract key features for state
                        latest = data.iloc[-1]
                        state_features = [
                            latest['close'],   # Current price
                            latest['volume'],  # Current volume
                            data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,  # Price change
                        ]
                        state_components.extend(state_features)

            if state_components:
                # Pad or truncate to expected DQN state size
                target_size = 100  # DQN expects 100-dimensional state
                if len(state_components) < target_size:
                    state_components.extend([0] * (target_size - len(state_components)))
                else:
                    state_components = state_components[:target_size]

                state_vector = np.array(state_components, dtype=np.float32)
                logger.info(f"DQN initialized with {len(state_vector)} dimensional multi-symbol state")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['dqn'] = state_vector
        except Exception as e:
            logger.error(f"Error initializing DQN with historical data: {e}")
    def _initialize_transformer_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Transformer with multi-symbol sequence data"""
        try:
            # Prepare sequence data for transformer
            sequences = []

            # ETH sequences
            for timeframe in ['1m', '1h', '1d']:
                eth_data = symbol_features.get('ETH/USDT', {}).get(timeframe)
                if eth_data is not None and not eth_data.empty:
                    # Use last 150 points as sequence
                    sequence = eth_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                    sequences.append(sequence)

            # BTC sequence
            btc_data = symbol_features.get('BTC/USDT', {}).get('1m')
            if btc_data is not None and not btc_data.empty:
                sequence = btc_data.tail(150)[['open', 'high', 'low', 'close', 'volume']].values
                sequences.append(sequence)

            if sequences:
                logger.info(f"Transformer initialized with {len(sequences)} multi-symbol sequences")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['transformer'] = sequences
        except Exception as e:
            logger.error(f"Error initializing Transformer with historical data: {e}")
    def _initialize_decision_with_data(self, symbol_features: Dict[str, Any]):
        """Initialize Decision Fusion with comprehensive multi-symbol features"""
        try:
            # Aggregate all available features for decision fusion
            all_features = {}

            for symbol in symbol_features:
                for timeframe in symbol_features[symbol]:
                    data = symbol_features[symbol][timeframe]
                    if data is not None and not data.empty:
                        key = f"{symbol}_{timeframe}"
                        all_features[key] = {
                            'latest_price': data['close'].iloc[-1],
                            'volume': data['volume'].iloc[-1],
                            'price_change': data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,
                            'volatility': data['close'].std() if len(data) > 1 else 0
                        }

            if all_features:
                logger.info(f"Decision Fusion initialized with {len(all_features)} symbol-timeframe combinations")

                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['decision'] = all_features
        except Exception as e:
            logger.error(f"Error initializing Decision Fusion with historical data: {e}")
    def get_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 300) -> List:
        """Get OHLCV data for a symbol with specified timeframe and limit."""
        try:

core/reward_calculator.py (new file, +177 lines)

@@ -0,0 +1,177 @@
"""
Improved Reward Function for RL Trading Agent
This module provides a more sophisticated reward function for the RL trading agent
that incorporates realistic trading fees, penalties for excessive trading, and
rewards for successful holding of positions.
"""
import numpy as np
from datetime import datetime, timedelta
from collections import deque
import logging
logger = logging.getLogger(__name__)
class RewardCalculator:
    def __init__(self, base_fee_rate=0.001, reward_scaling=10.0, risk_aversion=0.1):
        self.base_fee_rate = base_fee_rate
        self.reward_scaling = reward_scaling
        self.risk_aversion = risk_aversion
        self.trade_pnls = []
        self.returns = []
        self.trade_timestamps = []
        self.frequency_threshold = 10  # Trades per minute threshold for penalty
        self.max_frequency_penalty = 0.05
    def record_pnl(self, pnl):
        """Record P&L for risk adjustment calculations"""
        self.trade_pnls.append(pnl)
        if len(self.trade_pnls) > 100:
            self.trade_pnls.pop(0)

    def record_trade(self, action):
        """Record trade action for frequency penalty calculations"""
        from time import time
        self.trade_timestamps.append(time())
        if len(self.trade_timestamps) > 100:
            self.trade_timestamps.pop(0)
    def _calculate_frequency_penalty(self):
        """Calculate penalty for high-frequency trading"""
        if len(self.trade_timestamps) < 2:
            return 0.0
        time_span = self.trade_timestamps[-1] - self.trade_timestamps[0]
        if time_span <= 0:
            return 0.0
        trades_per_minute = (len(self.trade_timestamps) / time_span) * 60
        if trades_per_minute > self.frequency_threshold:
            penalty = min(self.max_frequency_penalty, (trades_per_minute - self.frequency_threshold) * 0.001)
            return penalty
        return 0.0
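    # Worked example (hypothetical numbers): 20 recorded trades whose first and last timestamps
    # are 60 s apart give 20 trades/minute; penalty = min(0.05, (20 - 10) * 0.001) = 0.01.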
    def _calculate_risk_adjustment(self, reward):
        """Adjust rewards based on risk (simple Sharpe ratio implementation)"""
        if len(self.trade_pnls) < 5:
            return reward
        pnl_array = np.array(self.trade_pnls)
        mean_return = np.mean(pnl_array)
        std_return = np.std(pnl_array)
        if std_return == 0:
            return reward
        sharpe = mean_return / std_return
        adjustment_factor = np.clip(1.0 + 0.5 * sharpe, 0.5, 2.0)
        return reward * adjustment_factor
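    # Worked example (hypothetical numbers): trade_pnls = [1, 2, 3, 4, 5] has mean 3.0 and
    # population std ≈ 1.414, so sharpe ≈ 2.12 and the factor clips to 2.0, doubling the reward.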
    def _calculate_holding_reward(self, position_held_time, price_change):
        """Calculate reward for holding a position"""
        base_holding_reward = 0.0005 * (position_held_time / 60.0)
        if price_change > 0:
            return base_holding_reward * 2
        elif price_change < 0:
            return base_holding_reward * 0.5
        return base_holding_reward
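    # Worked example (hypothetical numbers): holding for 120 s with a positive price change
    # yields 0.0005 * (120 / 60) * 2 = 0.002; the same hold with a negative change yields
    # 0.0005 * 2 * 0.5 = 0.0005.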
    def calculate_basic_reward(self, pnl, confidence):
        """Calculate basic training reward based on P&L and confidence"""
        try:
            # Reward based on net PnL after fees and confidence alignment
            base_reward = pnl

            # Stronger penalty for confident wrong decisions
            if pnl < 0 and confidence >= 0.6:
                confidence_adjustment = -confidence * 3.0
            elif pnl > 0 and confidence >= 0.6:
                confidence_adjustment = confidence * 1.0
            else:
                confidence_adjustment = 0.0

            final_reward = base_reward + confidence_adjustment
            # Reduce tanh compression so small PnL changes are not flattened
            normalized_reward = np.tanh(final_reward / 2.5)

            logger.debug(f"Basic reward calculation: P&L={pnl:.4f}, confidence={confidence:.2f}, reward={normalized_reward:.4f}")
            return float(normalized_reward)
        except Exception as e:
            logger.error(f"Error calculating basic reward: {e}")
            return 0.0
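    # Worked example (hypothetical numbers): pnl = +1.0 at confidence 0.8 gives
    # base 1.0 + adjustment 0.8 = 1.8, then tanh(1.8 / 2.5) ≈ 0.617; pnl = -1.0 at the
    # same confidence gives -1.0 - 2.4 = -3.4, then tanh(-3.4 / 2.5) ≈ -0.876.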
    def calculate_enhanced_reward(self, action, price_change, position_held_time=0, volatility=None, is_profitable=False, confidence=0.0, predicted_change=0.0, actual_change=0.0, current_pnl=0.0, symbol='UNKNOWN'):
        """Calculate enhanced reward for trading actions"""
        fee = self.base_fee_rate
        frequency_penalty = self._calculate_frequency_penalty()

        if action == 0:  # Buy
            reward = -fee - frequency_penalty
        elif action == 1:  # Sell
            profit_pct = price_change
            net_profit = profit_pct - (fee * 2)
            reward = net_profit * self.reward_scaling
            reward -= frequency_penalty
            self.record_pnl(net_profit)
        else:  # Hold
            if is_profitable:
                reward = self._calculate_holding_reward(position_held_time, price_change)
            else:
                reward = -0.0001

        if action in [0, 1] and predicted_change != 0:
            if (action == 0 and actual_change > 0) or (action == 1 and actual_change < 0):
                reward += abs(actual_change) * 5.0
            else:
                reward -= abs(predicted_change) * 2.0

        reward += current_pnl * 0.1
        if volatility is not None:
            reward -= abs(volatility) * 100
        if self.risk_aversion > 0 and len(self.returns) > 1:
            returns_std = np.std(self.returns)
            reward -= returns_std * self.risk_aversion

        self.record_trade(action)
        return reward
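    # Worked example (hypothetical numbers, mirroring the __main__ demo below): a sell with
    # price_change = 0.015 pays two fees of 0.001, so net_profit = 0.013 and the scaled
    # reward is 0.013 * 10.0 = 0.13 before any frequency or risk penalties.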
    def calculate_prediction_reward(self, symbol, predicted_direction, actual_direction, confidence, predicted_change, actual_change, current_pnl=0.0, position_duration=0.0):
        """Calculate reward for prediction accuracy"""
        reward = 0.0
        if predicted_direction == actual_direction:
            reward += 1.0 * confidence
        else:
            reward -= 0.5
        if predicted_direction == actual_direction and abs(predicted_change) > 0.001:
            reward += abs(actual_change) * 5.0
        if predicted_direction != actual_direction and abs(predicted_change) > 0.001:
            reward -= abs(predicted_change) * 2.0
        reward += current_pnl * 0.1

        # Dynamic adjustment based on recent PnL (loss cutting incentive)
        if hasattr(self, 'pnl_history') and symbol in self.pnl_history and self.pnl_history[symbol]:
            latest_pnl_entry = self.pnl_history[symbol][-1]
            latest_pnl_value = latest_pnl_entry.get('pnl', 0.0) if isinstance(latest_pnl_entry, dict) else 0.0
            if latest_pnl_value < 0 and position_duration > 60:
                reward -= (abs(latest_pnl_value) * 0.2)
            pnl_values = [entry.get('pnl', 0.0) for entry in self.pnl_history[symbol] if isinstance(entry, dict)]
            best_pnl = max(pnl_values) if pnl_values else 0.0
            if best_pnl < 0.0:
                reward -= 0.1
        return reward
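    # Worked example (hypothetical numbers): a correct direction call at confidence 0.7 with
    # predicted_change 0.005 and actual_change 0.01 scores 0.7 + 0.01 * 5.0 = 0.75 before
    # the PnL term, while an incorrect call with the same magnitudes scores -0.5 - 0.01 = -0.51.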
# Example usage:
if __name__ == "__main__":
    # Create calculator instance
    reward_calc = RewardCalculator()

    # Example reward for a buy action
    buy_reward = reward_calc.calculate_enhanced_reward(action=0, price_change=0)
    print(f"Buy action reward: {buy_reward:.5f}")

    # Record a trade for frequency tracking
    reward_calc.record_trade(0)

    # Wait a bit and make another trade to test frequency penalty
    import time
    time.sleep(0.1)

    # Example reward for a sell action with profit
    sell_reward = reward_calc.calculate_enhanced_reward(action=1, price_change=0.015, position_held_time=60)
    print(f"Sell action reward (with profit): {sell_reward:.5f}")

    # Example reward for a hold action on profitable position
    hold_reward = reward_calc.calculate_enhanced_reward(action=2, price_change=0.01, position_held_time=30, is_profitable=True)
    print(f"Hold action reward (profitable): {hold_reward:.5f}")

    # Example reward for a hold action on unprofitable position
    hold_reward_neg = reward_calc.calculate_enhanced_reward(action=2, price_change=-0.01, position_held_time=30, is_profitable=False)
    print(f"Hold action reward (unprofitable): {hold_reward_neg:.5f}")