wip symbols tidy up

This commit is contained in:
Dobromir Popov
2025-07-24 16:08:58 +03:00
parent d17af5ca4b
commit 045780758a
4 changed files with 184 additions and 58 deletions

View File

@ -110,7 +110,9 @@ class TradingOrchestrator:
self.confidence_threshold_close = self.config.orchestrator.get('confidence_threshold_close', 0.08) # Lowered from 0.10
# Decision frequency limit to prevent excessive trading
self.decision_frequency = self.config.orchestrator.get('decision_frequency', 30)
self.symbols = self.config.get('symbols', ['ETH/USDT', 'BTC/USDT']) # Enhanced to support multiple symbols
self.symbol = self.config.get('symbol', "ETH/USDT") # main symbol we wre trading and making predictions on. only one!
self.ref_symbols = self.config.get('ref_symbols', [ 'BTC/USDT']) # Enhanced to support multiple reference symbols. ToDo: we can add 'SOL/USDT' later
# NEW: Aggressiveness parameters
self.entry_aggressiveness = self.config.orchestrator.get('entry_aggressiveness', 0.5) # 0.0 = conservative, 1.0 = very aggressive
@ -153,12 +155,11 @@ class TradingOrchestrator:
self.recent_cnn_predictions: Dict[str, deque] = {} # {symbol: List[Dict]} - Recent CNN predictions
self.prediction_accuracy_history: Dict[str, deque] = {} # {symbol: List[Dict]} - Prediction accuracy tracking
# Initialize prediction tracking for each symbol
for symbol in self.symbols:
self.recent_dqn_predictions[symbol] = deque(maxlen=100)
self.recent_cnn_predictions[symbol] = deque(maxlen=50)
self.prediction_accuracy_history[symbol] = deque(maxlen=200)
self.signal_accumulator[symbol] = []
# Initialize prediction tracking for the primary trading symbol only
self.recent_dqn_predictions[self.symbol] = deque(maxlen=100)
self.recent_cnn_predictions[self.symbol] = deque(maxlen=50)
self.prediction_accuracy_history[self.symbol] = deque(maxlen=200)
self.signal_accumulator[self.symbol] = []
# Decision callbacks
self.decision_callbacks: List[Any] = []
@ -177,7 +178,7 @@ class TradingOrchestrator:
self.latest_cob_data: Dict[str, Any] = {} # {symbol: COBSnapshot}
self.latest_cob_features: Dict[str, Any] = {} # {symbol: np.ndarray} - CNN features
self.latest_cob_state: Dict[str, Any] = {} # {symbol: np.ndarray} - DQN state features
self.cob_feature_history: Dict[str, List[Any]] = {symbol: [] for symbol in self.symbols} # Rolling history for models
self.cob_feature_history: Dict[str, List[Any]] = {self.symbol: []} # Rolling history for primary trading symbol
# Enhanced ML Models
self.rl_agent: Any = None # DQN Agent
@ -204,13 +205,13 @@ class TradingOrchestrator:
# Training tracking
self.last_trained_symbols: Dict[str, datetime] = {}
# INFERENCE DATA STORAGE - Store model inputs and outputs for training
self.inference_history: Dict[str, deque] = {} # {symbol: deque of inference records}
self.max_inference_history = 1000 # Keep last 1000 inference records per symbol
# INFERENCE DATA STORAGE - Per-model storage with memory optimization
self.inference_history: Dict[str, deque] = {} # {model_name: deque of last 5 inference records}
self.max_memory_inferences = 5 # Keep only last 5 inferences in memory per model
self.max_disk_files_per_model = 200 # Cap disk files per model
# Initialize inference history for each symbol
for symbol in self.symbols:
self.inference_history[symbol] = deque(maxlen=self.max_inference_history)
# Initialize inference history for each model (will be populated as models make predictions)
# We'll create entries dynamically as models are used
# ENHANCED: Real-time Training System Integration
self.enhanced_training_system = None # Will be set to EnhancedRealtimeTrainingSystem if available
@ -1071,9 +1072,9 @@ class TradingOrchestrator:
# Store input data for generic model
model_input = input_data.get('generic_input')
# Store inference data for training
# Store inference data for training (per-model, async)
if prediction and model_input is not None:
self._store_inference_data(symbol, model_name, model_input, prediction, current_time)
await self._store_inference_data_async(model_name, model_input, prediction, current_time)
except Exception as e:
logger.error(f"Error getting prediction from {model_name}: {e}")
@ -1085,61 +1086,134 @@ class TradingOrchestrator:
return predictions
async def _collect_model_input_data(self, symbol: str) -> Dict[str, Any]:
"""Collect comprehensive input data for all models"""
"""Collect standardized input data for all models - ETH primary + BTC reference"""
try:
input_data = {}
# Only collect data for ETH (primary symbol) - we inference only for ETH
if symbol != 'ETH/USDT':
return {}
# Get current market data from data provider
current_price = self.data_provider.get_current_price(symbol)
# Standardized input: 4 ETH timeframes + 1s BTC reference
eth_data = {}
eth_timeframes = ['1s', '1m', '1h', '1d']
# Collect OHLCV data for multiple timeframes
ohlcv_data = {}
timeframes = ['1s', '1m', '1h', '1d']
for tf in timeframes:
df = self.data_provider.get_historical_data(symbol, tf, limit=300)
# Collect ETH data for all timeframes
for tf in eth_timeframes:
df = self.data_provider.get_historical_data('ETH/USDT', tf, limit=300)
if df is not None and not df.empty:
ohlcv_data[tf] = df
eth_data[f'ETH_{tf}'] = df
# Collect COB data if available
cob_data = self.get_cob_snapshot(symbol)
# Collect BTC 1s reference data
btc_1s = self.data_provider.get_historical_data('BTC/USDT', '1s', limit=300)
btc_data = {}
if btc_1s is not None and not btc_1s.empty:
btc_data['BTC_1s'] = btc_1s
# Collect technical indicators
technical_indicators = {}
if '1h' in ohlcv_data:
df = ohlcv_data['1h']
if len(df) > 20:
technical_indicators['sma_20'] = df['close'].rolling(20).mean().iloc[-1]
technical_indicators['rsi'] = self._calculate_rsi(df['close'])
# Get current prices
eth_price = self.data_provider.get_current_price('ETH/USDT')
btc_price = self.data_provider.get_current_price('BTC/USDT')
# Prepare CNN input
cnn_input = self._prepare_cnn_input_data(ohlcv_data, cob_data, technical_indicators)
# Prepare RL input
rl_input = self._prepare_rl_input_data(ohlcv_data, cob_data, technical_indicators)
# Prepare generic input
generic_input = {
'symbol': symbol,
'current_price': current_price,
'ohlcv_data': ohlcv_data,
'cob_data': cob_data,
'technical_indicators': technical_indicators
}
input_data = {
'cnn_input': cnn_input,
'rl_input': rl_input,
'generic_input': generic_input,
# Create standardized input package
standardized_input = {
'timestamp': datetime.now(),
'symbol': symbol
'primary_symbol': 'ETH/USDT',
'reference_symbol': 'BTC/USDT',
'eth_data': eth_data,
'btc_data': btc_data,
'current_prices': {
'ETH': eth_price,
'BTC': btc_price
},
'data_completeness': {
'eth_timeframes': len(eth_data),
'btc_reference': len(btc_data),
'total_expected': 5 # 4 ETH + 1 BTC
}
}
return input_data
return standardized_input
except Exception as e:
logger.error(f"Error collecting model input data for {symbol}: {e}")
logger.error(f"Error collecting standardized model input data: {e}")
return {}
async def _store_inference_data_async(self, model_name: str, model_input: Any, prediction: Prediction, timestamp: datetime):
"""Store inference data per-model with async file operations and memory optimization"""
try:
# Create comprehensive inference record
inference_record = {
'timestamp': timestamp.isoformat(),
'model_name': model_name,
'model_input': model_input,
'prediction': {
'action': prediction.action,
'confidence': prediction.confidence,
'probabilities': prediction.probabilities,
'timeframe': prediction.timeframe
},
'metadata': prediction.metadata or {}
}
# Store in memory (only last 5 per model)
if model_name not in self.inference_history:
self.inference_history[model_name] = deque(maxlen=self.max_memory_inferences)
self.inference_history[model_name].append(inference_record)
# Async file storage (don't wait for completion)
asyncio.create_task(self._save_inference_to_disk_async(model_name, inference_record))
logger.debug(f"Stored inference data for {model_name} (memory: {len(self.inference_history[model_name])}/5)")
except Exception as e:
logger.error(f"Error storing inference data for {model_name}: {e}")
async def _save_inference_to_disk_async(self, model_name: str, inference_record: Dict):
"""Async save inference record to disk with file capping"""
try:
# Create model-specific directory
model_dir = Path(f"training_data/inference_history/{model_name}")
model_dir.mkdir(parents=True, exist_ok=True)
# Create filename with timestamp
timestamp_str = datetime.fromisoformat(inference_record['timestamp']).strftime('%Y%m%d_%H%M%S_%f')[:-3]
filename = f"inference_{timestamp_str}.json"
filepath = model_dir / filename
# Convert to JSON-serializable format
serializable_record = self._make_json_serializable(inference_record)
# Save to file
with open(filepath, 'w') as f:
json.dump(serializable_record, f, indent=2)
# Cap files per model (keep only latest 200)
await self._cap_model_files(model_dir)
logger.debug(f"Saved inference record to disk: {filepath}")
except Exception as e:
logger.error(f"Error saving inference to disk for {model_name}: {e}")
async def _cap_model_files(self, model_dir: Path):
"""Cap the number of files per model to max_disk_files_per_model"""
try:
# Get all inference files
files = list(model_dir.glob("inference_*.json"))
if len(files) > self.max_disk_files_per_model:
# Sort by modification time (oldest first)
files.sort(key=lambda x: x.stat().st_mtime)
# Remove oldest files
files_to_remove = files[:-self.max_disk_files_per_model]
for file_path in files_to_remove:
file_path.unlink()
logger.debug(f"Removed {len(files_to_remove)} old inference files from {model_dir.name}")
except Exception as e:
logger.error(f"Error capping model files in {model_dir}: {e}")
def _prepare_cnn_input_data(self, ohlcv_data: Dict, cob_data: Any, technical_indicators: Dict) -> np.ndarray:
"""Prepare standardized input data for CNN models"""
try:
@ -1350,6 +1424,35 @@ class TradingOrchestrator:
logger.error(f"Error loading inference history from disk: {e}")
return []
async def load_model_inference_history(self, model_name: str, limit: int = 50) -> List[Dict]:
"""Load inference history for a specific model from disk"""
try:
model_dir = Path(f"training_data/inference_history/{model_name}")
if not model_dir.exists():
return []
# Get all inference files
files = list(model_dir.glob("inference_*.json"))
files.sort(key=lambda x: x.stat().st_mtime, reverse=True) # Newest first
# Load up to 'limit' files
inference_records = []
for filepath in files[:limit]:
try:
with open(filepath, 'r') as f:
record = json.load(f)
inference_records.append(record)
except Exception as e:
logger.warning(f"Error loading inference file {filepath}: {e}")
continue
logger.info(f"Loaded {len(inference_records)} inference records for {model_name}")
return inference_records
except Exception as e:
logger.error(f"Error loading model inference history for {model_name}: {e}")
return []
def get_model_training_data(self, model_name: str, symbol: str = None) -> List[Dict]:
"""Get training data for a specific model"""
try: