wip
This commit is contained in:
395
core/model_output_manager.py
Normal file
395
core/model_output_manager.py
Normal file
@ -0,0 +1,395 @@
|
||||
"""
|
||||
Model Output Manager
|
||||
|
||||
This module provides extensible model output storage and management for the multi-modal trading system.
|
||||
Supports CNN, RL, LSTM, Transformer, and future model types with cross-model feeding capabilities.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import pickle
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from collections import deque, defaultdict
|
||||
from threading import Lock
|
||||
from pathlib import Path
|
||||
|
||||
from .data_models import ModelOutput, create_model_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ModelOutputManager:
|
||||
"""
|
||||
Extensible model output storage and management system
|
||||
|
||||
Features:
|
||||
- Standardized ModelOutput storage for all model types
|
||||
- Cross-model feeding with hidden states
|
||||
- Historical output tracking
|
||||
- Metadata management
|
||||
- Persistence and recovery
|
||||
- Performance analytics
|
||||
"""
|
||||
|
||||
def __init__(self, cache_dir: str = "cache/model_outputs", max_history: int = 1000):
|
||||
"""
|
||||
Initialize the model output manager
|
||||
|
||||
Args:
|
||||
cache_dir: Directory for persistent storage
|
||||
max_history: Maximum number of outputs to keep in memory per model
|
||||
"""
|
||||
self.cache_dir = Path(cache_dir)
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.max_history = max_history
|
||||
|
||||
# In-memory storage
|
||||
self.current_outputs: Dict[str, Dict[str, ModelOutput]] = defaultdict(dict) # {symbol: {model_name: ModelOutput}}
|
||||
self.output_history: Dict[str, Dict[str, deque]] = defaultdict(lambda: defaultdict(lambda: deque(maxlen=max_history))) # {symbol: {model_name: deque}}
|
||||
self.cross_model_states: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(lambda: defaultdict(dict)) # {symbol: {model_name: hidden_states}}
|
||||
|
||||
# Metadata tracking
|
||||
self.model_metadata: Dict[str, Dict[str, Any]] = defaultdict(dict) # {model_name: metadata}
|
||||
self.performance_stats: Dict[str, Dict[str, Any]] = defaultdict(lambda: defaultdict(dict)) # {symbol: {model_name: stats}}
|
||||
|
||||
# Thread safety
|
||||
self.storage_lock = Lock()
|
||||
|
||||
# Supported model types
|
||||
self.supported_model_types = {
|
||||
'cnn', 'rl', 'lstm', 'transformer', 'orchestrator',
|
||||
'ensemble', 'hybrid', 'custom' # Extensible for future types
|
||||
}
|
||||
|
||||
logger.info(f"ModelOutputManager initialized with cache dir: {self.cache_dir}")
|
||||
logger.info(f"Supported model types: {self.supported_model_types}")
|
||||
|
||||
def store_output(self, model_output: ModelOutput) -> bool:
|
||||
"""
|
||||
Store model output with full extensibility support
|
||||
|
||||
Args:
|
||||
model_output: ModelOutput from any model type
|
||||
|
||||
Returns:
|
||||
bool: True if stored successfully, False otherwise
|
||||
"""
|
||||
try:
|
||||
with self.storage_lock:
|
||||
symbol = model_output.symbol
|
||||
model_name = model_output.model_name
|
||||
model_type = model_output.model_type
|
||||
|
||||
# Validate model type (extensible)
|
||||
if model_type not in self.supported_model_types:
|
||||
logger.warning(f"Unknown model type '{model_type}' - adding to supported types")
|
||||
self.supported_model_types.add(model_type)
|
||||
|
||||
# Store current output
|
||||
self.current_outputs[symbol][model_name] = model_output
|
||||
|
||||
# Add to history
|
||||
self.output_history[symbol][model_name].append(model_output)
|
||||
|
||||
# Store cross-model states if available
|
||||
if model_output.hidden_states:
|
||||
self.cross_model_states[symbol][model_name] = model_output.hidden_states
|
||||
|
||||
# Update model metadata
|
||||
self._update_model_metadata(model_name, model_type, model_output.metadata)
|
||||
|
||||
# Update performance statistics
|
||||
self._update_performance_stats(symbol, model_name, model_output)
|
||||
|
||||
# Persist to disk (async to avoid blocking)
|
||||
self._persist_output_async(model_output)
|
||||
|
||||
logger.debug(f"Stored output from {model_name} ({model_type}) for {symbol}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error storing model output: {e}")
|
||||
return False
|
||||
|
||||
def get_current_output(self, symbol: str, model_name: str) -> Optional[ModelOutput]:
|
||||
"""
|
||||
Get the current (latest) output from a specific model
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
model_name: Name of the model
|
||||
|
||||
Returns:
|
||||
ModelOutput: Latest output from the model, or None if not available
|
||||
"""
|
||||
try:
|
||||
return self.current_outputs.get(symbol, {}).get(model_name)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting current output for {model_name}: {e}")
|
||||
return None
|
||||
|
||||
def get_all_current_outputs(self, symbol: str) -> Dict[str, ModelOutput]:
|
||||
"""
|
||||
Get all current outputs for a symbol (for cross-model feeding)
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
|
||||
Returns:
|
||||
Dict[str, ModelOutput]: Dictionary of current outputs by model name
|
||||
"""
|
||||
try:
|
||||
return dict(self.current_outputs.get(symbol, {}))
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting all current outputs for {symbol}: {e}")
|
||||
return {}
|
||||
|
||||
def get_output_history(self, symbol: str, model_name: str, count: int = 10) -> List[ModelOutput]:
|
||||
"""
|
||||
Get historical outputs from a model
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
model_name: Name of the model
|
||||
count: Number of historical outputs to retrieve
|
||||
|
||||
Returns:
|
||||
List[ModelOutput]: List of historical outputs (most recent first)
|
||||
"""
|
||||
try:
|
||||
history = self.output_history.get(symbol, {}).get(model_name, deque())
|
||||
return list(history)[-count:][::-1] # Most recent first
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting output history for {model_name}: {e}")
|
||||
return []
|
||||
|
||||
def get_cross_model_states(self, symbol: str, requesting_model: str) -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
Get hidden states from other models for cross-model feeding
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
requesting_model: Name of the model requesting the states
|
||||
|
||||
Returns:
|
||||
Dict[str, Dict[str, Any]]: Hidden states from other models
|
||||
"""
|
||||
try:
|
||||
all_states = self.cross_model_states.get(symbol, {})
|
||||
# Return states from all models except the requesting one
|
||||
return {model_name: states for model_name, states in all_states.items()
|
||||
if model_name != requesting_model}
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting cross-model states for {requesting_model}: {e}")
|
||||
return {}
|
||||
|
||||
def get_model_types_active(self, symbol: str) -> List[str]:
|
||||
"""
|
||||
Get list of active model types for a symbol
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
|
||||
Returns:
|
||||
List[str]: List of active model types
|
||||
"""
|
||||
try:
|
||||
current_outputs = self.current_outputs.get(symbol, {})
|
||||
return [output.model_type for output in current_outputs.values()]
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting active model types for {symbol}: {e}")
|
||||
return []
|
||||
|
||||
def get_consensus_prediction(self, symbol: str, confidence_threshold: float = 0.5) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get consensus prediction from all active models
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
confidence_threshold: Minimum confidence threshold for inclusion
|
||||
|
||||
Returns:
|
||||
Dict containing consensus prediction or None
|
||||
"""
|
||||
try:
|
||||
current_outputs = self.current_outputs.get(symbol, {})
|
||||
if not current_outputs:
|
||||
return None
|
||||
|
||||
# Filter by confidence threshold
|
||||
high_confidence_outputs = [
|
||||
output for output in current_outputs.values()
|
||||
if output.confidence >= confidence_threshold
|
||||
]
|
||||
|
||||
if not high_confidence_outputs:
|
||||
return None
|
||||
|
||||
# Calculate consensus
|
||||
buy_votes = sum(1 for output in high_confidence_outputs
|
||||
if output.predictions.get('action') == 'BUY')
|
||||
sell_votes = sum(1 for output in high_confidence_outputs
|
||||
if output.predictions.get('action') == 'SELL')
|
||||
hold_votes = sum(1 for output in high_confidence_outputs
|
||||
if output.predictions.get('action') == 'HOLD')
|
||||
|
||||
total_votes = len(high_confidence_outputs)
|
||||
avg_confidence = sum(output.confidence for output in high_confidence_outputs) / total_votes
|
||||
|
||||
# Determine consensus action
|
||||
if buy_votes > sell_votes and buy_votes > hold_votes:
|
||||
consensus_action = 'BUY'
|
||||
elif sell_votes > buy_votes and sell_votes > hold_votes:
|
||||
consensus_action = 'SELL'
|
||||
else:
|
||||
consensus_action = 'HOLD'
|
||||
|
||||
return {
|
||||
'action': consensus_action,
|
||||
'confidence': avg_confidence,
|
||||
'votes': {'BUY': buy_votes, 'SELL': sell_votes, 'HOLD': hold_votes},
|
||||
'total_models': total_votes,
|
||||
'model_types': [output.model_type for output in high_confidence_outputs]
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating consensus prediction for {symbol}: {e}")
|
||||
return None
|
||||
|
||||
def _update_model_metadata(self, model_name: str, model_type: str, metadata: Dict[str, Any]):
|
||||
"""Update metadata for a model"""
|
||||
try:
|
||||
if model_name not in self.model_metadata:
|
||||
self.model_metadata[model_name] = {
|
||||
'model_type': model_type,
|
||||
'first_seen': datetime.now(),
|
||||
'total_predictions': 0,
|
||||
'custom_metadata': {}
|
||||
}
|
||||
|
||||
self.model_metadata[model_name]['total_predictions'] += 1
|
||||
self.model_metadata[model_name]['last_seen'] = datetime.now()
|
||||
|
||||
# Merge custom metadata
|
||||
if metadata:
|
||||
self.model_metadata[model_name]['custom_metadata'].update(metadata)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating model metadata: {e}")
|
||||
|
||||
def _update_performance_stats(self, symbol: str, model_name: str, model_output: ModelOutput):
|
||||
"""Update performance statistics for a model"""
|
||||
try:
|
||||
stats = self.performance_stats[symbol][model_name]
|
||||
|
||||
if 'prediction_count' not in stats:
|
||||
stats['prediction_count'] = 0
|
||||
stats['confidence_sum'] = 0.0
|
||||
stats['action_counts'] = {'BUY': 0, 'SELL': 0, 'HOLD': 0}
|
||||
stats['first_prediction'] = model_output.timestamp
|
||||
|
||||
stats['prediction_count'] += 1
|
||||
stats['confidence_sum'] += model_output.confidence
|
||||
stats['avg_confidence'] = stats['confidence_sum'] / stats['prediction_count']
|
||||
stats['last_prediction'] = model_output.timestamp
|
||||
|
||||
action = model_output.predictions.get('action', 'HOLD')
|
||||
if action in stats['action_counts']:
|
||||
stats['action_counts'][action] += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating performance stats: {e}")
|
||||
|
||||
def _persist_output_async(self, model_output: ModelOutput):
|
||||
"""Persist model output to disk (simplified version)"""
|
||||
try:
|
||||
# Create filename based on model and timestamp
|
||||
timestamp_str = model_output.timestamp.strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"{model_output.model_name}_{model_output.symbol.replace('/', '_')}_{timestamp_str}.json"
|
||||
filepath = self.cache_dir / filename
|
||||
|
||||
# Convert to JSON-serializable format
|
||||
output_dict = {
|
||||
'model_type': model_output.model_type,
|
||||
'model_name': model_output.model_name,
|
||||
'symbol': model_output.symbol,
|
||||
'timestamp': model_output.timestamp.isoformat(),
|
||||
'confidence': model_output.confidence,
|
||||
'predictions': model_output.predictions,
|
||||
'metadata': model_output.metadata
|
||||
}
|
||||
|
||||
# Save to file (in a real implementation, this would be async)
|
||||
with open(filepath, 'w') as f:
|
||||
json.dump(output_dict, f, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error persisting model output: {e}")
|
||||
|
||||
def get_performance_summary(self, symbol: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Get performance summary for all models for a symbol
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
|
||||
Returns:
|
||||
Dict containing performance summary
|
||||
"""
|
||||
try:
|
||||
summary = {
|
||||
'symbol': symbol,
|
||||
'active_models': len(self.current_outputs.get(symbol, {})),
|
||||
'model_stats': {}
|
||||
}
|
||||
|
||||
for model_name, stats in self.performance_stats.get(symbol, {}).items():
|
||||
summary['model_stats'][model_name] = {
|
||||
'predictions': stats.get('prediction_count', 0),
|
||||
'avg_confidence': round(stats.get('avg_confidence', 0.0), 3),
|
||||
'action_distribution': stats.get('action_counts', {}),
|
||||
'model_type': self.model_metadata.get(model_name, {}).get('model_type', 'unknown')
|
||||
}
|
||||
|
||||
return summary
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting performance summary: {e}")
|
||||
return {'symbol': symbol, 'error': str(e)}
|
||||
|
||||
def cleanup_old_outputs(self, max_age_hours: int = 24):
|
||||
"""
|
||||
Clean up old outputs to manage memory usage
|
||||
|
||||
Args:
|
||||
max_age_hours: Maximum age of outputs to keep in hours
|
||||
"""
|
||||
try:
|
||||
cutoff_time = datetime.now() - timedelta(hours=max_age_hours)
|
||||
|
||||
with self.storage_lock:
|
||||
for symbol in self.output_history:
|
||||
for model_name in self.output_history[symbol]:
|
||||
history = self.output_history[symbol][model_name]
|
||||
# Remove old outputs
|
||||
while history and history[0].timestamp < cutoff_time:
|
||||
history.popleft()
|
||||
|
||||
logger.info(f"Cleaned up outputs older than {max_age_hours} hours")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up old outputs: {e}")
|
||||
|
||||
def add_custom_model_type(self, model_type: str):
|
||||
"""
|
||||
Add support for a new custom model type
|
||||
|
||||
Args:
|
||||
model_type: Name of the new model type
|
||||
"""
|
||||
self.supported_model_types.add(model_type)
|
||||
logger.info(f"Added support for custom model type: {model_type}")
|
||||
|
||||
def get_supported_model_types(self) -> List[str]:
|
||||
"""Get list of all supported model types"""
|
||||
return list(self.supported_model_types)
|
Reference in New Issue
Block a user