This commit is contained in:
Dobromir Popov
2025-10-23 00:21:22 +03:00
parent dbab0283c9
commit b0771ff34e
8 changed files with 451 additions and 125 deletions

View File

@@ -1269,6 +1269,110 @@ class DataProvider:
logger.error(f"Error getting price range for {symbol}: {e}")
return None
def get_historical_data_replay(self, symbol: str, start_time: datetime, end_time: datetime,
                               timeframes: Optional[List[str]] = None) -> Dict[str, pd.DataFrame]:
    """
    Get historical data for a specific time period for replay/training purposes.

    This method allows "going back in time" to replay market moves. For every
    requested timeframe it asks ``self.get_historical_data`` for a refreshed
    frame and keeps only the rows whose index lies in
    ``[start_time, end_time]`` (both ends inclusive).

    Args:
        symbol: Trading symbol (e.g., 'ETH/USDT')
        start_time: Start of the time period
        end_time: End of the time period
        timeframes: List of timeframes to fetch (default: ['1s', '1m', '1h', '1d'])

    Returns:
        Dict mapping timeframe to DataFrame with OHLCV data for the period.
        Every requested timeframe is present as a key; a timeframe with no
        data in the window, or whose fetch failed, maps to an empty DataFrame.
    """
    if timeframes is None:
        timeframes = ['1s', '1m', '1h', '1d']

    # timeframe -> (seconds per candle, spare candles requested as a buffer)
    candle_specs = {
        '1s': (1, 100),
        '1m': (60, 10),
        '1h': (3600, 5),
        '1d': (86400, 2),
    }
    # Used for any timeframe that is not listed in candle_specs.
    fallback_limit = 1000

    def align_to_index(moment, index):
        """Return `moment` as a Timestamp whose tz-awareness matches `index`."""
        bound = pd.Timestamp(moment)
        index_tz = getattr(index, 'tz', None)
        if index_tz is not None and bound.tzinfo is None:
            # NOTE(review): a naive bound is interpreted in the index's own
            # timezone - confirm this matches how callers build timestamps.
            return bound.tz_localize(index_tz)
        if index_tz is None and bound.tzinfo is not None:
            # NOTE(review): assumes a naive index holds UTC times - confirm.
            return bound.tz_convert('UTC').tz_localize(None)
        return bound

    logger.info(f"Replaying historical data for {symbol} from {start_time} to {end_time}")

    replay_data = {}
    for timeframe in timeframes:
        try:
            # Calculate how many candles we need for the time period
            spec = candle_specs.get(timeframe)
            if spec is None:
                limit = fallback_limit
            else:
                candle_seconds, spare_candles = spec
                # Clamp at zero so a reversed window never yields a negative limit.
                window_seconds = max((end_time - start_time).total_seconds(), 0.0)
                limit = int(window_seconds / candle_seconds) + spare_candles

            # NOTE(review): get_historical_data is given only a candle count,
            # not a time range. If it returns the most recent candles, a
            # window far in the past will not be covered by this limit -
            # confirm against its implementation.
            df = self.get_historical_data(symbol, timeframe, limit=limit, refresh=True)
            if df is None or df.empty:
                logger.warning(f" {timeframe}: No data available")
                replay_data[timeframe] = pd.DataFrame()
                continue

            # Filter to the exact time period. Bounds are aligned first so a
            # naive/aware mismatch does not raise inside the comparison.
            lower = align_to_index(start_time, df.index)
            upper = align_to_index(end_time, df.index)
            df_filtered = df[(df.index >= lower) & (df.index <= upper)]

            if df_filtered.empty:
                logger.warning(f" {timeframe}: No data in replay period")
                replay_data[timeframe] = pd.DataFrame()
            else:
                replay_data[timeframe] = df_filtered
                logger.info(f" {timeframe}: {len(df_filtered)} candles in replay period")
        except Exception as e:
            # Best effort: one failing timeframe must not abort the others.
            logger.error(f"Error fetching {timeframe} data for replay: {e}")
            replay_data[timeframe] = pd.DataFrame()

    populated = sum(1 for frame in replay_data.values() if not frame.empty)
    logger.info(f"Historical replay data prepared: {populated} timeframes")
    return replay_data
def get_market_state_at_time(self, symbol: str, timestamp: datetime,
                             context_window_minutes: int = 5) -> Dict[str, Any]:
    """
    Get complete market state at a specific point in time for training.

    This includes OHLCV data ±context_window_minutes around the timestamp,
    obtained from ``self.get_historical_data_replay``.

    Args:
        symbol: Trading symbol
        timestamp: The specific point in time
        context_window_minutes: Minutes before/after the timestamp to include

    Returns:
        Dict with market state data in training format: one
        ``'ohlcv_<timeframe>'`` key per timeframe that has data, each mapping
        to a dict of parallel lists under ``'timestamps'`` (formatted
        ``'%Y-%m-%d %H:%M:%S'``), ``'open'``, ``'high'``, ``'low'``,
        ``'close'`` and ``'volume'``. Timeframes that are empty or lack one
        of the OHLCV columns are omitted. Returns ``{}`` on any other error.
    """
    ohlcv_fields = ('open', 'high', 'low', 'close', 'volume')
    try:
        half_window = timedelta(minutes=context_window_minutes)
        start_time = timestamp - half_window
        end_time = timestamp + half_window

        logger.info(f"Getting market state for {symbol} at {timestamp} (±{context_window_minutes}min)")

        # Get replay data for the time window
        replay_data = self.get_historical_data_replay(symbol, start_time, end_time)

        # Convert to training format
        market_state = {}
        for timeframe, df in replay_data.items():
            if df.empty:
                continue

            # Skip a malformed frame instead of letting a KeyError discard
            # the timeframes that were converted successfully.
            missing = [field for field in ohlcv_fields if field not in df.columns]
            if missing:
                logger.warning(f" {timeframe}: missing OHLCV columns {missing}, skipped")
                continue

            entry = {'timestamps': df.index.strftime('%Y-%m-%d %H:%M:%S').tolist()}
            for field in ohlcv_fields:
                entry[field] = df[field].tolist()
            market_state[f'ohlcv_{timeframe}'] = entry

        logger.info(f"Market state prepared with {len(market_state)} timeframes")
        return market_state
    except Exception as e:
        # Best effort: callers receive an empty state rather than an exception.
        logger.error(f"Error getting market state at time: {e}")
        return {}
def get_historical_data(self, symbol: str, timeframe: str, limit: int = 1000, refresh: bool = False) -> Optional[pd.DataFrame]:
"""Get historical OHLCV data.
- Prefer cached data for low latency.