"""
Historical Data Loader - Integrates with existing DataProvider

Provides data loading and caching for the annotation UI, ensuring the same
data quality and structure used by training and inference systems.
"""
|
|
|
|
import logging
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta
import pandas as pd
from pathlib import Path
import pickle

logger = logging.getLogger(__name__)
|
|
|
|
|
|
class HistoricalDataLoader:
    """
    Loads historical data from the main system's DataProvider.

    Ensures consistency with training/inference data by serving from the
    DataProvider's in-memory cache when possible and falling back to its
    historical-fetch API. Results are memoized in a TTL-bound memory cache.
    """

    def __init__(self, data_provider):
        """
        Initialize with existing DataProvider

        Args:
            data_provider: Instance of core.data_provider.DataProvider
        """
        self.data_provider = data_provider
        # Disk-cache directory; currently only created, nothing is written here yet.
        self.cache_dir = Path("ANNOTATE/data/cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # In-memory cache: cache_key -> (DataFrame copy, time it was stored).
        # Entries older than cache_ttl are evicted lazily (see _get_cached /
        # _store_cached) so the cache cannot grow without bound.
        self.memory_cache: Dict[str, Tuple[pd.DataFrame, datetime]] = {}
        self.cache_ttl = timedelta(minutes=5)

        logger.info("HistoricalDataLoader initialized with existing DataProvider")

    def get_data(self, symbol: str, timeframe: str,
                 start_time: Optional[datetime] = None,
                 end_time: Optional[datetime] = None,
                 limit: int = 500) -> Optional[pd.DataFrame]:
        """
        Get historical data for symbol and timeframe

        Args:
            symbol: Trading pair (e.g., 'ETH/USDT')
            timeframe: Timeframe (e.g., '1s', '1m', '1h', '1d')
            start_time: Start time for data range
            end_time: End time for data range
            limit: Maximum number of candles to return

        Returns:
            DataFrame with OHLCV data or None if unavailable
        """
        # Check memory cache first (evicts the entry if it has expired).
        cache_key = f"{symbol}_{timeframe}_{start_time}_{end_time}_{limit}"
        cached = self._get_cached(cache_key)
        if cached is not None:
            logger.debug(f"Returning cached data for {symbol} {timeframe}")
            return cached

        try:
            # Use DataProvider's cached data if available.
            # NOTE(review): assumes cached_data is a dict-of-dicts keyed by
            # symbol then timeframe — confirm against core.data_provider.
            provider_cache = getattr(self.data_provider, 'cached_data', None)
            if provider_cache and symbol in provider_cache:
                df = provider_cache[symbol].get(timeframe)
                if df is not None and not df.empty:
                    # Filter by time range if specified
                    if start_time or end_time:
                        df = self._filter_by_time_range(df, start_time, end_time)

                    # Limit number of candles (keep the most recent ones)
                    if len(df) > limit:
                        df = df.tail(limit)

                    self._store_cached(cache_key, df)
                    logger.info(f"Loaded {len(df)} candles for {symbol} {timeframe}")
                    return df

            # Fallback: fetch from DataProvider's historical data method
            logger.info(f"Fetching fresh data for {symbol} {timeframe}")
            df = self.data_provider.get_historical_data(
                symbol=symbol,
                timeframe=timeframe,
                limit=limit
            )

            if df is not None and not df.empty:
                # Filter by time range if specified (can only shrink the frame,
                # so the fetch limit above still bounds the result size).
                if start_time or end_time:
                    df = self._filter_by_time_range(df, start_time, end_time)

                self._store_cached(cache_key, df)
                logger.info(f"Fetched {len(df)} candles for {symbol} {timeframe}")
                return df

            logger.warning(f"No data available for {symbol} {timeframe}")
            return None

        except Exception as e:
            # Best-effort loader: callers treat None as "no data".
            logger.error(f"Error loading data for {symbol} {timeframe}: {e}")
            return None

    def _get_cached(self, cache_key: str) -> Optional[pd.DataFrame]:
        """Return a fresh cached DataFrame for cache_key, or None.

        Evicts the entry if it is older than cache_ttl, so stale frames do
        not accumulate in memory_cache.
        """
        entry = self.memory_cache.get(cache_key)
        if entry is None:
            return None
        cached_df, cached_time = entry
        if datetime.now() - cached_time < self.cache_ttl:
            return cached_df
        # Expired: drop it so the cache stays bounded.
        del self.memory_cache[cache_key]
        return None

    def _store_cached(self, cache_key: str, df: pd.DataFrame) -> None:
        """Store a defensive copy of df and sweep out any expired entries."""
        now = datetime.now()
        expired = [key for key, (_, stored_at) in self.memory_cache.items()
                   if now - stored_at >= self.cache_ttl]
        for key in expired:
            del self.memory_cache[key]
        self.memory_cache[cache_key] = (df.copy(), now)

    def _filter_by_time_range(self, df: pd.DataFrame,
                              start_time: Optional[datetime],
                              end_time: Optional[datetime]) -> pd.DataFrame:
        """Filter DataFrame by inclusive time range on its index.

        NOTE(review): assumes df.index is a DatetimeIndex comparable to the
        given datetimes (naive vs. timezone-aware mismatch would raise) —
        confirm against DataProvider's frame construction.
        """
        if start_time:
            df = df[df.index >= start_time]
        if end_time:
            df = df[df.index <= end_time]
        return df

    def get_multi_timeframe_data(self, symbol: str,
                                 timeframes: List[str],
                                 start_time: Optional[datetime] = None,
                                 end_time: Optional[datetime] = None,
                                 limit: int = 500) -> Dict[str, pd.DataFrame]:
        """
        Get data for multiple timeframes at once

        Args:
            symbol: Trading pair
            timeframes: List of timeframes
            start_time: Start time for data range
            end_time: End time for data range
            limit: Maximum number of candles per timeframe

        Returns:
            Dictionary mapping timeframe to DataFrame (timeframes with no
            data are omitted)
        """
        result = {}

        for timeframe in timeframes:
            df = self.get_data(
                symbol=symbol,
                timeframe=timeframe,
                start_time=start_time,
                end_time=end_time,
                limit=limit
            )

            if df is not None:
                result[timeframe] = df

        logger.info(f"Loaded data for {len(result)}/{len(timeframes)} timeframes")
        return result

    def prefetch_data(self, symbol: str, timeframes: List[str], limit: int = 1000):
        """
        Prefetch data for smooth scrolling

        Args:
            symbol: Trading pair
            timeframes: List of timeframes to prefetch
            limit: Number of candles to prefetch
        """
        logger.info(f"Prefetching data for {symbol}: {timeframes}")

        # Warming the memory cache is the only goal; results are discarded.
        for timeframe in timeframes:
            self.get_data(symbol, timeframe, limit=limit)

    def clear_cache(self):
        """Clear memory cache"""
        self.memory_cache.clear()
        logger.info("Memory cache cleared")

    def get_data_boundaries(self, symbol: str, timeframe: str) -> Tuple[Optional[datetime], Optional[datetime]]:
        """
        Get the earliest and latest available data timestamps

        Args:
            symbol: Trading pair
            timeframe: Timeframe

        Returns:
            Tuple of (earliest_time, latest_time) or (None, None) if no data
        """
        try:
            # Large limit so the boundaries reflect (close to) the full history.
            df = self.get_data(symbol, timeframe, limit=10000)

            if df is not None and not df.empty:
                return (df.index.min(), df.index.max())

            return (None, None)

        except Exception as e:
            logger.error(f"Error getting data boundaries: {e}")
            return (None, None)
|
|
|
|
|
|
class TimeRangeManager:
    """Manages time range calculations and data prefetching"""

    def __init__(self, data_loader: "HistoricalDataLoader"):
        """
        Initialize with data loader

        Args:
            data_loader: HistoricalDataLoader instance
        """
        self.data_loader = data_loader

        # Time range presets in seconds
        self.range_presets = {
            '1h': 3600,
            '4h': 14400,
            '1d': 86400,
            '1w': 604800,
            '1M': 2592000,
        }

        logger.info("TimeRangeManager initialized")

    def calculate_time_range(self, center_time: datetime,
                             range_preset: str) -> Tuple[datetime, datetime]:
        """
        Calculate start and end times for a range preset.

        The range is centered on center_time; unknown presets fall back to
        one day (86400 seconds).

        Args:
            center_time: Center point of the range
            range_preset: Range preset ('1h', '4h', '1d', '1w', '1M')

        Returns:
            Tuple of (start_time, end_time)
        """
        seconds = self.range_presets.get(range_preset, 86400)
        half_span = timedelta(seconds=seconds / 2)
        return (center_time - half_span, center_time + half_span)

    def get_navigation_increment(self, range_preset: str) -> timedelta:
        """
        Get time increment for navigation (10% of range).

        Args:
            range_preset: Range preset

        Returns:
            timedelta for navigation increment
        """
        seconds = self.range_presets.get(range_preset, 86400)
        return timedelta(seconds=seconds / 10)

    def prefetch_adjacent_ranges(self, symbol: str, timeframes: List[str],
                                 center_time: datetime, range_preset: str):
        """
        Prefetch data for adjacent time ranges for smooth scrolling.

        Warms the loader's cache for the ranges one navigation step before
        and after the current center, on a background daemon thread.

        Args:
            symbol: Trading pair
            timeframes: List of timeframes
            center_time: Current center time
            range_preset: Current range preset
        """
        step = self.get_navigation_increment(range_preset)

        # Windows one step back and one step forward from the current view.
        windows = [
            self.calculate_time_range(center_time - step, range_preset),
            self.calculate_time_range(center_time + step, range_preset),
        ]

        logger.debug(f"Prefetching adjacent ranges for {symbol}")

        # Prefetch in background (non-blocking)
        import threading

        def worker():
            for tf in timeframes:
                for window_start, window_end in windows:
                    self.data_loader.get_data(symbol, tf, window_start, window_end)

        threading.Thread(target=worker, daemon=True).start()