diff --git a/ANNOTATE/core/data_loader.py b/ANNOTATE/core/data_loader.py
index 1ba6aff..afa4f4b 100644
--- a/ANNOTATE/core/data_loader.py
+++ b/ANNOTATE/core/data_loader.py
@@ -11,6 +11,7 @@ from datetime import datetime, timedelta
 import pandas as pd
 from pathlib import Path
 import pickle
+import time
 
 logger = logging.getLogger(__name__)
 
@@ -44,7 +45,7 @@ class HistoricalDataLoader:
     def get_data(self, symbol: str, timeframe: str,
                  start_time: Optional[datetime] = None,
                  end_time: Optional[datetime] = None,
-                 limit: int = 500,
+                 limit: int = 2500,
                  direction: str = 'latest') -> Optional[pd.DataFrame]:
         """
         Get historical data for symbol and timeframe
@@ -60,12 +61,15 @@ class HistoricalDataLoader:
         Returns:
             DataFrame with OHLCV data or None if unavailable
         """
-        # Check memory cache first
-        cache_key = f"{symbol}_{timeframe}_{start_time}_{end_time}_{limit}_{direction}"
-        if cache_key in self.memory_cache:
+        start_time_ms = time.time()
+
+        # Check memory cache first (exclude direction from cache key for infinite scroll)
+        cache_key = f"{symbol}_{timeframe}_{start_time}_{end_time}_{limit}"
+        if cache_key in self.memory_cache and direction == 'latest':
             cached_data, cached_time = self.memory_cache[cache_key]
             if datetime.now() - cached_time < self.cache_ttl:
-                logger.debug(f"Returning cached data for {symbol} {timeframe}")
+                elapsed_ms = (time.time() - start_time_ms) * 1000
+                logger.debug(f"⚡ Memory cache hit for {symbol} {timeframe} ({elapsed_ms:.1f}ms)")
                 return cached_data
 
         try:
@@ -77,7 +81,8 @@ class HistoricalDataLoader:
             if cached_df is not None and not cached_df.empty:
                 # Use cached data if we have enough candles
                 if len(cached_df) >= min(limit, 100):  # Use cached if we have at least 100 candles
-                    logger.debug(f"Using DataProvider cached data for {symbol} {timeframe} ({len(cached_df)} candles)")
+                    elapsed_ms = (time.time() - start_time_ms) * 1000
+                    logger.debug(f"🚀 DataProvider cache hit for {symbol} {timeframe} ({len(cached_df)} candles, {elapsed_ms:.1f}ms)")
 
                     # Filter by time range with direction support
                     filtered_df = self._filter_by_time_range(
@@ -158,20 +163,29 @@ class HistoricalDataLoader:
                 logger.info(f"Loaded {len(df)} candles for {symbol} {timeframe}")
                 return df
 
-            # Fallback: Try DuckDB first, then fetch from API if needed
-            if self.startup_mode:
-                logger.info(f"Loading data for {symbol} {timeframe} (startup mode: allow stale cache)")
-                df = self.data_provider.get_historical_data(
+            # Check DuckDB first for historical data (always check for infinite scroll)
+            if self.data_provider.duckdb_storage and (start_time or end_time):
+                logger.info(f"Checking DuckDB for {symbol} {timeframe} historical data (direction={direction})")
+                df = self.data_provider.duckdb_storage.get_ohlcv_data(
                     symbol=symbol,
                     timeframe=timeframe,
+                    start_time=start_time,
+                    end_time=end_time,
                     limit=limit,
-                    allow_stale_cache=True
+                    direction=direction
                 )
-            else:
-                # Check DuckDB first for historical data
-                if self.data_provider.duckdb_storage and (start_time or end_time):
-                    logger.info(f"Checking DuckDB for {symbol} {timeframe} historical data (direction={direction})")
-                    df = self.data_provider.duckdb_storage.get_ohlcv_data(
+
+                if df is not None and not df.empty:
+                    elapsed_ms = (time.time() - start_time_ms) * 1000
+                    logger.info(f"✅ DuckDB hit for {symbol} {timeframe} ({len(df)} candles, {elapsed_ms:.1f}ms)")
+                    # Cache in memory
+                    self.memory_cache[cache_key] = (df.copy(), datetime.now())
+                    return df
+                else:
+                    logger.info(f"📡 No data in DuckDB, fetching from exchange API for {symbol} {timeframe}")
+
+                    # Fetch from exchange API with time range
+                    df = self._fetch_from_exchange_api(
                         symbol=symbol,
                         timeframe=timeframe,
                         start_time=start_time,
@@ -181,40 +195,35 @@ class HistoricalDataLoader:
                     )
 
                     if df is not None and not df.empty:
-                        logger.info(f"✅ Loaded {len(df)} candles from DuckDB for {symbol} {timeframe}")
+                        elapsed_ms = (time.time() - start_time_ms) * 1000
+                        logger.info(f"🌐 Exchange API hit for {symbol} {timeframe} ({len(df)} candles, {elapsed_ms:.1f}ms)")
+
+                        # Store in DuckDB for future use
+                        if self.data_provider.duckdb_storage:
+                            stored_count = self.data_provider.duckdb_storage.store_ohlcv_data(
+                                symbol=symbol,
+                                timeframe=timeframe,
+                                df=df
+                            )
+                            logger.info(f"💾 Stored {stored_count} new candles in DuckDB")
+
                         # Cache in memory
                         self.memory_cache[cache_key] = (df.copy(), datetime.now())
                         return df
                     else:
-                        logger.info(f"📡 No data in DuckDB, fetching from exchange API for {symbol} {timeframe}")
-
-                        # Fetch from exchange API with time range
-                        df = self._fetch_from_exchange_api(
-                            symbol=symbol,
-                            timeframe=timeframe,
-                            start_time=start_time,
-                            end_time=end_time,
-                            limit=limit,
-                            direction=direction
-                        )
-
-                        if df is not None and not df.empty:
-                            # Store in DuckDB for future use
-                            if self.data_provider.duckdb_storage:
-                                stored_count = self.data_provider.duckdb_storage.store_ohlcv_data(
-                                    symbol=symbol,
-                                    timeframe=timeframe,
-                                    df=df
-                                )
-                                logger.info(f"💾 Stored {stored_count} new candles in DuckDB")
-
-                            # Cache in memory
-                            self.memory_cache[cache_key] = (df.copy(), datetime.now())
-                            return df
-                        else:
-                            logger.warning(f"No data available from exchange API for {symbol} {timeframe}")
-                            return None
-
+                        logger.warning(f"No data available from exchange API for {symbol} {timeframe}")
+                        return None
+
+            # Fallback: Use DataProvider for latest data (startup mode or no time range)
+            if self.startup_mode and not (start_time or end_time):
+                logger.info(f"Loading data for {symbol} {timeframe} (startup mode: allow stale cache)")
+                df = self.data_provider.get_historical_data(
+                    symbol=symbol,
+                    timeframe=timeframe,
+                    limit=limit,
+                    allow_stale_cache=True
+                )
+            else:
                 # Fetch from API and store in DuckDB (no time range specified)
                 logger.info(f"Fetching latest data from API for {symbol} {timeframe}")
                 df = self.data_provider.get_historical_data(
@@ -403,7 +412,7 @@ class HistoricalDataLoader:
                                 timeframes: List[str],
                                 start_time: Optional[datetime] = None,
                                 end_time: Optional[datetime] = None,
-                                limit: int = 500) -> Dict[str, pd.DataFrame]:
+                                limit: int = 2500) -> Dict[str, pd.DataFrame]:
         """
         Get data for multiple timeframes at once
 
diff --git a/ANNOTATE/data/annotations/annotations_db.json b/ANNOTATE/data/annotations/annotations_db.json
index 2ceafcd..f8aadd4 100644
--- a/ANNOTATE/data/annotations/annotations_db.json
+++ b/ANNOTATE/data/annotations/annotations_db.json
@@ -22,10 +22,33 @@
         "entry_state": {},
         "exit_state": {}
       }
+    },
+    {
+      "annotation_id": "5d5c4354-12dd-4e0c-92a8-eff631a5dfab",
+      "symbol": "ETH/USDT",
+      "timeframe": "1h",
+      "entry": {
+        "timestamp": "2025-10-23 20:00",
+        "price": 3818.72,
+        "index": 5
+      },
+      "exit": {
+        "timestamp": "2025-10-24 05:00",
+        "price": 3989.2,
+        "index": 6
+      },
+      "direction": "LONG",
+      "profit_loss_pct": 4.4643231239787164,
+      "notes": "",
+      "created_at": "2025-10-24T23:35:14.215744",
+      "market_context": {
+        "entry_state": {},
+        "exit_state": {}
+      }
     }
   ],
   "metadata": {
-    "total_annotations": 1,
-    "last_updated": "2025-10-24T22:33:26.194492"
+    "total_annotations": 2,
+    "last_updated": "2025-10-24T23:35:14.216759"
   }
 }
\ No newline at end of file
diff --git a/ANNOTATE/web/app.py b/ANNOTATE/web/app.py
index 4a3011f..32d0c78 100644
--- a/ANNOTATE/web/app.py
+++ b/ANNOTATE/web/app.py
@@ -534,7 +534,7 @@ class AnnotationDashboard:
             timeframes = data.get('timeframes', ['1s', '1m', '1h', '1d'])
             start_time_str = data.get('start_time')
             end_time_str = data.get('end_time')
-            limit = data.get('limit', 2000)  # Default 2000 candles for training
+            limit = data.get('limit', 2500)  # Default 2500 candles for training
             direction = data.get('direction', 'latest')  # 'latest', 'before', or 'after'
 
             logger.info(f"📊 Chart data request: {symbol} {timeframes} direction={direction} limit={limit}")
diff --git a/ANNOTATE/web/templates/annotation_dashboard.html b/ANNOTATE/web/templates/annotation_dashboard.html
index 42325b8..ab62d5b 100644
--- a/ANNOTATE/web/templates/annotation_dashboard.html
+++ b/ANNOTATE/web/templates/annotation_dashboard.html
@@ -59,10 +59,11 @@
 {% block extra_js %}
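
Note on the get_data changes in ANNOTATE/core/data_loader.py: the memory cache key no longer encodes direction, and cache reads are gated on direction == 'latest', so infinite-scroll requests ('before'/'after') are always served from DuckDB, falling back to the exchange API, which then back-fills DuckDB. A minimal caller-side sketch of the intended back-pagination pattern; only the get_data signature is taken from the patch, and the helper name page_older_candles is hypothetical:

    from datetime import datetime
    from typing import Optional

    import pandas as pd

    def page_older_candles(loader, symbol: str, timeframe: str,
                           oldest_visible: datetime,
                           limit: int = 2500) -> Optional[pd.DataFrame]:
        # loader is a HistoricalDataLoader as patched above. Passing a time
        # bound plus direction='before' routes the request through the new
        # DuckDB branch (the memory-cache read is skipped for non-'latest'
        # directions), so repeated scrolls never serve a stale 'latest' page.
        return loader.get_data(
            symbol,
            timeframe,
            end_time=oldest_visible,  # fetch candles up to the oldest visible one
            limit=limit,
            direction='before',
        )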