WIP: OHLCV in storage; migrate to DuckDB

This commit is contained in:
Dobromir Popov
2025-10-24 15:57:33 +03:00
parent d4ed894a92
commit 809c32e7a8
7 changed files with 1417 additions and 92 deletions

View File

@@ -2,10 +2,12 @@
Annotation Manager - Manages trade annotations and test case generation
Handles storage, retrieval, and test case generation from manual trade annotations.
Stores annotations in both JSON (legacy) and DuckDB (with full market data).
"""
import json
import uuid
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any
@@ -13,8 +15,20 @@ from dataclasses import dataclass, asdict
import logging
import pytz
# Add parent directory to path for imports
parent_dir = Path(__file__).parent.parent.parent
sys.path.insert(0, str(parent_dir))
logger = logging.getLogger(__name__)
# Import DuckDB storage
try:
from core.duckdb_storage import DuckDBStorage
DUCKDB_AVAILABLE = True
except ImportError:
DUCKDB_AVAILABLE = False
logger.warning("DuckDB storage not available for annotations")
@dataclass
class TradeAnnotation:
@@ -51,6 +65,15 @@ class AnnotationManager:
self.annotations_db = self._load_annotations()
# Initialize DuckDB storage for complete annotation data
self.duckdb_storage: Optional[DuckDBStorage] = None
if DUCKDB_AVAILABLE:
try:
self.duckdb_storage = DuckDBStorage()
logger.info("DuckDB storage initialized for annotations")
except Exception as e:
logger.warning(f"Could not initialize DuckDB storage: {e}")
logger.info(f"AnnotationManager initialized with storage: {self.storage_path}")
def _load_annotations(self) -> Dict[str, List[Dict]]:
@@ -122,17 +145,39 @@ class AnnotationManager:
logger.info(f" Exit state: {len(exit_market_state or {})} timeframes")
return annotation
def save_annotation(self, annotation: TradeAnnotation,
                    market_snapshots: Optional[Dict] = None,
                    model_predictions: Optional[List[Dict]] = None):
    """
    Save annotation to storage (JSON legacy + DuckDB when available).

    The JSON write always happens; the DuckDB write is best-effort and only
    attempted when a DuckDB storage backend was initialized AND market
    snapshots were provided.

    Args:
        annotation: TradeAnnotation object to persist
        market_snapshots: Optional dict of {timeframe: DataFrame} with OHLCV
            data captured around the annotation time
        model_predictions: Optional list of model predictions recorded at
            annotation time
    """
    # Convert the dataclass to a plain dict for serialization
    ann_dict = asdict(annotation)

    # Add to the in-memory JSON database (legacy path)
    self.annotations_db["annotations"].append(ann_dict)

    # Persist the JSON file immediately so the annotation survives restarts
    self._save_annotations()

    # Save to DuckDB with complete market data. A DuckDB failure must not
    # lose the annotation — it is already persisted in JSON above — so we
    # log the error and continue rather than re-raise.
    if self.duckdb_storage and market_snapshots:
        try:
            self.duckdb_storage.store_annotation(
                annotation_id=annotation.annotation_id,
                annotation_data=ann_dict,
                market_snapshots=market_snapshots,
                model_predictions=model_predictions
            )
            logger.info(f"Saved annotation {annotation.annotation_id} to DuckDB with {len(market_snapshots)} timeframes")
        except Exception as e:
            logger.error(f"Could not save annotation to DuckDB: {e}")

    logger.info(f"Saved annotation: {annotation.annotation_id}")
def get_annotations(self, symbol: str = None,

View File

@@ -36,7 +36,10 @@ class HistoricalDataLoader:
self.memory_cache = {}
self.cache_ttl = timedelta(minutes=5)
logger.info("HistoricalDataLoader initialized with existing DataProvider")
# Startup mode - allow stale cache for faster loading
self.startup_mode = True
logger.info("HistoricalDataLoader initialized with existing DataProvider (startup mode: ON)")
def get_data(self, symbol: str, timeframe: str,
start_time: Optional[datetime] = None,
@@ -130,12 +133,22 @@ class HistoricalDataLoader:
return df
# Fallback: fetch from DataProvider's historical data method
logger.info(f"Fetching fresh data for {symbol} {timeframe}")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit
)
# During startup, allow stale cache to avoid slow API calls
if self.startup_mode:
logger.info(f"Loading data for {symbol} {timeframe} (startup mode: allow stale cache)")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit,
allow_stale_cache=True
)
else:
logger.info(f"Fetching fresh data for {symbol} {timeframe}")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit
)
if df is not None and not df.empty:
# Filter by time range if specified
@@ -219,6 +232,11 @@ class HistoricalDataLoader:
self.memory_cache.clear()
logger.info("Memory cache cleared")
def disable_startup_mode(self):
    """Disable startup mode so later requests fetch fresh data.

    While startup mode is on, get_data() passes allow_stale_cache=True to
    the data provider to avoid slow API calls during app startup; calling
    this flips the flag so subsequent requests fetch fresh data instead.
    """
    self.startup_mode = False
    logger.info("Startup mode disabled - will fetch fresh data on next request")
def get_data_boundaries(self, symbol: str, timeframe: str) -> Tuple[Optional[datetime], Optional[datetime]]:
"""
Get the earliest and latest available data timestamps