WIP: OHLCV in storage; migrate to DuckDB

This commit is contained in:
Dobromir Popov
2025-10-24 15:57:33 +03:00
parent d4ed894a92
commit 809c32e7a8
7 changed files with 1417 additions and 92 deletions

View File

@@ -2,10 +2,12 @@
Annotation Manager - Manages trade annotations and test case generation
Handles storage, retrieval, and test case generation from manual trade annotations.
Stores annotations in both JSON (legacy) and DuckDB (with full market data).
"""
import json
import uuid
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any
@@ -13,8 +15,20 @@ from dataclasses import dataclass, asdict
import logging
import pytz
# Add parent directory to path for imports
parent_dir = Path(__file__).parent.parent.parent
sys.path.insert(0, str(parent_dir))
logger = logging.getLogger(__name__)
# Import DuckDB storage
try:
from core.duckdb_storage import DuckDBStorage
DUCKDB_AVAILABLE = True
except ImportError:
DUCKDB_AVAILABLE = False
logger.warning("DuckDB storage not available for annotations")
@dataclass
class TradeAnnotation:
@@ -51,6 +65,15 @@ class AnnotationManager:
self.annotations_db = self._load_annotations()
# Initialize DuckDB storage for complete annotation data
self.duckdb_storage: Optional[DuckDBStorage] = None
if DUCKDB_AVAILABLE:
try:
self.duckdb_storage = DuckDBStorage()
logger.info("DuckDB storage initialized for annotations")
except Exception as e:
logger.warning(f"Could not initialize DuckDB storage: {e}")
logger.info(f"AnnotationManager initialized with storage: {self.storage_path}")
def _load_annotations(self) -> Dict[str, List[Dict]]:
@@ -122,17 +145,39 @@ class AnnotationManager:
logger.info(f" Exit state: {len(exit_market_state or {})} timeframes")
return annotation
def save_annotation(self, annotation: TradeAnnotation,
                    market_snapshots: Optional[Dict] = None,
                    model_predictions: Optional[List[Dict]] = None):
    """
    Save annotation to storage (JSON legacy + DuckDB when available).

    The JSON write always happens; the DuckDB write is best-effort and only
    attempted when a DuckDB storage backend was initialized AND market
    snapshots were provided.

    Args:
        annotation: TradeAnnotation object to persist
        market_snapshots: Optional dict of {timeframe: DataFrame} with OHLCV
            data captured around the annotation time
        model_predictions: Optional list of model predictions recorded at
            annotation time
    """
    # Convert the dataclass to a plain dict for serialization
    ann_dict = asdict(annotation)

    # Add to the in-memory JSON database (legacy path)
    self.annotations_db["annotations"].append(ann_dict)

    # Persist the JSON file immediately so the annotation survives restarts
    self._save_annotations()

    # Save to DuckDB with complete market data. A DuckDB failure must not
    # lose the annotation — it is already persisted in JSON above — so we
    # log the error and continue rather than re-raise.
    if self.duckdb_storage and market_snapshots:
        try:
            self.duckdb_storage.store_annotation(
                annotation_id=annotation.annotation_id,
                annotation_data=ann_dict,
                market_snapshots=market_snapshots,
                model_predictions=model_predictions
            )
            logger.info(f"Saved annotation {annotation.annotation_id} to DuckDB with {len(market_snapshots)} timeframes")
        except Exception as e:
            logger.error(f"Could not save annotation to DuckDB: {e}")

    logger.info(f"Saved annotation: {annotation.annotation_id}")
def get_annotations(self, symbol: str = None,

View File

@@ -36,7 +36,10 @@ class HistoricalDataLoader:
self.memory_cache = {}
self.cache_ttl = timedelta(minutes=5)
logger.info("HistoricalDataLoader initialized with existing DataProvider")
# Startup mode - allow stale cache for faster loading
self.startup_mode = True
logger.info("HistoricalDataLoader initialized with existing DataProvider (startup mode: ON)")
def get_data(self, symbol: str, timeframe: str,
start_time: Optional[datetime] = None,
@@ -130,12 +133,22 @@ class HistoricalDataLoader:
return df
# Fallback: fetch from DataProvider's historical data method
logger.info(f"Fetching fresh data for {symbol} {timeframe}")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit
)
# During startup, allow stale cache to avoid slow API calls
if self.startup_mode:
logger.info(f"Loading data for {symbol} {timeframe} (startup mode: allow stale cache)")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit,
allow_stale_cache=True
)
else:
logger.info(f"Fetching fresh data for {symbol} {timeframe}")
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=limit
)
if df is not None and not df.empty:
# Filter by time range if specified
@@ -219,6 +232,11 @@ class HistoricalDataLoader:
self.memory_cache.clear()
logger.info("Memory cache cleared")
def disable_startup_mode(self):
    """Disable startup mode so later requests fetch fresh data.

    While startup mode is on, get_data() passes allow_stale_cache=True to
    the data provider to avoid slow API calls during app startup; calling
    this flips the flag so subsequent requests fetch fresh data instead.
    """
    self.startup_mode = False
    logger.info("Startup mode disabled - will fetch fresh data on next request")
def get_data_boundaries(self, symbol: str, timeframe: str) -> Tuple[Optional[datetime], Optional[datetime]]:
"""
Get the earliest and latest available data timestamps