WIP OHLCV in storage. Migrate to DuckDB

Dobromir Popov
2025-10-24 15:57:33 +03:00
parent d4ed894a92
commit 809c32e7a8
7 changed files with 1417 additions and 92 deletions

View File

@@ -2,10 +2,12 @@
Annotation Manager - Manages trade annotations and test case generation

Handles storage, retrieval, and test case generation from manual trade annotations.
Stores annotations in both JSON (legacy) and DuckDB (with full market data).
"""
import json
import uuid
+import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any
@@ -13,8 +15,20 @@ from dataclasses import dataclass, asdict
import logging
import pytz

+# Add parent directory to path for imports
+parent_dir = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(parent_dir))

logger = logging.getLogger(__name__)

+# Import DuckDB storage
+try:
+    from core.duckdb_storage import DuckDBStorage
+    DUCKDB_AVAILABLE = True
+except ImportError:
+    DUCKDB_AVAILABLE = False
+    logger.warning("DuckDB storage not available for annotations")

@dataclass
class TradeAnnotation:
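
Note: the guarded import above assumes a core.duckdb_storage module that ships a DuckDBStorage class. That module is not part of this diff, so the following is only a sketch of the interface the call sites in this commit rely on - the constructor argument and method body are assumptions, not the actual implementation:

    # Sketch of the assumed interface; only the method name and keyword
    # arguments are taken from the call sites in this diff.
    from typing import Any, Dict, List, Optional
    import duckdb
    import pandas as pd

    class DuckDBStorage:
        def __init__(self, db_path: str = 'annotations.duckdb'):  # default path is a guess
            self.conn = duckdb.connect(db_path)

        def store_annotation(self, annotation_id: str,
                             annotation_data: Dict[str, Any],
                             market_snapshots: Dict[str, pd.DataFrame],
                             model_predictions: Optional[List[Dict]] = None) -> None:
            """Persist annotation metadata plus one OHLCV snapshot per timeframe."""
            ...  # actual persistence lives in core/duckdb_storage.py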
@@ -51,6 +65,15 @@ class AnnotationManager:
        self.annotations_db = self._load_annotations()

+        # Initialize DuckDB storage for complete annotation data
+        self.duckdb_storage: Optional[DuckDBStorage] = None
+        if DUCKDB_AVAILABLE:
+            try:
+                self.duckdb_storage = DuckDBStorage()
+                logger.info("DuckDB storage initialized for annotations")
+            except Exception as e:
+                logger.warning(f"Could not initialize DuckDB storage: {e}")

        logger.info(f"AnnotationManager initialized with storage: {self.storage_path}")

    def _load_annotations(self) -> Dict[str, List[Dict]]:
@@ -122,17 +145,39 @@ class AnnotationManager:
        logger.info(f"  Exit state: {len(exit_market_state or {})} timeframes")

        return annotation

-    def save_annotation(self, annotation: TradeAnnotation):
-        """Save annotation to storage"""
+    def save_annotation(self, annotation: TradeAnnotation,
+                        market_snapshots: Dict = None,
+                        model_predictions: List[Dict] = None):
+        """
+        Save annotation to storage (JSON + DuckDB)
+
+        Args:
+            annotation: TradeAnnotation object
+            market_snapshots: Dict of {timeframe: DataFrame} with OHLCV data
+            model_predictions: List of model predictions at annotation time
+        """
        # Convert to dict
        ann_dict = asdict(annotation)

-        # Add to database
+        # Add to JSON database (legacy)
        self.annotations_db["annotations"].append(ann_dict)

-        # Save to file
+        # Save to JSON file
        self._save_annotations()

+        # Save to DuckDB with complete market data
+        if self.duckdb_storage and market_snapshots:
+            try:
+                self.duckdb_storage.store_annotation(
+                    annotation_id=annotation.annotation_id,
+                    annotation_data=ann_dict,
+                    market_snapshots=market_snapshots,
+                    model_predictions=model_predictions
+                )
+                logger.info(f"Saved annotation {annotation.annotation_id} to DuckDB with {len(market_snapshots)} timeframes")
+            except Exception as e:
+                logger.error(f"Could not save annotation to DuckDB: {e}")

        logger.info(f"Saved annotation: {annotation.annotation_id}")

    def get_annotations(self, symbol: str = None,
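
For callers, the new signature means an annotation can still be saved without snapshots (JSON-only), or with a dict of DataFrames. A usage sketch - `manager`, `ann`, and the DataFrames are stand-ins, and the prediction dict shape is hypothetical:

    # Hypothetical caller; df_1m/df_1s are OHLCV DataFrames from any loader.
    snapshots = {'1m': df_1m, '1s': df_1s}
    manager.save_annotation(
        annotation=ann,
        market_snapshots=snapshots,
        model_predictions=[
            {'model': 'cnn_v1', 'direction': 'long', 'confidence': 0.72},  # shape is illustrative
        ],
    )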

View File

@@ -36,7 +36,10 @@ class HistoricalDataLoader:
        self.memory_cache = {}
        self.cache_ttl = timedelta(minutes=5)

-        logger.info("HistoricalDataLoader initialized with existing DataProvider")
+        # Startup mode - allow stale cache for faster loading
+        self.startup_mode = True
+
+        logger.info("HistoricalDataLoader initialized with existing DataProvider (startup mode: ON)")

    def get_data(self, symbol: str, timeframe: str,
                 start_time: Optional[datetime] = None,
@@ -130,12 +133,22 @@ class HistoricalDataLoader:
                return df

        # Fallback: fetch from DataProvider's historical data method
-        logger.info(f"Fetching fresh data for {symbol} {timeframe}")
-        df = self.data_provider.get_historical_data(
-            symbol=symbol,
-            timeframe=timeframe,
-            limit=limit
-        )
+        # During startup, allow stale cache to avoid slow API calls
+        if self.startup_mode:
+            logger.info(f"Loading data for {symbol} {timeframe} (startup mode: allow stale cache)")
+            df = self.data_provider.get_historical_data(
+                symbol=symbol,
+                timeframe=timeframe,
+                limit=limit,
+                allow_stale_cache=True
+            )
+        else:
+            logger.info(f"Fetching fresh data for {symbol} {timeframe}")
+            df = self.data_provider.get_historical_data(
+                symbol=symbol,
+                timeframe=timeframe,
+                limit=limit
+            )

        if df is not None and not df.empty:
            # Filter by time range if specified
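
The allow_stale_cache=True kwarg is simply forwarded to DataProvider.get_historical_data; the provider side is not in this diff. A plausible reading of the flag, assuming the provider keeps a TTL-stamped cache (all names below are assumptions, not the repo's actual code):

    # Hypothetical provider-side handling of allow_stale_cache.
    def get_historical_data(self, symbol, timeframe, limit=500,
                            refresh=False, allow_stale_cache=False):
        entry = self._cache.get((symbol, timeframe))  # assumed cache layout
        if entry is not None and not refresh:
            age = datetime.now() - entry.fetched_at
            # Startup mode: accept entries past the normal TTL instead of a slow API call.
            if age <= self.cache_ttl or allow_stale_cache:
                return entry.df.tail(limit)
        return self._fetch_from_exchange(symbol, timeframe, limit)  # assumed helper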
@@ -219,6 +232,11 @@ class HistoricalDataLoader:
        self.memory_cache.clear()
        logger.info("Memory cache cleared")

+    def disable_startup_mode(self):
+        """Disable startup mode to fetch fresh data"""
+        self.startup_mode = False
+        logger.info("Startup mode disabled - will fetch fresh data on next request")

    def get_data_boundaries(self, symbol: str, timeframe: str) -> Tuple[Optional[datetime], Optional[datetime]]:
        """
        Get the earliest and latest available data timestamps
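
Taken together with the constructor change above, the intended lifecycle looks like this (a sketch; constructor arguments are elided):

    loader = HistoricalDataLoader(data_provider)        # startup_mode starts as True
    df = loader.get_data('ETH/USDT', '1m', limit=100)   # may be served from stale cache
    loader.disable_startup_mode()
    df = loader.get_data('ETH/USDT', '1m', limit=100)   # now fetches fresh data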

View File

@@ -163,6 +163,10 @@ class AnnotationDashboard:
        # Setup routes
        self._setup_routes()

+        # Start background data refresh after startup
+        if self.data_loader:
+            self._start_background_data_refresh()

        logger.info("Annotation Dashboard initialized")

    def _enable_unified_storage_async(self):
@@ -201,6 +205,58 @@ class AnnotationDashboard:
        storage_thread = threading.Thread(target=enable_storage, daemon=True)
        storage_thread.start()

+    def _start_background_data_refresh(self):
+        """Start background task to refresh recent data after startup"""
+        def refresh_recent_data():
+            try:
+                import time
+                # Wait for app to fully start
+                time.sleep(5)
+
+                logger.info("🔄 Starting background data refresh (fetching only recent missing data)")
+
+                # Disable startup mode to fetch fresh data
+                self.data_loader.disable_startup_mode()
+
+                # Fetch only last 5 minutes of 1m data and 300 seconds of 1s data
+                symbols = self.config.get('symbols', ['ETH/USDT', 'BTC/USDT'])
+                for symbol in symbols:
+                    try:
+                        # Fetch last 5 candles of 1m data (5 minutes)
+                        logger.info(f"Refreshing recent 1m data for {symbol}")
+                        self.data_provider.get_historical_data(
+                            symbol=symbol,
+                            timeframe='1m',
+                            limit=5,
+                            refresh=True
+                        )
+
+                        # Fetch last 300 candles of 1s data (5 minutes)
+                        logger.info(f"Refreshing recent 1s data for {symbol}")
+                        self.data_provider.get_historical_data(
+                            symbol=symbol,
+                            timeframe='1s',
+                            limit=300,
+                            refresh=True
+                        )
+
+                        logger.info(f"✅ Refreshed recent data for {symbol}")
+                    except Exception as e:
+                        logger.warning(f"Could not refresh recent data for {symbol}: {e}")
+
+                logger.info("✅ Background data refresh completed")
+            except Exception as e:
+                logger.error(f"Error in background data refresh: {e}")
+
+        # Start in background thread
+        import threading
+        refresh_thread = threading.Thread(target=refresh_recent_data, daemon=True)
+        refresh_thread.start()
+        logger.info("📊 Background data refresh scheduled")

    def _get_pivot_markers_for_timeframe(self, symbol: str, timeframe: str, df: pd.DataFrame) -> dict:
        """
        Get pivot markers for a specific timeframe using WilliamsMarketStructure directly
@@ -526,8 +582,38 @@ class AnnotationDashboard:
                exit_market_state=exit_market_state
            )

-            # Save annotation
-            self.annotation_manager.save_annotation(annotation)
+            # Collect market snapshots for DuckDB storage
+            market_snapshots = {}
+            if self.data_loader:
+                try:
+                    # Get OHLCV data for all timeframes around the annotation time
+                    entry_time = datetime.fromisoformat(data['entry']['timestamp'].replace('Z', '+00:00'))
+                    exit_time = datetime.fromisoformat(data['exit']['timestamp'].replace('Z', '+00:00'))
+
+                    # Get data from 5 minutes before entry to 5 minutes after exit
+                    start_time = entry_time - timedelta(minutes=5)
+                    end_time = exit_time + timedelta(minutes=5)
+
+                    for timeframe in ['1s', '1m', '1h', '1d']:
+                        df = self.data_loader.get_data(
+                            symbol=data['symbol'],
+                            timeframe=timeframe,
+                            start_time=start_time,
+                            end_time=end_time,
+                            limit=1500
+                        )
+                        if df is not None and not df.empty:
+                            market_snapshots[timeframe] = df
+
+                    logger.info(f"Collected {len(market_snapshots)} timeframes for annotation storage")
+                except Exception as e:
+                    logger.error(f"Error collecting market snapshots: {e}")
+
+            # Save annotation with market snapshots
+            self.annotation_manager.save_annotation(
+                annotation=annotation,
+                market_snapshots=market_snapshots
+            )

            # Automatically generate test case with ±5min data
            try:
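
Downstream, store_annotation has to land these per-timeframe DataFrames in DuckDB. One natural approach is DuckDB's native DataFrame registration; this is a sketch under an assumed table layout, not the schema core/duckdb_storage.py actually creates:

    import duckdb

    conn = duckdb.connect('annotations.duckdb')  # path is an assumption
    conn.execute("""
        CREATE TABLE IF NOT EXISTS annotation_ohlcv (
            annotation_id VARCHAR,
            timeframe     VARCHAR,
            timestamp     TIMESTAMP,
            open DOUBLE, high DOUBLE, low DOUBLE, close DOUBLE, volume DOUBLE
        )
    """)

    def store_snapshots(annotation_id: str, market_snapshots: dict) -> None:
        for timeframe, df in market_snapshots.items():
            snap = df.reset_index()  # assumes a DatetimeIndex named 'timestamp'
            conn.register('snap', snap)
            conn.execute(
                "INSERT INTO annotation_ohlcv "
                "SELECT ?, ?, timestamp, open, high, low, close, volume FROM snap",
                [annotation_id, timeframe],
            )
            conn.unregister('snap')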