""" Annotation Manager - Manages trade annotations and test case generation Handles storage, retrieval, and test case generation from manual trade annotations. Stores annotations in both JSON (legacy) and SQLite (with full market data). """ import json import uuid import sys from pathlib import Path from datetime import datetime, timedelta from typing import List, Dict, Optional, Any from dataclasses import dataclass, asdict import logging import pytz # Add parent directory to path for imports parent_dir = Path(__file__).parent.parent.parent sys.path.insert(0, str(parent_dir)) logger = logging.getLogger(__name__) # Import DuckDB storage try: from core.duckdb_storage import DuckDBStorage DUCKDB_AVAILABLE = True except ImportError: DUCKDB_AVAILABLE = False logger.warning("DuckDB storage not available for annotations") @dataclass class TradeAnnotation: """Represents a manually marked trade""" annotation_id: str symbol: str timeframe: str entry: Dict[str, Any] # {timestamp, price, index} exit: Dict[str, Any] # {timestamp, price, index} direction: str # 'LONG' or 'SHORT' profit_loss_pct: float notes: str = "" created_at: str = None market_context: Dict[str, Any] = None def __post_init__(self): if self.created_at is None: self.created_at = datetime.now().isoformat() if self.market_context is None: self.market_context = {} class AnnotationManager: """Manages trade annotations and test case generation""" def __init__(self, storage_path: str = "ANNOTATE/data/annotations"): """Initialize annotation manager""" self.storage_path = Path(storage_path) self.storage_path.mkdir(parents=True, exist_ok=True) self.annotations_file = self.storage_path / "annotations_db.json" self.test_cases_dir = self.storage_path.parent / "test_cases" self.test_cases_dir.mkdir(parents=True, exist_ok=True) self.annotations_db = self._load_annotations() # Initialize DuckDB storage for complete annotation data self.duckdb_storage: Optional[DuckDBStorage] = None if DUCKDB_AVAILABLE: try: self.duckdb_storage = DuckDBStorage() logger.info("DuckDB storage initialized for annotations") except Exception as e: logger.warning(f"Could not initialize DuckDB storage: {e}") logger.info(f"AnnotationManager initialized with storage: {self.storage_path}") def _load_annotations(self) -> Dict[str, List[Dict]]: """Load annotations from storage""" if self.annotations_file.exists(): try: with open(self.annotations_file, 'r') as f: data = json.load(f) logger.info(f"Loaded {len(data.get('annotations', []))} annotations") return data except Exception as e: logger.error(f"Error loading annotations: {e}") return {"annotations": [], "metadata": {}} else: return {"annotations": [], "metadata": {}} def _save_annotations(self): """Save annotations to storage""" try: # Update metadata self.annotations_db["metadata"] = { "total_annotations": len(self.annotations_db["annotations"]), "last_updated": datetime.now().isoformat() } with open(self.annotations_file, 'w') as f: json.dump(self.annotations_db, f, indent=2) logger.info(f"Saved {len(self.annotations_db['annotations'])} annotations") except Exception as e: logger.error(f"Error saving annotations: {e}") raise def create_annotation(self, entry_point: Dict, exit_point: Dict, symbol: str, timeframe: str, entry_market_state: Dict = None, exit_market_state: Dict = None) -> TradeAnnotation: """Create new trade annotation""" # Calculate direction and P&L entry_price = entry_point['price'] exit_price = exit_point['price'] if exit_price > entry_price: direction = 'LONG' profit_loss_pct = ((exit_price - entry_price) / entry_price) * 100 else: direction = 'SHORT' profit_loss_pct = ((entry_price - exit_price) / entry_price) * 100 # Store complete market context for training market_context = { 'entry_state': entry_market_state or {}, 'exit_state': exit_market_state or {} } annotation = TradeAnnotation( annotation_id=str(uuid.uuid4()), symbol=symbol, timeframe=timeframe, entry=entry_point, exit=exit_point, direction=direction, profit_loss_pct=profit_loss_pct, market_context=market_context ) logger.info(f"Created annotation: {annotation.annotation_id} ({direction}, {profit_loss_pct:.2f}%)") logger.info(f" Entry state: {len(entry_market_state or {})} timeframes") logger.info(f" Exit state: {len(exit_market_state or {})} timeframes") return annotation def save_annotation(self, annotation: TradeAnnotation, market_snapshots: Dict = None, model_predictions: List[Dict] = None): """ Save annotation to storage (JSON + SQLite) Args: annotation: TradeAnnotation object market_snapshots: Dict of {timeframe: DataFrame} with OHLCV data model_predictions: List of model predictions at annotation time """ # Convert to dict ann_dict = asdict(annotation) # Add to JSON database (legacy) self.annotations_db["annotations"].append(ann_dict) # Save to JSON file self._save_annotations() # Save to DuckDB with complete market data if self.duckdb_storage and market_snapshots: try: self.duckdb_storage.store_annotation( annotation_id=annotation.annotation_id, annotation_data=ann_dict, market_snapshots=market_snapshots, model_predictions=model_predictions ) logger.info(f"Saved annotation {annotation.annotation_id} to DuckDB with {len(market_snapshots)} timeframes") except Exception as e: logger.error(f"Could not save annotation to DuckDB: {e}") logger.info(f"Saved annotation: {annotation.annotation_id}") def get_annotations(self, symbol: str = None, timeframe: str = None) -> List[TradeAnnotation]: """Retrieve annotations with optional filtering""" annotations = self.annotations_db.get("annotations", []) # Filter by symbol if symbol: annotations = [a for a in annotations if a.get('symbol') == symbol] # Filter by timeframe if timeframe: annotations = [a for a in annotations if a.get('timeframe') == timeframe] # Convert to TradeAnnotation objects result = [] for ann_dict in annotations: try: annotation = TradeAnnotation(**ann_dict) result.append(annotation) except Exception as e: logger.error(f"Error converting annotation: {e}") return result def delete_annotation(self, annotation_id: str) -> bool: """ Delete annotation and its associated test case file Args: annotation_id: ID of annotation to delete Returns: bool: True if annotation was deleted, False if not found Raises: Exception: If there's an error during deletion """ original_count = len(self.annotations_db["annotations"]) self.annotations_db["annotations"] = [ a for a in self.annotations_db["annotations"] if a.get('annotation_id') != annotation_id ] if len(self.annotations_db["annotations"]) < original_count: # Annotation was found and removed self._save_annotations() # Also delete the associated test case file test_case_file = self.test_cases_dir / f"annotation_{annotation_id}.json" if test_case_file.exists(): try: test_case_file.unlink() logger.info(f"Deleted test case file: {test_case_file}") except Exception as e: logger.error(f"Error deleting test case file {test_case_file}: {e}") # Don't fail the whole operation if test case deletion fails logger.info(f"Deleted annotation: {annotation_id}") return True else: logger.warning(f"Annotation not found: {annotation_id}") return False def clear_all_annotations(self, symbol: str = None): """ Clear all annotations (optionally filtered by symbol) More efficient than deleting one by one Args: symbol: Optional symbol filter. If None, clears all annotations. Returns: int: Number of annotations deleted """ # Get annotations to delete if symbol: annotations_to_delete = [ a for a in self.annotations_db["annotations"] if a.get('symbol') == symbol ] # Keep annotations for other symbols self.annotations_db["annotations"] = [ a for a in self.annotations_db["annotations"] if a.get('symbol') != symbol ] else: annotations_to_delete = self.annotations_db["annotations"].copy() self.annotations_db["annotations"] = [] deleted_count = len(annotations_to_delete) if deleted_count > 0: # Save updated annotations database self._save_annotations() # Delete associated test case files for annotation in annotations_to_delete: annotation_id = annotation.get('annotation_id') test_case_file = self.test_cases_dir / f"annotation_{annotation_id}.json" if test_case_file.exists(): try: test_case_file.unlink() logger.debug(f"Deleted test case file: {test_case_file}") except Exception as e: logger.error(f"Error deleting test case file {test_case_file}: {e}") logger.info(f"Cleared {deleted_count} annotations" + (f" for symbol {symbol}" if symbol else "")) return deleted_count def generate_test_case(self, annotation: TradeAnnotation, data_provider=None, auto_save: bool = True) -> Dict: """ Generate lightweight test case metadata (no OHLCV data stored) OHLCV data will be fetched dynamically from cache/database during training Args: annotation: TradeAnnotation object data_provider: Optional DataProvider instance (not used for storage) Returns: Test case metadata dictionary """ test_case = { "test_case_id": f"annotation_{annotation.annotation_id}", "symbol": annotation.symbol, "timestamp": annotation.entry['timestamp'], "action": "BUY" if annotation.direction == "LONG" else "SELL", "expected_outcome": { "direction": annotation.direction, "profit_loss_pct": annotation.profit_loss_pct, "holding_period_seconds": self._calculate_holding_period(annotation), "exit_price": annotation.exit['price'], "entry_price": annotation.entry['price'] }, "annotation_metadata": { "annotator": "manual", "confidence": 1.0, "notes": annotation.notes, "created_at": annotation.created_at, "timeframe": annotation.timeframe }, "training_config": { "context_window_minutes": 5, # ±5 minutes around entry/exit "timeframes": ["1s", "1m", "1h", "1d"], "data_source": "cache" # Will fetch from cache/database } } # Save lightweight test case metadata to file if auto_save is True if auto_save: test_case_file = self.test_cases_dir / f"{test_case['test_case_id']}.json" with open(test_case_file, 'w') as f: json.dump(test_case, f, indent=2) logger.info(f"Saved test case metadata to: {test_case_file}") logger.info(f"Generated lightweight test case: {test_case['test_case_id']} (OHLCV data will be fetched dynamically)") return test_case def get_all_test_cases(self, symbol: Optional[str] = None) -> List[Dict]: """ Load all test cases from disk Args: symbol: Optional symbol filter (e.g., 'ETH/USDT'). If provided, only returns test cases for that symbol. Critical for avoiding cross-symbol training. Returns: List of test case dictionaries """ test_cases = [] if not self.test_cases_dir.exists(): return test_cases for test_case_file in self.test_cases_dir.glob("annotation_*.json"): try: with open(test_case_file, 'r') as f: test_case = json.load(f) # CRITICAL: Filter by symbol to avoid training on wrong symbol if symbol: test_case_symbol = test_case.get('symbol', '') if test_case_symbol != symbol: logger.debug(f"Skipping {test_case_file.name}: symbol {test_case_symbol} != {symbol}") continue test_cases.append(test_case) except Exception as e: logger.error(f"Error loading test case {test_case_file}: {e}") if symbol: logger.info(f"Loaded {len(test_cases)} test cases for symbol {symbol}") else: logger.info(f"Loaded {len(test_cases)} test cases (all symbols)") return test_cases def _calculate_holding_period(self, annotation: TradeAnnotation) -> float: """Calculate holding period in seconds""" try: entry_time = datetime.fromisoformat(annotation.entry['timestamp'].replace('Z', '+00:00')) exit_time = datetime.fromisoformat(annotation.exit['timestamp'].replace('Z', '+00:00')) return (exit_time - entry_time).total_seconds() except Exception as e: logger.error(f"Error calculating holding period: {e}") return 0.0 def _generate_training_labels(self, market_state: Dict, entry_time: datetime, exit_time: datetime, direction: str) -> Dict: """ Generate training labels for each timestamp in the market data. This helps the model learn WHERE to signal and WHERE NOT to signal. Labels: - 0 = NO SIGNAL (before entry or after exit) - 1 = ENTRY SIGNAL (at entry time) - 2 = HOLD (between entry and exit) - 3 = EXIT SIGNAL (at exit time) """ labels = {} # Use 1m timeframe as reference for labeling if 'ohlcv_1m' in market_state and 'timestamps' in market_state['ohlcv_1m']: timestamps = market_state['ohlcv_1m']['timestamps'] label_list = [] for ts_str in timestamps: try: ts = datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S') # Make timezone-aware to match entry_time if ts.tzinfo is None: ts = pytz.UTC.localize(ts) # Determine label based on position relative to entry/exit if abs((ts - entry_time).total_seconds()) < 60: # Within 1 minute of entry label = 1 # ENTRY SIGNAL elif abs((ts - exit_time).total_seconds()) < 60: # Within 1 minute of exit label = 3 # EXIT SIGNAL elif entry_time < ts < exit_time: # Between entry and exit label = 2 # HOLD else: # Before entry or after exit label = 0 # NO SIGNAL label_list.append(label) except Exception as e: logger.error(f"Error parsing timestamp {ts_str}: {e}") label_list.append(0) labels['labels_1m'] = label_list labels['direction'] = direction labels['entry_timestamp'] = entry_time.strftime('%Y-%m-%d %H:%M:%S') labels['exit_timestamp'] = exit_time.strftime('%Y-%m-%d %H:%M:%S') logger.info(f"Generated {len(label_list)} training labels: " f"{label_list.count(0)} NO_SIGNAL, " f"{label_list.count(1)} ENTRY, " f"{label_list.count(2)} HOLD, " f"{label_list.count(3)} EXIT") return labels def export_annotations(self, annotations: List[TradeAnnotation] = None, format_type: str = 'json') -> Path: """Export annotations to file""" if annotations is None: annotations = self.get_annotations() # Convert to dicts export_data = [asdict(ann) for ann in annotations] # Create export file timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') export_file = self.storage_path / f"export_{timestamp}.{format_type}" if format_type == 'json': with open(export_file, 'w') as f: json.dump(export_data, f, indent=2) elif format_type == 'csv': import csv with open(export_file, 'w', newline='') as f: if export_data: writer = csv.DictWriter(f, fieldnames=export_data[0].keys()) writer.writeheader() writer.writerows(export_data) logger.info(f"Exported {len(annotations)} annotations to {export_file}") return export_file