Dobromir Popov
2025-12-10 00:45:41 +02:00
parent c21d8cbea1
commit fadfa8c741
5 changed files with 256 additions and 117 deletions

View File

@@ -163,8 +163,12 @@ class RealTrainingAdapter:
# CRITICAL: Training lock to prevent concurrent model access
# Multiple threads (batch training + per-candle training) can corrupt
# the computation graph if they access the model simultaneously
# Use RLock (reentrant lock) so the same thread can acquire it multiple times without deadlocking
import threading
self._training_lock = threading.Lock()
self._training_lock = threading.RLock()
# Track which thread currently holds the training lock (for debugging)
self._training_lock_holder = None
# Use orchestrator's inference training coordinator (if available)
# This reduces duplication and centralizes coordination logic
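As an aside, a minimal sketch (not part of the commit) of why the RLock swap matters: a plain Lock blocks forever if its owning thread tries to re-acquire it, while an RLock just increments a recursion count for that thread.

    import threading

    def acquire_twice(lock):
        # A plain Lock would block forever on the second acquire;
        # an RLock simply bumps its recursion count for the owning thread.
        with lock:
            with lock:
                return "acquired twice"

    print(acquire_twice(threading.RLock()))   # works
    # print(acquire_twice(threading.Lock()))  # would deadlock the calling thread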
@@ -4142,7 +4146,16 @@ class RealTrainingAdapter:
# CRITICAL: Acquire training lock to prevent concurrent model access
# This prevents "inplace operation" errors when batch training and per-candle training run at the same time
import torch
with self._training_lock:
import threading
# Try to acquire lock with timeout to prevent deadlock
lock_acquired = self._training_lock.acquire(timeout=5.0)
if not lock_acquired:
logger.warning("Could not acquire training lock within 5 seconds - skipping this training step")
return
try:
self._training_lock_holder = threading.current_thread().name
with torch.enable_grad():
trainer.model.train()
result = trainer.train_step(batch, accumulate_gradients=False)
@@ -4193,6 +4206,10 @@ class RealTrainingAdapter:
improved=improved
)
self.realtime_training_metrics['last_checkpoint_step'] = self.realtime_training_metrics['total_steps']
finally:
# CRITICAL: Always release the lock, even if an exception occurs
self._training_lock_holder = None
self._training_lock.release()
except Exception as e:
logger.warning(f"Error training transformer on sample: {e}")

View File

@@ -2626,6 +2626,95 @@ class AnnotationDashboard:
'error': str(e)
})
@self.server.route('/api/live-updates-batch', methods=['POST'])
def get_live_updates_batch():
"""Get live chart and prediction updates for multiple timeframes (optimized batch endpoint)"""
try:
data = request.get_json() or {}
symbol = data.get('symbol', 'ETH/USDT')
timeframes = data.get('timeframes', ['1m'])
response = {
'success': True,
'chart_updates': {}, # Dict of timeframe -> chart_update
'prediction': None # Single prediction for all timeframes
}
# Get latest candle for each requested timeframe
if self.data_loader:
for timeframe in timeframes:
try:
df = self.data_loader.get_data(symbol, timeframe, limit=2, direction='latest')
if df is not None and not df.empty:
latest_candle = df.iloc[-1]
# Format timestamp as ISO string
timestamp = latest_candle.name
if hasattr(timestamp, 'isoformat'):
if timestamp.tzinfo is not None:
timestamp_str = timestamp.astimezone(timezone.utc).isoformat()
else:
timestamp_str = timestamp.isoformat() + 'Z'
else:
timestamp_str = str(timestamp)
is_confirmed = len(df) >= 2
response['chart_updates'][timeframe] = {
'symbol': symbol,
'timeframe': timeframe,
'candle': {
'timestamp': timestamp_str,
'open': float(latest_candle['open']),
'high': float(latest_candle['high']),
'low': float(latest_candle['low']),
'close': float(latest_candle['close']),
'volume': float(latest_candle['volume'])
},
'is_confirmed': is_confirmed
}
except Exception as e:
logger.debug(f"Error getting candle for {timeframe}: {e}")
# Get latest model predictions (same for all timeframes)
if self.orchestrator:
try:
predictions = {}
# DQN predictions
if hasattr(self.orchestrator, 'recent_dqn_predictions') and symbol in self.orchestrator.recent_dqn_predictions:
dqn_preds = list(self.orchestrator.recent_dqn_predictions[symbol])
if dqn_preds:
predictions['dqn'] = dqn_preds[-1]
# CNN predictions
if hasattr(self.orchestrator, 'recent_cnn_predictions') and symbol in self.orchestrator.recent_cnn_predictions:
cnn_preds = list(self.orchestrator.recent_cnn_predictions[symbol])
if cnn_preds:
predictions['cnn'] = cnn_preds[-1]
# Transformer predictions
if hasattr(self.orchestrator, 'recent_transformer_predictions') and symbol in self.orchestrator.recent_transformer_predictions:
transformer_preds = list(self.orchestrator.recent_transformer_predictions[symbol])
if transformer_preds:
transformer_pred = transformer_preds[-1].copy()
predictions['transformer'] = self._serialize_prediction(transformer_pred)
if predictions:
response['prediction'] = predictions
except Exception as e:
logger.debug(f"Error getting predictions: {e}")
return jsonify(response)
except Exception as e:
logger.error(f"Error in batch live updates: {e}")
return jsonify({
'success': False,
'error': str(e)
})
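For reference, a hedged client-side sketch of calling the new /api/live-updates-batch endpoint; the base URL and port are assumptions, and the response fields mirror the handler above (chart_updates keyed by timeframe plus a single prediction object).

    import requests  # any HTTP client works; requests is assumed for brevity

    BASE_URL = "http://localhost:8050"  # hypothetical dashboard host/port

    resp = requests.post(
        f"{BASE_URL}/api/live-updates-batch",
        json={"symbol": "ETH/USDT", "timeframes": ["1s", "1m", "1h"]},
        timeout=5,
    )
    data = resp.json()
    if data.get("success"):
        for timeframe, update in (data.get("chart_updates") or {}).items():
            candle = update["candle"]
            print(timeframe, candle["timestamp"], candle["close"], update["is_confirmed"])
        if data.get("prediction"):
            print("models:", list(data["prediction"].keys()))  # e.g. dqn, cnn, transformer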
@self.server.route('/api/realtime-inference/signals', methods=['GET'])
def get_realtime_signals():
"""Get latest real-time inference signals"""

View File

@@ -3059,30 +3059,37 @@ class ChartManager {
let targetPrice = currentPrice;
// CRITICAL FIX: Check if price_delta is normalized (< 1.0) or real price change
if (trendVector.price_delta !== undefined && trendVector.price_delta !== null) {
const priceDelta = parseFloat(trendVector.price_delta);
// CRITICAL FIX: Use calculated_direction and calculated_steepness from trend_vector
// The price_delta in trend_vector is the pivot range, not the predicted change
// We should use direction and steepness to estimate the trend
const direction = parseFloat(trendVector.calculated_direction) || 0; // -1, 0, or 1
const steepness = parseFloat(trendVector.calculated_steepness) || 0;
// If price_delta is very small (< 1.0), it's likely normalized - scale it
if (Math.abs(priceDelta) < 1.0) {
// Normalized value - treat as percentage of current price
targetPrice = currentPrice * (1 + priceDelta);
// Steepness is in price units, but we need to scale it reasonably
// If steepness is > 10, it's likely an absolute price change (too large to add directly)
// Cap or scale it down to a reasonable percentage move
let priceChange = 0;
if (steepness > 0) {
// If steepness is large (> 10), treat it as absolute price change but cap it
if (steepness > 10) {
// Cap at 2% of current price
const maxChange = 0.02 * currentPrice;
priceChange = Math.min(steepness, maxChange) * direction;
} else {
// Real price delta - add directly
targetPrice = currentPrice + priceDelta;
// Small steepness - use as percentage
priceChange = (steepness / 100) * currentPrice * direction;
}
} else {
// Fallback: Use direction and steepness
const direction = trendVector.direction === 'up' ? 1 :
(trendVector.direction === 'down' ? -1 : 0);
const steepness = parseFloat(trendVector.steepness) || 0; // 0 to 1
// Estimate price change based on steepness (max 1% move per projection period)
const maxChange = 0.01 * currentPrice;
const projectedChange = maxChange * steepness * direction;
targetPrice = currentPrice + projectedChange;
// Fallback: Use angle if available
const angle = parseFloat(trendVector.calculated_angle) || 0;
// Angle is in radians, convert to price change
// Small angle = small change, large angle = large change
priceChange = Math.tan(angle) * currentPrice * 0.01; // Scale down
}
targetPrice = currentPrice + priceChange;
// Sanity check: Don't let target price go to 0 or negative
if (targetPrice <= 0 || !isFinite(targetPrice)) {
console.warn('Invalid target price calculated:', targetPrice, 'using current price instead');
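Restating the rewritten projection logic as a small Python sketch (an illustration under the same assumptions, not code from the commit): steepness above 10 is treated as an absolute price change capped at 2% of the current price, smaller steepness as a percentage move, and the fallback converts the angle via tan scaled down by 0.01.

    import math

    def project_target_price(current_price: float, direction: float,
                             steepness: float, angle: float = 0.0) -> float:
        """Illustrative mirror of the chart's trend-vector projection."""
        if steepness > 0:
            if steepness > 10:
                # Large steepness: absolute price change, capped at 2% of current price
                price_change = min(steepness, 0.02 * current_price) * direction
            else:
                # Small steepness: interpret as a percentage move
                price_change = (steepness / 100) * current_price * direction
        else:
            # Fallback: angle in radians, scaled down
            price_change = math.tan(angle) * current_price * 0.01
        target = current_price + price_change
        # Sanity check: never project to zero, negative, or non-finite
        return current_price if (target <= 0 or not math.isfinite(target)) else target

    print(project_target_price(2500.0, 1.0, 50.0))  # 2550.0 (capped at +2%)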

View File

@@ -57,28 +57,42 @@ class LiveUpdatesPolling {
}
_poll() {
// Poll each subscription
// OPTIMIZATION: Batch all subscriptions into a single API call
// Group by symbol to reduce API calls from 4 to 1
const symbolGroups = {};
this.subscriptions.forEach(sub => {
fetch('/api/live-updates', {
if (!symbolGroups[sub.symbol]) {
symbolGroups[sub.symbol] = [];
}
symbolGroups[sub.symbol].push(sub.timeframe);
});
// Make one call per symbol with all timeframes
Object.entries(symbolGroups).forEach(([symbol, timeframes]) => {
fetch('/api/live-updates-batch', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
symbol: sub.symbol,
timeframe: sub.timeframe
symbol: symbol,
timeframes: timeframes
})
})
.then(response => response.json())
.then(data => {
if (data.success) {
// Handle chart update (even if null, predictions should still be processed)
if (data.chart_update && this.onChartUpdate) {
this.onChartUpdate(data.chart_update);
// Handle chart updates for each timeframe
if (data.chart_updates && this.onChartUpdate) {
// chart_updates is an object: { '1s': {...}, '1m': {...}, ... }
Object.entries(data.chart_updates).forEach(([timeframe, update]) => {
if (update) {
this.onChartUpdate(update);
}
});
}
// CRITICAL FIX: Handle prediction update properly
// data.prediction is already in format { transformer: {...}, dqn: {...}, cnn: {...} }
// Handle prediction update (single prediction for all timeframes)
// data.prediction is in format { transformer: {...}, dqn: {...}, cnn: {...} }
if (data.prediction && this.onPredictionUpdate) {
// Log prediction data for debugging
console.log('[Live Updates] Received prediction data:', {
has_transformer: !!data.prediction.transformer,
has_dqn: !!data.prediction.dqn,
@@ -88,10 +102,7 @@ class LiveUpdatesPolling {
has_predicted_candle: !!data.prediction.transformer?.predicted_candle
});
// Pass the prediction object directly (it's already in the correct format)
this.onPredictionUpdate(data.prediction);
} else if (!data.prediction) {
console.debug('[Live Updates] No prediction data in response');
}
} else {
console.debug('[Live Updates] Response not successful:', data);
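The symbol grouping above is plain bookkeeping; for illustration, the same collapse from per-timeframe subscriptions into one request body per symbol, in a few lines of Python (the subscription values are made up):

    from collections import defaultdict

    subscriptions = [  # hypothetical: previously one request each per poll
        {"symbol": "ETH/USDT", "timeframe": "1s"},
        {"symbol": "ETH/USDT", "timeframe": "1m"},
        {"symbol": "ETH/USDT", "timeframe": "1h"},
        {"symbol": "ETH/USDT", "timeframe": "1d"},
    ]

    symbol_groups = defaultdict(list)
    for sub in subscriptions:
        symbol_groups[sub["symbol"]].append(sub["timeframe"])

    bodies = [{"symbol": s, "timeframes": tfs} for s, tfs in symbol_groups.items()]
    print(bodies)  # one POST body instead of four separate calls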

View File

@@ -38,6 +38,10 @@ class DuckDBStorage:
# Connect to DuckDB
self.conn = duckdb.connect(str(self.db_path))
# CRITICAL: DuckDB connections are NOT thread-safe
# All database operations must be serialized with this lock
self._conn_lock = threading.RLock() # Use RLock to allow reentrant calls from the same thread
# Batch logging for compact output
self._batch_buffer = [] # List of (symbol, timeframe, count, total) tuples
self._batch_lock = threading.Lock()
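The rest of this file repeats the same pattern, so here is a minimal sketch of it in isolation, assuming only the duckdb package: every call that touches the shared connection takes the same RLock, and because the lock is reentrant, helpers that call each other on one thread do not deadlock.

    import threading
    import duckdb

    class LockedDuckDB:
        """Illustrative wrapper: serialize all access to one DuckDB connection."""

        def __init__(self, path: str = ":memory:"):
            self.conn = duckdb.connect(path)
            self._conn_lock = threading.RLock()  # reentrant: nested calls on one thread are fine

        def execute_df(self, query: str, params=None):
            with self._conn_lock:
                cur = self.conn.execute(query, params) if params else self.conn.execute(query)
                return cur.df()

        def execute_one(self, query: str, params=None):
            with self._conn_lock:
                cur = self.conn.execute(query, params) if params else self.conn.execute(query)
                return cur.fetchone()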
@@ -54,9 +58,10 @@ class DuckDBStorage:
def _init_schema(self):
"""Initialize database schema - all data in DuckDB tables"""
# Create OHLCV data table - stores ALL candles
self.conn.execute("""
# CRITICAL: Schema initialization must be serialized
with self._conn_lock:
# Create OHLCV data table - stores ALL candles
self.conn.execute("""
CREATE SEQUENCE IF NOT EXISTS ohlcv_id_seq START 1
""")
self.conn.execute("""
@@ -207,34 +212,36 @@ class DuckDBStorage:
# Insert data directly into DuckDB (ignore duplicates)
# Note: id column is auto-generated, so we don't include it
# Using INSERT OR IGNORE for better DuckDB compatibility
self.conn.execute("""
INSERT OR IGNORE INTO ohlcv_data (symbol, timeframe, timestamp, open, high, low, close, volume, created_at)
SELECT symbol, timeframe, timestamp, open, high, low, close, volume, created_at
FROM df_insert
""")
# CRITICAL: All DuckDB operations must be serialized with lock
with self._conn_lock:
self.conn.execute("""
INSERT OR IGNORE INTO ohlcv_data (symbol, timeframe, timestamp, open, high, low, close, volume, created_at)
SELECT symbol, timeframe, timestamp, open, high, low, close, volume, created_at
FROM df_insert
""")
# Update metadata
result = self.conn.execute("""
SELECT
MIN(timestamp) as first_ts,
MAX(timestamp) as last_ts,
COUNT(*) as count
FROM ohlcv_data
WHERE symbol = ? AND timeframe = ?
""", (symbol, timeframe)).fetchone()
# Update metadata
result = self.conn.execute("""
SELECT
MIN(timestamp) as first_ts,
MAX(timestamp) as last_ts,
COUNT(*) as count
FROM ohlcv_data
WHERE symbol = ? AND timeframe = ?
""", (symbol, timeframe)).fetchone()
# Handle case where no data exists yet
if result is None or result[0] is None:
first_ts, last_ts, count = 0, 0, 0
else:
first_ts, last_ts, count = result
now_ts = int(datetime.now().timestamp() * 1000)
# Handle case where no data exists yet
if result is None or result[0] is None:
first_ts, last_ts, count = 0, 0, 0
else:
first_ts, last_ts, count = result
now_ts = int(datetime.now().timestamp() * 1000)
self.conn.execute("""
INSERT OR REPLACE INTO cache_metadata
(symbol, timeframe, parquet_path, first_timestamp, last_timestamp, candle_count, last_update)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (symbol, timeframe, '', first_ts, last_ts, count, now_ts))
self.conn.execute("""
INSERT OR REPLACE INTO cache_metadata
(symbol, timeframe, parquet_path, first_timestamp, last_timestamp, candle_count, last_update)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (symbol, timeframe, '', first_ts, last_ts, count, now_ts))
# Add to batch buffer instead of logging immediately
with self._batch_lock:
@@ -303,8 +310,9 @@ class DuckDBStorage:
if limit:
query += f" LIMIT {limit}"
# Execute query
df = self.conn.execute(query, params).df()
# Execute query with thread-safe lock
with self._conn_lock:
df = self.conn.execute(query, params).df()
if df.empty:
return None
@@ -341,7 +349,8 @@ class DuckDBStorage:
WHERE symbol = ? AND timeframe = ?
"""
result = self.conn.execute(query, [symbol, timeframe]).fetchone()
with self._conn_lock:
result = self.conn.execute(query, [symbol, timeframe]).fetchone()
if result and result[0] is not None:
last_timestamp = pd.to_datetime(result[0], unit='ms', utc=True)
@@ -385,7 +394,8 @@ class DuckDBStorage:
limit
]
df = self.conn.execute(query, params).df()
with self._conn_lock:
df = self.conn.execute(query, params).df()
if df.empty:
return None
@@ -449,14 +459,15 @@ class DuckDBStorage:
df_copy.to_parquet(parquet_file, index=False, compression='snappy')
# Store annotation metadata in DuckDB
self.conn.execute("""
INSERT OR REPLACE INTO annotations
(annotation_id, symbol, timeframe, direction,
entry_timestamp, entry_price, exit_timestamp, exit_price,
profit_loss_pct, notes, created_at, market_context,
model_features, pivot_data, parquet_path)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
with self._conn_lock:
self.conn.execute("""
INSERT OR REPLACE INTO annotations
(annotation_id, symbol, timeframe, direction,
entry_timestamp, entry_price, exit_timestamp, exit_price,
profit_loss_pct, notes, created_at, market_context,
model_features, pivot_data, parquet_path)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
annotation_id,
annotation_data.get('symbol'),
annotation_data.get('timeframe'),
@@ -495,15 +506,16 @@ class DuckDBStorage:
"""
try:
# Get annotation metadata
result = self.conn.execute("""
SELECT * FROM annotations WHERE annotation_id = ?
""", (annotation_id,)).fetchone()
with self._conn_lock:
result = self.conn.execute("""
SELECT * FROM annotations WHERE annotation_id = ?
""", (annotation_id,)).fetchone()
if not result:
return None
if not result:
return None
# Parse annotation data
columns = [desc[0] for desc in self.conn.description]
# Parse annotation data
columns = [desc[0] for desc in self.conn.description]
annotation = dict(zip(columns, result))
# Parse JSON fields
@@ -520,11 +532,12 @@ class DuckDBStorage:
timeframe = parquet_file.stem
# Query parquet directly with DuckDB
df = self.conn.execute(f"""
SELECT timestamp, open, high, low, close, volume
FROM read_parquet('{parquet_file}')
ORDER BY timestamp
""").df()
with self._conn_lock:
df = self.conn.execute(f"""
SELECT timestamp, open, high, low, close, volume
FROM read_parquet('{parquet_file}')
ORDER BY timestamp
""").df()
if not df.empty:
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
@@ -550,12 +563,13 @@ class DuckDBStorage:
DataFrame with results
"""
try:
if params:
result = self.conn.execute(query, params)
else:
result = self.conn.execute(query)
with self._conn_lock:
if params:
result = self.conn.execute(query, params)
else:
result = self.conn.execute(query)
return result.df()
return result.df()
except Exception as e:
logger.error(f"Error executing query: {e}")
@@ -564,26 +578,27 @@ class DuckDBStorage:
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics"""
try:
# Get OHLCV stats
ohlcv_stats = self.conn.execute("""
SELECT symbol, timeframe, candle_count, first_timestamp, last_timestamp
FROM cache_metadata
ORDER BY symbol, timeframe
""").df()
with self._conn_lock:
# Get OHLCV stats
ohlcv_stats = self.conn.execute("""
SELECT symbol, timeframe, candle_count, first_timestamp, last_timestamp
FROM cache_metadata
ORDER BY symbol, timeframe
""").df()
if not ohlcv_stats.empty:
ohlcv_stats['first_timestamp'] = pd.to_datetime(ohlcv_stats['first_timestamp'], unit='ms')
ohlcv_stats['last_timestamp'] = pd.to_datetime(ohlcv_stats['last_timestamp'], unit='ms')
if not ohlcv_stats.empty:
ohlcv_stats['first_timestamp'] = pd.to_datetime(ohlcv_stats['first_timestamp'], unit='ms')
ohlcv_stats['last_timestamp'] = pd.to_datetime(ohlcv_stats['last_timestamp'], unit='ms')
# Get annotation count
annotation_count = self.conn.execute("""
SELECT COUNT(*) as count FROM annotations
""").fetchone()[0]
# Get annotation count
annotation_count = self.conn.execute("""
SELECT COUNT(*) as count FROM annotations
""").fetchone()[0]
# Get total candles
total_candles = self.conn.execute("""
SELECT SUM(candle_count) as total FROM cache_metadata
""").fetchone()[0] or 0
# Get total candles
total_candles = self.conn.execute("""
SELECT SUM(candle_count) as total FROM cache_metadata
""").fetchone()[0] or 0
return {
'ohlcv_stats': ohlcv_stats.to_dict('records') if not ohlcv_stats.empty else [],