infinite load WIP

Dobromir Popov
2025-10-24 23:04:29 +03:00
parent 07b82f0a1f
commit 2233a88d3e
5 changed files with 522 additions and 73 deletions


@@ -79,11 +79,14 @@ class HistoricalDataLoader:
                 if len(cached_df) >= min(limit, 100):  # Use cached if we have at least 100 candles
                     logger.debug(f"Using DataProvider cached data for {symbol} {timeframe} ({len(cached_df)} candles)")

-                    # Filter by time range if specified
-                    if start_time or end_time:
-                        filtered_df = self._filter_by_time_range(cached_df.copy(), start_time, end_time)
-                    else:
-                        filtered_df = cached_df.tail(limit).copy()
+                    # Filter by time range with direction support
+                    filtered_df = self._filter_by_time_range(
+                        cached_df.copy(),
+                        start_time,
+                        end_time,
+                        direction,
+                        limit
+                    )

                     # Cache in memory
                     self.memory_cache[cache_key] = (filtered_df, datetime.now())
@@ -140,13 +143,14 @@ class HistoricalDataLoader:
                     df = self.data_provider.cached_data[symbol][timeframe]
                     if df is not None and not df.empty:
-                        # Filter by time range if specified
-                        if start_time or end_time:
-                            df = self._filter_by_time_range(df, start_time, end_time)
-
-                        # Limit number of candles
-                        if len(df) > limit:
-                            df = df.tail(limit)
+                        # Filter by time range with direction support
+                        df = self._filter_by_time_range(
+                            df.copy(),
+                            start_time,
+                            end_time,
+                            direction,
+                            limit
+                        )

                         # Cache in memory
                         self.memory_cache[cache_key] = (df.copy(), datetime.now())
@@ -182,10 +186,37 @@ class HistoricalDataLoader:
                         self.memory_cache[cache_key] = (df.copy(), datetime.now())
                         return df
                     else:
-                        logger.info(f"No data in DuckDB, fetching from API for {symbol} {timeframe}")
+                        logger.info(f"📡 No data in DuckDB, fetching from exchange API for {symbol} {timeframe}")

-            # Fetch from API and store in DuckDB
-            logger.info(f"Fetching data from API for {symbol} {timeframe}")
+                        # Fetch from exchange API with time range
+                        df = self._fetch_from_exchange_api(
+                            symbol=symbol,
+                            timeframe=timeframe,
+                            start_time=start_time,
+                            end_time=end_time,
+                            limit=limit,
+                            direction=direction
+                        )
+
+                        if df is not None and not df.empty:
+                            # Store in DuckDB for future use
+                            if self.data_provider.duckdb_storage:
+                                stored_count = self.data_provider.duckdb_storage.store_ohlcv_data(
+                                    symbol=symbol,
+                                    timeframe=timeframe,
+                                    df=df
+                                )
+                                logger.info(f"💾 Stored {stored_count} new candles in DuckDB")
+
+                            # Cache in memory
+                            self.memory_cache[cache_key] = (df.copy(), datetime.now())
+                            return df
+                        else:
+                            logger.warning(f"No data available from exchange API for {symbol} {timeframe}")
+                            return None
+
+            # Fetch from API and store in DuckDB (no time range specified)
+            logger.info(f"Fetching latest data from API for {symbol} {timeframe}")
             df = self.data_provider.get_historical_data(
                 symbol=symbol,
                 timeframe=timeframe,
@@ -194,9 +225,14 @@ class HistoricalDataLoader:
             )

             if df is not None and not df.empty:
-                # Filter by time range if specified
-                if start_time or end_time:
-                    df = self._filter_by_time_range(df, start_time, end_time)
+                # Filter by time range with direction support
+                df = self._filter_by_time_range(
+                    df.copy(),
+                    start_time,
+                    end_time,
+                    direction,
+                    limit
+                )

                 # Cache in memory
                 self.memory_cache[cache_key] = (df.copy(), datetime.now())
@@ -211,14 +247,156 @@ class HistoricalDataLoader:
             logger.error(f"Error loading data for {symbol} {timeframe}: {e}")
             return None

+    def _fetch_from_exchange_api(self, symbol: str, timeframe: str,
+                                 start_time: Optional[datetime] = None,
+                                 end_time: Optional[datetime] = None,
+                                 limit: int = 1000,
+                                 direction: str = 'latest') -> Optional[pd.DataFrame]:
+        """
+        Fetch historical data from exchange API (Binance/MEXC) with time range support
+
+        Args:
+            symbol: Trading pair
+            timeframe: Timeframe
+            start_time: Start time for data range
+            end_time: End time for data range
+            limit: Maximum number of candles
+            direction: 'latest', 'before', or 'after'
+
+        Returns:
+            DataFrame with OHLCV data or None
+        """
+        try:
+            import requests
+            from core.api_rate_limiter import get_rate_limiter
+
+            # Convert symbol format for Binance
+            binance_symbol = symbol.replace('/', '').upper()
+
+            # Convert timeframe
+            timeframe_map = {
+                '1s': '1s', '1m': '1m', '5m': '5m', '15m': '15m', '30m': '30m',
+                '1h': '1h', '4h': '4h', '1d': '1d'
+            }
+            binance_timeframe = timeframe_map.get(timeframe, '1m')
+
+            # Build API parameters
+            params = {
+                'symbol': binance_symbol,
+                'interval': binance_timeframe,
+                'limit': min(limit, 1000)  # Binance max is 1000
+            }
+
+            # Add time range parameters if specified
+            if direction == 'before' and end_time:
+                # Get data ending at end_time
+                params['endTime'] = int(end_time.timestamp() * 1000)
+            elif direction == 'after' and start_time:
+                # Get data starting at start_time
+                params['startTime'] = int(start_time.timestamp() * 1000)
+            elif start_time:
+                params['startTime'] = int(start_time.timestamp() * 1000)
+            if end_time and direction != 'before':
+                params['endTime'] = int(end_time.timestamp() * 1000)
+
+            # Use rate limiter
+            rate_limiter = get_rate_limiter()
+            url = "https://api.binance.com/api/v3/klines"
+
+            logger.info(f"Fetching from Binance: {symbol} {timeframe} (direction={direction}, limit={limit})")
+            response = rate_limiter.make_request('binance_api', url, 'GET', params=params)
+
+            if response is None or response.status_code != 200:
+                logger.warning("Binance API failed, trying MEXC...")
+                # Try MEXC as fallback
+                return self._fetch_from_mexc_with_time_range(
+                    symbol, timeframe, start_time, end_time, limit, direction
+                )
+
+            data = response.json()
+            if not data:
+                logger.warning(f"No data returned from Binance for {symbol} {timeframe}")
+                return None
+
+            # Convert to DataFrame
+            df = pd.DataFrame(data, columns=[
+                'timestamp', 'open', 'high', 'low', 'close', 'volume',
+                'close_time', 'quote_volume', 'trades', 'taker_buy_base',
+                'taker_buy_quote', 'ignore'
+            ])
+
+            # Process columns
+            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
+            for col in ['open', 'high', 'low', 'close', 'volume']:
+                df[col] = df[col].astype(float)
+
+            # Keep only OHLCV columns
+            df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
+            df = df.set_index('timestamp')
+            df = df.sort_index()

+            logger.info(f"✅ Fetched {len(df)} candles from Binance for {symbol} {timeframe}")
+            return df
+
+        except Exception as e:
+            logger.error(f"Error fetching from exchange API: {e}")
+            return None
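For reference, each element of the Binance klines response is a 12-field array (prices arrive as strings, timestamps as epoch milliseconds), which is why the conversion above casts the numeric columns and parses timestamps with unit='ms'. A minimal sketch of that transformation on one sample row (values illustrative, not real market data):

    import pandas as pd

    # One kline as returned by GET /api/v3/klines (values illustrative)
    raw = [[1729771200000, "2450.10", "2451.00", "2449.50", "2450.80", "12.345",
            1729771259999, "30251.2", 87, "6.1", "14950.3", "0"]]

    df = pd.DataFrame(raw, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades', 'taker_buy_base',
        'taker_buy_quote', 'ignore'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)  # 2024-10-24 12:00:00+00:00
    print(df[['timestamp', 'open', 'close']])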
+    def _fetch_from_mexc_with_time_range(self, symbol: str, timeframe: str,
+                                         start_time: Optional[datetime] = None,
+                                         end_time: Optional[datetime] = None,
+                                         limit: int = 1000,
+                                         direction: str = 'latest') -> Optional[pd.DataFrame]:
+        """Fetch from MEXC with time range support (fallback)"""
+        try:
+            # MEXC implementation would go here
+            # For now, just return None to indicate unavailable
+            logger.warning("MEXC time range fetch not implemented yet")
+            return None
+        except Exception as e:
+            logger.error(f"Error fetching from MEXC: {e}")
+            return None
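If the stub above is ever filled in, MEXC's spot REST API is broadly Binance-compatible, so the body could mirror _fetch_from_exchange_api. A rough sketch under that assumption — the endpoint path, interval spellings, and the 8-column response layout are assumptions to verify against MEXC's docs, not part of this commit:

    import requests
    import pandas as pd
    from typing import Optional

    def fetch_mexc_klines(symbol: str, timeframe: str,
                          end_time_ms: Optional[int] = None, limit: int = 1000) -> pd.DataFrame:
        """Hypothetical fallback assuming MEXC's Binance-style spot v3 klines endpoint."""
        interval_map = {'1m': '1m', '5m': '5m', '15m': '15m', '30m': '30m',
                        '1h': '60m', '4h': '4h', '1d': '1d'}  # MEXC spells 1h as 60m
        params = {'symbol': symbol.replace('/', '').upper(),
                  'interval': interval_map.get(timeframe, '1m'),
                  'limit': min(limit, 1000)}
        if end_time_ms is not None:
            params['endTime'] = end_time_ms  # same paging knob as the Binance path
        resp = requests.get("https://api.mexc.com/api/v3/klines", params=params, timeout=10)
        resp.raise_for_status()
        # MEXC klines carry 8 fields (no trade-count / taker columns)
        df = pd.DataFrame(resp.json(), columns=[
            'timestamp', 'open', 'high', 'low', 'close', 'volume',
            'close_time', 'quote_volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
        df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].set_index('timestamp')
        return df.astype(float).sort_index()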
     def _filter_by_time_range(self, df: pd.DataFrame,
                               start_time: Optional[datetime],
-                              end_time: Optional[datetime]) -> pd.DataFrame:
-        """Filter DataFrame by time range"""
-        if start_time:
-            df = df[df.index >= start_time]
-        if end_time:
-            df = df[df.index <= end_time]
+                              end_time: Optional[datetime],
+                              direction: str = 'latest',
+                              limit: int = 500) -> pd.DataFrame:
+        """
+        Filter DataFrame by time range with direction support
+
+        Args:
+            df: DataFrame to filter
+            start_time: Start time filter
+            end_time: End time filter
+            direction: 'latest', 'before', or 'after'
+            limit: Maximum number of candles
+
+        Returns:
+            Filtered DataFrame
+        """
+        if direction == 'before' and end_time:
+            # Get candles BEFORE end_time
+            df = df[df.index < end_time]
+            # Return the most recent N candles before end_time
+            df = df.tail(limit)
+        elif direction == 'after' and start_time:
+            # Get candles AFTER start_time
+            df = df[df.index > start_time]
+            # Return the oldest N candles after start_time
+            df = df.head(limit)
+        else:
+            # Default: filter by range
+            if start_time:
+                df = df[df.index >= start_time]
+            if end_time:
+                df = df[df.index <= end_time]
+            # Return most recent candles
+            if len(df) > limit:
+                df = df.tail(limit)
+
         return df
def get_multi_timeframe_data(self, symbol: str,
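The strict inequalities in _filter_by_time_range are what make repeated paging safe: passing the oldest visible candle's timestamp back as end_time can never re-fetch that candle. A standalone sketch of the resulting paging pattern (plain pandas with a hypothetical page_older helper, not part of this commit):

    import pandas as pd
    from datetime import datetime

    # Toy series: one candle per minute
    idx = pd.date_range(datetime(2024, 10, 24, 12, 0), periods=100, freq="1min")
    candles = pd.DataFrame({"close": range(100)}, index=idx)

    def page_older(df: pd.DataFrame, before: datetime, limit: int = 10) -> pd.DataFrame:
        """Most recent `limit` candles strictly before `before` (mirrors direction='before')."""
        return df[df.index < before].tail(limit)

    # Page backwards three times from the end; pages never overlap
    cursor = candles.index[-1]
    for _ in range(3):
        page = page_older(candles, cursor)
        print(page.index[0], "->", page.index[-1])
        cursor = page.index[0]  # oldest candle of this page becomes the next cursor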


@@ -1153,23 +1153,28 @@ class ChartManager {
        this.showLoadingIndicator(timeframe, direction);

        try {
-           // Calculate time range to fetch
-           const limit = 500; // Fetch 500 more candles
+           // Fetch more candles in batches (up to 1000 per request, see below)
            let startTime, endTime;

            if (direction === 'before') {
-               // Load older data
-               endTime = referenceTime.toISOString();
-               startTime = null; // Let backend calculate based on limit
+               // Load older data: get candles BEFORE the first candle we have
+               // Use the actual first timestamp from our data
+               const firstTimestamp = chart.data.timestamps[0];
+               endTime = new Date(firstTimestamp).toISOString();
+               startTime = null;
+               console.log(`Loading older data before ${endTime} for ${timeframe}`);
            } else {
-               // Load newer data
-               startTime = referenceTime.toISOString();
+               // Load newer data: get candles AFTER the last candle we have
+               // Use the actual last timestamp from our data
+               const lastTimestamp = chart.data.timestamps[chart.data.timestamps.length - 1];
+               startTime = new Date(lastTimestamp).toISOString();
                endTime = null;
+               console.log(`Loading newer data after ${startTime} for ${timeframe}`);
            }

-           console.log(`Loading ${limit} more candles ${direction} ${referenceTime.toISOString()} for ${timeframe}`);
-
-           // Fetch more data from backend
+           // Fetch more data from backend (up to 1000 candles per request)
            const response = await fetch('/api/chart-data', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
@@ -1178,7 +1183,7 @@ class ChartManager {
                    timeframes: [timeframe],
                    start_time: startTime,
                    end_time: endTime,
-                   limit: limit,
+                   limit: 1000, // Request 1000 candles at a time
                    direction: direction
                })
            });
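For reference, the backend receives a JSON body like the one below. A quick way to exercise the endpoint outside the browser is plain Python requests — the host/port and the symbol field are assumptions (the hunk above starts mid-payload); only the fields visible in the snippet are confirmed:

    import requests

    # Hypothetical local dev host; field names mirror the fetch() body above
    payload = {
        "symbol": "ETH/USDT",                     # assumed field, not visible in the hunk
        "timeframes": ["1m"],
        "start_time": None,                       # null when paging backwards
        "end_time": "2024-10-24T12:00:00.000Z",   # first visible candle's timestamp
        "limit": 1000,
        "direction": "before",
    }
    resp = requests.post("http://localhost:8050/api/chart-data", json=payload, timeout=10)
    result = resp.json()
    print(result.get("success"), len(result["chart_data"]["1m"]["timestamps"]))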
@@ -1188,6 +1193,13 @@ class ChartManager {
            if (result.success && result.chart_data && result.chart_data[timeframe]) {
                const newData = result.chart_data[timeframe];

+               // Check if we got any new data
+               if (newData.timestamps.length === 0) {
+                   console.warn(`No more data available for ${timeframe} ${direction}`);
+                   window.showWarning('No more historical data available');
+                   return;
+               }
+
                // Merge with existing data
                this.mergeChartData(timeframe, newData, direction);
@@ -1208,36 +1220,67 @@ class ChartManager {
    }

    /**
-    * Merge new data with existing chart data
+    * Merge new data with existing chart data (with deduplication)
     */
    mergeChartData(timeframe, newData, direction) {
        const chart = this.charts[timeframe];
        if (!chart || !chart.data) return;

        const existingData = chart.data;

+       // Create a set of existing timestamps for deduplication
+       const existingTimestamps = new Set(existingData.timestamps);
+
+       // Filter out duplicate timestamps from new data
+       const uniqueIndices = [];
+       newData.timestamps.forEach((ts, idx) => {
+           if (!existingTimestamps.has(ts)) {
+               uniqueIndices.push(idx);
+           }
+       });
+
+       // If no unique data, nothing to merge
+       if (uniqueIndices.length === 0) {
+           console.log(`No unique data to merge for ${timeframe}`);
+           return;
+       }
+
+       // Extract only unique data points
+       const uniqueNewData = {
+           timestamps: uniqueIndices.map(i => newData.timestamps[i]),
+           open: uniqueIndices.map(i => newData.open[i]),
+           high: uniqueIndices.map(i => newData.high[i]),
+           low: uniqueIndices.map(i => newData.low[i]),
+           close: uniqueIndices.map(i => newData.close[i]),
+           volume: uniqueIndices.map(i => newData.volume[i]),
+           pivot_markers: newData.pivot_markers || {}
+       };
+
+       console.log(`Merging ${uniqueIndices.length} unique candles (filtered ${newData.timestamps.length - uniqueIndices.length} duplicates)`);
+
        let mergedData;
        if (direction === 'before') {
            // Prepend older data
            mergedData = {
-               timestamps: [...newData.timestamps, ...existingData.timestamps],
-               open: [...newData.open, ...existingData.open],
-               high: [...newData.high, ...existingData.high],
-               low: [...newData.low, ...existingData.low],
-               close: [...newData.close, ...existingData.close],
-               volume: [...newData.volume, ...existingData.volume],
-               pivot_markers: { ...newData.pivot_markers, ...existingData.pivot_markers }
+               timestamps: [...uniqueNewData.timestamps, ...existingData.timestamps],
+               open: [...uniqueNewData.open, ...existingData.open],
+               high: [...uniqueNewData.high, ...existingData.high],
+               low: [...uniqueNewData.low, ...existingData.low],
+               close: [...uniqueNewData.close, ...existingData.close],
+               volume: [...uniqueNewData.volume, ...existingData.volume],
+               pivot_markers: { ...uniqueNewData.pivot_markers, ...existingData.pivot_markers }
            };
        } else {
            // Append newer data
            mergedData = {
-               timestamps: [...existingData.timestamps, ...newData.timestamps],
-               open: [...existingData.open, ...newData.open],
-               high: [...existingData.high, ...newData.high],
-               low: [...existingData.low, ...newData.low],
-               close: [...existingData.close, ...newData.close],
-               volume: [...existingData.volume, ...newData.volume],
-               pivot_markers: { ...existingData.pivot_markers, ...newData.pivot_markers }
+               timestamps: [...existingData.timestamps, ...uniqueNewData.timestamps],
+               open: [...existingData.open, ...uniqueNewData.open],
+               high: [...existingData.high, ...uniqueNewData.high],
+               low: [...existingData.low, ...uniqueNewData.low],
+               close: [...existingData.close, ...uniqueNewData.close],
+               volume: [...existingData.volume, ...uniqueNewData.volume],
+               pivot_markers: { ...existingData.pivot_markers, ...uniqueNewData.pivot_markers }
            };
        }
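The same dedupe-then-merge idea expressed on the Python side, in case the backend ever needs it — an illustrative pandas sketch, not part of this commit:

    import pandas as pd

    def merge_candles(existing: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame:
        """Merge two OHLCV frames keyed by timestamp index, dropping duplicate candles.

        Direction does not matter here: sorting by index puts prepended and
        appended pages in the right place either way.
        """
        unique_new = new.loc[new.index.difference(existing.index)]  # dedupe, like the JS Set
        return pd.concat([existing, unique_new]).sort_index()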


@@ -62,7 +62,7 @@
        window.appState = {
            currentSymbol: '{{ current_symbol }}',
            currentTimeframes: {{ timeframes | tojson }},
-           annotations: { { annotations | tojson } },
+           annotations: {{ annotations | tojson }},
            pendingAnnotation: null,
            chartManager: null,
            annotationManager: null,

test_direction_filter.py (new file, 89 lines)

@@ -0,0 +1,89 @@
"""
Test the direction filtering logic without DuckDB
"""
import pandas as pd
from datetime import datetime, timedelta
def test_filter_by_time_range():
"""Test the _filter_by_time_range logic"""
print("=" * 80)
print("Testing Direction Filter Logic")
print("=" * 80)
# Create sample data
base_time = datetime(2024, 10, 24, 12, 0, 0)
timestamps = [base_time + timedelta(minutes=i) for i in range(100)]
df = pd.DataFrame({
'open': range(100),
'high': range(100),
'low': range(100),
'close': range(100),
'volume': range(100)
}, index=timestamps)
print(f"\nCreated test data with {len(df)} candles")
print(f"First timestamp: {df.index[0]}")
print(f"Last timestamp: {df.index[-1]}")
# Test 1: Direction 'before'
print("\n" + "-" * 80)
print("Test 1: Direction 'before' - Get 10 candles BEFORE minute 50")
end_time = timestamps[50]
print(f"End time: {end_time}")
# Simulate the filter logic
filtered = df[df.index < end_time]
result = filtered.tail(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 40)")
print(f"Last: {result.index[-1]} (should be minute 49)")
if result.index[-1] < end_time:
print("✅ PASS: Last candle is before end_time")
else:
print("❌ FAIL: Last candle is NOT before end_time")
# Test 2: Direction 'after'
print("\n" + "-" * 80)
print("Test 2: Direction 'after' - Get 10 candles AFTER minute 50")
start_time = timestamps[50]
print(f"Start time: {start_time}")
# Simulate the filter logic
filtered = df[df.index > start_time]
result = filtered.head(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 51)")
print(f"Last: {result.index[-1]} (should be minute 60)")
if result.index[0] > start_time:
print("✅ PASS: First candle is after start_time")
else:
print("❌ FAIL: First candle is NOT after start_time")
# Test 3: Direction 'latest' (default)
print("\n" + "-" * 80)
print("Test 3: Direction 'latest' - Get most recent 10 candles")
result = df.tail(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 90)")
print(f"Last: {result.index[-1]} (should be minute 99)")
if result.index[-1] == df.index[-1]:
print("✅ PASS: Got most recent candles")
else:
print("❌ FAIL: Did NOT get most recent candles")
print("\n" + "=" * 80)
print("All Tests Complete")
print("=" * 80)
if __name__ == "__main__":
test_filter_by_time_range()


@@ -0,0 +1,139 @@
"""
Test infinite scroll backend data loading
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from datetime import datetime, timedelta
from core.data_provider import DataProvider
from ANNOTATE.core.data_loader import HistoricalDataLoader
def test_backend_data_loading():
"""Test if backend can load historical data with direction parameter"""
print("=" * 80)
print("Testing Infinite Scroll Backend Data Loading")
print("=" * 80)
# Initialize data provider
print("\n1. Initializing DataProvider...")
data_provider = DataProvider()
# Initialize data loader
print("2. Initializing HistoricalDataLoader...")
data_loader = HistoricalDataLoader(data_provider)
data_loader.disable_startup_mode() # Force fresh data
symbol = "ETH/USDT"
timeframe = "1m"
# Test 1: Load initial data (latest)
print(f"\n3. Loading initial data for {symbol} {timeframe}...")
initial_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
limit=100,
direction='latest'
)
if initial_df is None or initial_df.empty:
print("❌ FAILED: No initial data loaded")
return
print(f"✅ Loaded {len(initial_df)} initial candles")
print(f" First timestamp: {initial_df.index[0]}")
print(f" Last timestamp: {initial_df.index[-1]}")
# Test 2: Load older data (before first timestamp)
print(f"\n4. Loading older data BEFORE {initial_df.index[0]}...")
first_timestamp = initial_df.index[0]
older_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
end_time=first_timestamp,
limit=100,
direction='before'
)
if older_df is None or older_df.empty:
print("❌ FAILED: No older data loaded")
print(" This might mean:")
print(" - No data exists before this timestamp in DuckDB")
print(" - The query is not working correctly")
else:
print(f"✅ Loaded {len(older_df)} older candles")
print(f" First timestamp: {older_df.index[0]}")
print(f" Last timestamp: {older_df.index[-1]}")
# Check if older data is actually older
if older_df.index[-1] < first_timestamp:
print(f"✅ Data is correctly older (last older candle: {older_df.index[-1]} < first initial: {first_timestamp})")
else:
print(f"❌ WARNING: Data is NOT older! Last older: {older_df.index[-1]} >= first initial: {first_timestamp}")
# Test 3: Load newer data (after last timestamp)
print(f"\n5. Loading newer data AFTER {initial_df.index[-1]}...")
last_timestamp = initial_df.index[-1]
newer_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
start_time=last_timestamp,
limit=100,
direction='after'
)
if newer_df is None or newer_df.empty:
print("❌ No newer data loaded (this is expected if we're at the latest data)")
else:
print(f"✅ Loaded {len(newer_df)} newer candles")
print(f" First timestamp: {newer_df.index[0]}")
print(f" Last timestamp: {newer_df.index[-1]}")
# Check if newer data is actually newer
if newer_df.index[0] > last_timestamp:
print(f"✅ Data is correctly newer (first newer candle: {newer_df.index[0]} > last initial: {last_timestamp})")
else:
print(f"❌ WARNING: Data is NOT newer! First newer: {newer_df.index[0]} <= last initial: {last_timestamp}")
# Test 4: Check DuckDB directly
print(f"\n6. Checking DuckDB storage directly...")
if data_provider.duckdb_storage:
# Get total count
query = "SELECT COUNT(*) as count FROM ohlcv_data WHERE symbol = ? AND timeframe = ?"
result = data_provider.duckdb_storage.conn.execute(query, [symbol, timeframe]).fetchone()
total_count = result[0] if result else 0
print(f" Total candles in DuckDB: {total_count}")
if total_count == 0:
print(" ❌ No data in DuckDB! Need to fetch from API first.")
else:
# Get time range
query = """
SELECT
MIN(timestamp) as min_ts,
MAX(timestamp) as max_ts
FROM ohlcv_data
WHERE symbol = ? AND timeframe = ?
"""
result = data_provider.duckdb_storage.conn.execute(query, [symbol, timeframe]).fetchone()
if result:
import pandas as pd
min_ts = pd.to_datetime(result[0], unit='ms', utc=True)
max_ts = pd.to_datetime(result[1], unit='ms', utc=True)
print(f" Time range: {min_ts} to {max_ts}")
print(f" Duration: {max_ts - min_ts}")
else:
print(" ❌ DuckDB storage not available")
print("\n" + "=" * 80)
print("Test Complete")
print("=" * 80)
if __name__ == "__main__":
test_backend_data_loading()