infinite load WIP

Dobromir Popov
2025-10-24 23:04:29 +03:00
parent 07b82f0a1f
commit 2233a88d3e
5 changed files with 522 additions and 73 deletions


@@ -79,11 +79,14 @@ class HistoricalDataLoader:
                 if len(cached_df) >= min(limit, 100):  # Use cached if we have at least 100 candles
                     logger.debug(f"Using DataProvider cached data for {symbol} {timeframe} ({len(cached_df)} candles)")

-                    # Filter by time range if specified
-                    if start_time or end_time:
-                        filtered_df = self._filter_by_time_range(cached_df.copy(), start_time, end_time)
-                    else:
-                        filtered_df = cached_df.tail(limit).copy()
+                    # Filter by time range with direction support
+                    filtered_df = self._filter_by_time_range(
+                        cached_df.copy(),
+                        start_time,
+                        end_time,
+                        direction,
+                        limit
+                    )

                     # Cache in memory
                     self.memory_cache[cache_key] = (filtered_df, datetime.now())
@@ -140,13 +143,14 @@ class HistoricalDataLoader:
                     df = self.data_provider.cached_data[symbol][timeframe]
                     if df is not None and not df.empty:
-                        # Filter by time range if specified
-                        if start_time or end_time:
-                            df = self._filter_by_time_range(df, start_time, end_time)
-
-                        # Limit number of candles
-                        if len(df) > limit:
-                            df = df.tail(limit)
+                        # Filter by time range with direction support
+                        df = self._filter_by_time_range(
+                            df.copy(),
+                            start_time,
+                            end_time,
+                            direction,
+                            limit
+                        )

                         # Cache in memory
                         self.memory_cache[cache_key] = (df.copy(), datetime.now())
@@ -182,10 +186,37 @@ class HistoricalDataLoader:
                         self.memory_cache[cache_key] = (df.copy(), datetime.now())
                         return df
                     else:
-                        logger.info(f"No data in DuckDB, fetching from API for {symbol} {timeframe}")
+                        logger.info(f"📡 No data in DuckDB, fetching from exchange API for {symbol} {timeframe}")

-            # Fetch from API and store in DuckDB
-            logger.info(f"Fetching data from API for {symbol} {timeframe}")
+                        # Fetch from exchange API with time range
+                        df = self._fetch_from_exchange_api(
+                            symbol=symbol,
+                            timeframe=timeframe,
+                            start_time=start_time,
+                            end_time=end_time,
+                            limit=limit,
+                            direction=direction
+                        )
+
+                        if df is not None and not df.empty:
+                            # Store in DuckDB for future use
+                            if self.data_provider.duckdb_storage:
+                                stored_count = self.data_provider.duckdb_storage.store_ohlcv_data(
+                                    symbol=symbol,
+                                    timeframe=timeframe,
+                                    df=df
+                                )
+                                logger.info(f"💾 Stored {stored_count} new candles in DuckDB")
+
+                            # Cache in memory
+                            self.memory_cache[cache_key] = (df.copy(), datetime.now())
+                            return df
+                        else:
+                            logger.warning(f"No data available from exchange API for {symbol} {timeframe}")
+                            return None
+
+            # Fetch from API and store in DuckDB (no time range specified)
+            logger.info(f"Fetching latest data from API for {symbol} {timeframe}")
             df = self.data_provider.get_historical_data(
                 symbol=symbol,
                 timeframe=timeframe,
@@ -194,9 +225,14 @@ class HistoricalDataLoader:
             )

             if df is not None and not df.empty:
-                # Filter by time range if specified
-                if start_time or end_time:
-                    df = self._filter_by_time_range(df, start_time, end_time)
+                # Filter by time range with direction support
+                df = self._filter_by_time_range(
+                    df.copy(),
+                    start_time,
+                    end_time,
+                    direction,
+                    limit
+                )

                 # Cache in memory
                 self.memory_cache[cache_key] = (df.copy(), datetime.now())
@@ -211,14 +247,156 @@ class HistoricalDataLoader:
             logger.error(f"Error loading data for {symbol} {timeframe}: {e}")
             return None

+    def _fetch_from_exchange_api(self, symbol: str, timeframe: str,
+                                 start_time: Optional[datetime] = None,
+                                 end_time: Optional[datetime] = None,
+                                 limit: int = 1000,
+                                 direction: str = 'latest') -> Optional[pd.DataFrame]:
+        """
+        Fetch historical data from exchange API (Binance/MEXC) with time range support
+
+        Args:
+            symbol: Trading pair
+            timeframe: Timeframe
+            start_time: Start time for data range
+            end_time: End time for data range
+            limit: Maximum number of candles
+            direction: 'latest', 'before', or 'after'
+
+        Returns:
+            DataFrame with OHLCV data or None
+        """
+        try:
+            import requests
+            from core.api_rate_limiter import get_rate_limiter
+
+            # Convert symbol format for Binance
+            binance_symbol = symbol.replace('/', '').upper()
+
+            # Convert timeframe
+            timeframe_map = {
+                '1s': '1s', '1m': '1m', '5m': '5m', '15m': '15m', '30m': '30m',
+                '1h': '1h', '4h': '4h', '1d': '1d'
+            }
+            binance_timeframe = timeframe_map.get(timeframe, '1m')
+
+            # Build API parameters
+            params = {
+                'symbol': binance_symbol,
+                'interval': binance_timeframe,
+                'limit': min(limit, 1000)  # Binance max is 1000
+            }
+
+            # Add time range parameters if specified
+            if direction == 'before' and end_time:
+                # Get data ending at end_time
+                params['endTime'] = int(end_time.timestamp() * 1000)
+            elif direction == 'after' and start_time:
+                # Get data starting at start_time
+                params['startTime'] = int(start_time.timestamp() * 1000)
+            elif start_time:
+                params['startTime'] = int(start_time.timestamp() * 1000)
+            if end_time and direction != 'before':
+                params['endTime'] = int(end_time.timestamp() * 1000)
+
+            # Use rate limiter
+            rate_limiter = get_rate_limiter()
+            url = "https://api.binance.com/api/v3/klines"
+
+            logger.info(f"Fetching from Binance: {symbol} {timeframe} (direction={direction}, limit={limit})")
+            response = rate_limiter.make_request('binance_api', url, 'GET', params=params)
+
+            if response is None or response.status_code != 200:
+                logger.warning("Binance API failed, trying MEXC...")
+                # Try MEXC as fallback
+                return self._fetch_from_mexc_with_time_range(
+                    symbol, timeframe, start_time, end_time, limit, direction
+                )
+
+            data = response.json()
+            if not data:
+                logger.warning(f"No data returned from Binance for {symbol} {timeframe}")
+                return None
+
+            # Convert to DataFrame
+            df = pd.DataFrame(data, columns=[
+                'timestamp', 'open', 'high', 'low', 'close', 'volume',
+                'close_time', 'quote_volume', 'trades', 'taker_buy_base',
+                'taker_buy_quote', 'ignore'
+            ])
+
+            # Process columns
+            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
+            for col in ['open', 'high', 'low', 'close', 'volume']:
+                df[col] = df[col].astype(float)
+
+            # Keep only OHLCV columns
+            df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
+            df = df.set_index('timestamp')
+            df = df.sort_index()

+            logger.info(f"✅ Fetched {len(df)} candles from Binance for {symbol} {timeframe}")
+            return df
+
+        except Exception as e:
+            logger.error(f"Error fetching from exchange API: {e}")
+            return None
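For reference, each element of the Binance klines response is a 12-field array (prices arrive as strings, timestamps as epoch milliseconds), which is why the conversion above casts the numeric columns and parses timestamps with unit='ms'. A minimal sketch of that transformation on one sample row (values illustrative, not real market data):

    import pandas as pd

    # One kline as returned by GET /api/v3/klines (values illustrative)
    raw = [[1729771200000, "2450.10", "2451.00", "2449.50", "2450.80", "12.345",
            1729771259999, "30251.2", 87, "6.1", "14950.3", "0"]]

    df = pd.DataFrame(raw, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades', 'taker_buy_base',
        'taker_buy_quote', 'ignore'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)  # 2024-10-24 12:00:00+00:00
    print(df[['timestamp', 'open', 'close']])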
+    def _fetch_from_mexc_with_time_range(self, symbol: str, timeframe: str,
+                                         start_time: Optional[datetime] = None,
+                                         end_time: Optional[datetime] = None,
+                                         limit: int = 1000,
+                                         direction: str = 'latest') -> Optional[pd.DataFrame]:
+        """Fetch from MEXC with time range support (fallback)"""
+        try:
+            # MEXC implementation would go here
+            # For now, just return None to indicate unavailable
+            logger.warning("MEXC time range fetch not implemented yet")
+            return None
+        except Exception as e:
+            logger.error(f"Error fetching from MEXC: {e}")
+            return None
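If the stub above is ever filled in, MEXC's spot REST API is broadly Binance-compatible, so the body could mirror _fetch_from_exchange_api. A rough sketch under that assumption — the endpoint path, interval spellings, and the 8-column response layout are assumptions to verify against MEXC's docs, not part of this commit:

    import requests
    import pandas as pd
    from typing import Optional

    def fetch_mexc_klines(symbol: str, timeframe: str,
                          end_time_ms: Optional[int] = None, limit: int = 1000) -> pd.DataFrame:
        """Hypothetical fallback assuming MEXC's Binance-style spot v3 klines endpoint."""
        interval_map = {'1m': '1m', '5m': '5m', '15m': '15m', '30m': '30m',
                        '1h': '60m', '4h': '4h', '1d': '1d'}  # MEXC spells 1h as 60m
        params = {'symbol': symbol.replace('/', '').upper(),
                  'interval': interval_map.get(timeframe, '1m'),
                  'limit': min(limit, 1000)}
        if end_time_ms is not None:
            params['endTime'] = end_time_ms  # same paging knob as the Binance path
        resp = requests.get("https://api.mexc.com/api/v3/klines", params=params, timeout=10)
        resp.raise_for_status()
        # MEXC klines carry 8 fields (no trade-count / taker columns)
        df = pd.DataFrame(resp.json(), columns=[
            'timestamp', 'open', 'high', 'low', 'close', 'volume',
            'close_time', 'quote_volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
        df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].set_index('timestamp')
        return df.astype(float).sort_index()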
     def _filter_by_time_range(self, df: pd.DataFrame,
                               start_time: Optional[datetime],
-                              end_time: Optional[datetime]) -> pd.DataFrame:
-        """Filter DataFrame by time range"""
-        if start_time:
-            df = df[df.index >= start_time]
-        if end_time:
-            df = df[df.index <= end_time]
+                              end_time: Optional[datetime],
+                              direction: str = 'latest',
+                              limit: int = 500) -> pd.DataFrame:
+        """
+        Filter DataFrame by time range with direction support
+
+        Args:
+            df: DataFrame to filter
+            start_time: Start time filter
+            end_time: End time filter
+            direction: 'latest', 'before', or 'after'
+            limit: Maximum number of candles
+
+        Returns:
+            Filtered DataFrame
+        """
+        if direction == 'before' and end_time:
+            # Get candles BEFORE end_time
+            df = df[df.index < end_time]
+            # Return the most recent N candles before end_time
+            df = df.tail(limit)
+        elif direction == 'after' and start_time:
+            # Get candles AFTER start_time
+            df = df[df.index > start_time]
+            # Return the oldest N candles after start_time
+            df = df.head(limit)
+        else:
+            # Default: filter by range
+            if start_time:
+                df = df[df.index >= start_time]
+            if end_time:
+                df = df[df.index <= end_time]
+            # Return most recent candles
+            if len(df) > limit:
+                df = df.tail(limit)
+
         return df
def get_multi_timeframe_data(self, symbol: str,
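The strict inequalities in _filter_by_time_range are what make repeated paging safe: passing the oldest visible candle's timestamp back as end_time can never re-fetch that candle. A standalone sketch of the resulting paging pattern (plain pandas with a hypothetical page_older helper, not part of this commit):

    import pandas as pd
    from datetime import datetime

    # Toy series: one candle per minute
    idx = pd.date_range(datetime(2024, 10, 24, 12, 0), periods=100, freq="1min")
    candles = pd.DataFrame({"close": range(100)}, index=idx)

    def page_older(df: pd.DataFrame, before: datetime, limit: int = 10) -> pd.DataFrame:
        """Most recent `limit` candles strictly before `before` (mirrors direction='before')."""
        return df[df.index < before].tail(limit)

    # Page backwards three times from the end; pages never overlap
    cursor = candles.index[-1]
    for _ in range(3):
        page = page_older(candles, cursor)
        print(page.index[0], "->", page.index[-1])
        cursor = page.index[0]  # oldest candle of this page becomes the next cursor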


@@ -1153,23 +1153,28 @@ class ChartManager {
        this.showLoadingIndicator(timeframe, direction);

        try {
-           // Calculate time range to fetch
-           const limit = 500; // Fetch 500 more candles
+           // Fetch more candles in batches (up to 1000 per request, see below)
            let startTime, endTime;

            if (direction === 'before') {
-               // Load older data
-               endTime = referenceTime.toISOString();
-               startTime = null; // Let backend calculate based on limit
+               // Load older data: get candles BEFORE the first candle we have
+               // Use the actual first timestamp from our data
+               const firstTimestamp = chart.data.timestamps[0];
+               endTime = new Date(firstTimestamp).toISOString();
+               startTime = null;
+               console.log(`Loading older data before ${endTime} for ${timeframe}`);
            } else {
-               // Load newer data
-               startTime = referenceTime.toISOString();
+               // Load newer data: get candles AFTER the last candle we have
+               // Use the actual last timestamp from our data
+               const lastTimestamp = chart.data.timestamps[chart.data.timestamps.length - 1];
+               startTime = new Date(lastTimestamp).toISOString();
                endTime = null;
+               console.log(`Loading newer data after ${startTime} for ${timeframe}`);
            }

-           console.log(`Loading ${limit} more candles ${direction} ${referenceTime.toISOString()} for ${timeframe}`);
-
-           // Fetch more data from backend
+           // Fetch more data from backend (up to 1000 candles per request)
            const response = await fetch('/api/chart-data', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
@@ -1178,7 +1183,7 @@ class ChartManager {
                    timeframes: [timeframe],
                    start_time: startTime,
                    end_time: endTime,
-                   limit: limit,
+                   limit: 1000, // Request 1000 candles at a time
                    direction: direction
                })
            });
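For reference, the backend receives a JSON body like the one below. A quick way to exercise the endpoint outside the browser is plain Python requests — the host/port and the symbol field are assumptions (the hunk above starts mid-payload); only the fields visible in the snippet are confirmed:

    import requests

    # Hypothetical local dev host; field names mirror the fetch() body above
    payload = {
        "symbol": "ETH/USDT",                     # assumed field, not visible in the hunk
        "timeframes": ["1m"],
        "start_time": None,                       # null when paging backwards
        "end_time": "2024-10-24T12:00:00.000Z",   # first visible candle's timestamp
        "limit": 1000,
        "direction": "before",
    }
    resp = requests.post("http://localhost:8050/api/chart-data", json=payload, timeout=10)
    result = resp.json()
    print(result.get("success"), len(result["chart_data"]["1m"]["timestamps"]))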
@@ -1188,6 +1193,13 @@ class ChartManager {
            if (result.success && result.chart_data && result.chart_data[timeframe]) {
                const newData = result.chart_data[timeframe];

+               // Check if we got any new data
+               if (newData.timestamps.length === 0) {
+                   console.warn(`No more data available for ${timeframe} ${direction}`);
+                   window.showWarning('No more historical data available');
+                   return;
+               }
+
                // Merge with existing data
                this.mergeChartData(timeframe, newData, direction);
@@ -1208,36 +1220,67 @@ class ChartManager {
    }

    /**
-    * Merge new data with existing chart data
+    * Merge new data with existing chart data (with deduplication)
     */
    mergeChartData(timeframe, newData, direction) {
        const chart = this.charts[timeframe];
        if (!chart || !chart.data) return;

        const existingData = chart.data;

+       // Create a set of existing timestamps for deduplication
+       const existingTimestamps = new Set(existingData.timestamps);
+
+       // Filter out duplicate timestamps from new data
+       const uniqueIndices = [];
+       newData.timestamps.forEach((ts, idx) => {
+           if (!existingTimestamps.has(ts)) {
+               uniqueIndices.push(idx);
+           }
+       });
+
+       // If no unique data, nothing to merge
+       if (uniqueIndices.length === 0) {
+           console.log(`No unique data to merge for ${timeframe}`);
+           return;
+       }
+
+       // Extract only unique data points
+       const uniqueNewData = {
+           timestamps: uniqueIndices.map(i => newData.timestamps[i]),
+           open: uniqueIndices.map(i => newData.open[i]),
+           high: uniqueIndices.map(i => newData.high[i]),
+           low: uniqueIndices.map(i => newData.low[i]),
+           close: uniqueIndices.map(i => newData.close[i]),
+           volume: uniqueIndices.map(i => newData.volume[i]),
+           pivot_markers: newData.pivot_markers || {}
+       };
+
+       console.log(`Merging ${uniqueIndices.length} unique candles (filtered ${newData.timestamps.length - uniqueIndices.length} duplicates)`);
+
        let mergedData;
        if (direction === 'before') {
            // Prepend older data
            mergedData = {
-               timestamps: [...newData.timestamps, ...existingData.timestamps],
-               open: [...newData.open, ...existingData.open],
-               high: [...newData.high, ...existingData.high],
-               low: [...newData.low, ...existingData.low],
-               close: [...newData.close, ...existingData.close],
-               volume: [...newData.volume, ...existingData.volume],
-               pivot_markers: { ...newData.pivot_markers, ...existingData.pivot_markers }
+               timestamps: [...uniqueNewData.timestamps, ...existingData.timestamps],
+               open: [...uniqueNewData.open, ...existingData.open],
+               high: [...uniqueNewData.high, ...existingData.high],
+               low: [...uniqueNewData.low, ...existingData.low],
+               close: [...uniqueNewData.close, ...existingData.close],
+               volume: [...uniqueNewData.volume, ...existingData.volume],
+               pivot_markers: { ...uniqueNewData.pivot_markers, ...existingData.pivot_markers }
            };
        } else {
            // Append newer data
            mergedData = {
-               timestamps: [...existingData.timestamps, ...newData.timestamps],
-               open: [...existingData.open, ...newData.open],
-               high: [...existingData.high, ...newData.high],
-               low: [...existingData.low, ...newData.low],
-               close: [...existingData.close, ...newData.close],
-               volume: [...existingData.volume, ...newData.volume],
-               pivot_markers: { ...existingData.pivot_markers, ...newData.pivot_markers }
+               timestamps: [...existingData.timestamps, ...uniqueNewData.timestamps],
+               open: [...existingData.open, ...uniqueNewData.open],
+               high: [...existingData.high, ...uniqueNewData.high],
+               low: [...existingData.low, ...uniqueNewData.low],
+               close: [...existingData.close, ...uniqueNewData.close],
+               volume: [...existingData.volume, ...uniqueNewData.volume],
+               pivot_markers: { ...existingData.pivot_markers, ...uniqueNewData.pivot_markers }
            };
        }
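The same dedupe-then-merge idea expressed on the Python side, in case the backend ever needs it — an illustrative pandas sketch, not part of this commit:

    import pandas as pd

    def merge_candles(existing: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame:
        """Merge two OHLCV frames keyed by timestamp index, dropping duplicate candles.

        Direction does not matter here: sorting by index puts prepended and
        appended pages in the right place either way.
        """
        unique_new = new.loc[new.index.difference(existing.index)]  # dedupe, like the JS Set
        return pd.concat([existing, unique_new]).sort_index()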


@@ -62,7 +62,7 @@
        window.appState = {
            currentSymbol: '{{ current_symbol }}',
            currentTimeframes: {{ timeframes | tojson }},
-           annotations: { { annotations | tojson } },
+           annotations: {{ annotations | tojson }},
            pendingAnnotation: null,
            chartManager: null,
            annotationManager: null,

test_direction_filter.py (new file, 89 lines)

@@ -0,0 +1,89 @@
"""
Test the direction filtering logic without DuckDB
"""
import pandas as pd
from datetime import datetime, timedelta
def test_filter_by_time_range():
"""Test the _filter_by_time_range logic"""
print("=" * 80)
print("Testing Direction Filter Logic")
print("=" * 80)
# Create sample data
base_time = datetime(2024, 10, 24, 12, 0, 0)
timestamps = [base_time + timedelta(minutes=i) for i in range(100)]
df = pd.DataFrame({
'open': range(100),
'high': range(100),
'low': range(100),
'close': range(100),
'volume': range(100)
}, index=timestamps)
print(f"\nCreated test data with {len(df)} candles")
print(f"First timestamp: {df.index[0]}")
print(f"Last timestamp: {df.index[-1]}")
# Test 1: Direction 'before'
print("\n" + "-" * 80)
print("Test 1: Direction 'before' - Get 10 candles BEFORE minute 50")
end_time = timestamps[50]
print(f"End time: {end_time}")
# Simulate the filter logic
filtered = df[df.index < end_time]
result = filtered.tail(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 40)")
print(f"Last: {result.index[-1]} (should be minute 49)")
if result.index[-1] < end_time:
print("✅ PASS: Last candle is before end_time")
else:
print("❌ FAIL: Last candle is NOT before end_time")
# Test 2: Direction 'after'
print("\n" + "-" * 80)
print("Test 2: Direction 'after' - Get 10 candles AFTER minute 50")
start_time = timestamps[50]
print(f"Start time: {start_time}")
# Simulate the filter logic
filtered = df[df.index > start_time]
result = filtered.head(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 51)")
print(f"Last: {result.index[-1]} (should be minute 60)")
if result.index[0] > start_time:
print("✅ PASS: First candle is after start_time")
else:
print("❌ FAIL: First candle is NOT after start_time")
# Test 3: Direction 'latest' (default)
print("\n" + "-" * 80)
print("Test 3: Direction 'latest' - Get most recent 10 candles")
result = df.tail(10)
print(f"Result: {len(result)} candles")
print(f"First: {result.index[0]} (should be minute 90)")
print(f"Last: {result.index[-1]} (should be minute 99)")
if result.index[-1] == df.index[-1]:
print("✅ PASS: Got most recent candles")
else:
print("❌ FAIL: Did NOT get most recent candles")
print("\n" + "=" * 80)
print("All Tests Complete")
print("=" * 80)
if __name__ == "__main__":
test_filter_by_time_range()


@@ -0,0 +1,139 @@
"""
Test infinite scroll backend data loading
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from datetime import datetime, timedelta
from core.data_provider import DataProvider
from ANNOTATE.core.data_loader import HistoricalDataLoader
def test_backend_data_loading():
"""Test if backend can load historical data with direction parameter"""
print("=" * 80)
print("Testing Infinite Scroll Backend Data Loading")
print("=" * 80)
# Initialize data provider
print("\n1. Initializing DataProvider...")
data_provider = DataProvider()
# Initialize data loader
print("2. Initializing HistoricalDataLoader...")
data_loader = HistoricalDataLoader(data_provider)
data_loader.disable_startup_mode() # Force fresh data
symbol = "ETH/USDT"
timeframe = "1m"
# Test 1: Load initial data (latest)
print(f"\n3. Loading initial data for {symbol} {timeframe}...")
initial_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
limit=100,
direction='latest'
)
if initial_df is None or initial_df.empty:
print("❌ FAILED: No initial data loaded")
return
print(f"✅ Loaded {len(initial_df)} initial candles")
print(f" First timestamp: {initial_df.index[0]}")
print(f" Last timestamp: {initial_df.index[-1]}")
# Test 2: Load older data (before first timestamp)
print(f"\n4. Loading older data BEFORE {initial_df.index[0]}...")
first_timestamp = initial_df.index[0]
older_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
end_time=first_timestamp,
limit=100,
direction='before'
)
if older_df is None or older_df.empty:
print("❌ FAILED: No older data loaded")
print(" This might mean:")
print(" - No data exists before this timestamp in DuckDB")
print(" - The query is not working correctly")
else:
print(f"✅ Loaded {len(older_df)} older candles")
print(f" First timestamp: {older_df.index[0]}")
print(f" Last timestamp: {older_df.index[-1]}")
# Check if older data is actually older
if older_df.index[-1] < first_timestamp:
print(f"✅ Data is correctly older (last older candle: {older_df.index[-1]} < first initial: {first_timestamp})")
else:
print(f"❌ WARNING: Data is NOT older! Last older: {older_df.index[-1]} >= first initial: {first_timestamp}")
# Test 3: Load newer data (after last timestamp)
print(f"\n5. Loading newer data AFTER {initial_df.index[-1]}...")
last_timestamp = initial_df.index[-1]
newer_df = data_loader.get_data(
symbol=symbol,
timeframe=timeframe,
start_time=last_timestamp,
limit=100,
direction='after'
)
if newer_df is None or newer_df.empty:
print("❌ No newer data loaded (this is expected if we're at the latest data)")
else:
print(f"✅ Loaded {len(newer_df)} newer candles")
print(f" First timestamp: {newer_df.index[0]}")
print(f" Last timestamp: {newer_df.index[-1]}")
# Check if newer data is actually newer
if newer_df.index[0] > last_timestamp:
print(f"✅ Data is correctly newer (first newer candle: {newer_df.index[0]} > last initial: {last_timestamp})")
else:
print(f"❌ WARNING: Data is NOT newer! First newer: {newer_df.index[0]} <= last initial: {last_timestamp}")
# Test 4: Check DuckDB directly
print(f"\n6. Checking DuckDB storage directly...")
if data_provider.duckdb_storage:
# Get total count
query = "SELECT COUNT(*) as count FROM ohlcv_data WHERE symbol = ? AND timeframe = ?"
result = data_provider.duckdb_storage.conn.execute(query, [symbol, timeframe]).fetchone()
total_count = result[0] if result else 0
print(f" Total candles in DuckDB: {total_count}")
if total_count == 0:
print(" ❌ No data in DuckDB! Need to fetch from API first.")
else:
# Get time range
query = """
SELECT
MIN(timestamp) as min_ts,
MAX(timestamp) as max_ts
FROM ohlcv_data
WHERE symbol = ? AND timeframe = ?
"""
result = data_provider.duckdb_storage.conn.execute(query, [symbol, timeframe]).fetchone()
if result:
import pandas as pd
min_ts = pd.to_datetime(result[0], unit='ms', utc=True)
max_ts = pd.to_datetime(result[1], unit='ms', utc=True)
print(f" Time range: {min_ts} to {max_ts}")
print(f" Duration: {max_ts - min_ts}")
else:
print(" ❌ DuckDB storage not available")
print("\n" + "=" * 80)
print("Test Complete")
print("=" * 80)
if __name__ == "__main__":
test_backend_data_loading()