Shared Pattern Encoder

fix T training
Dobromir Popov
2025-11-06 14:27:52 +02:00
parent 07d97100c0
commit 738c7cb854
5 changed files with 1276 additions and 180 deletions


@@ -722,6 +722,13 @@ class DataProvider:
 # Ensure proper datetime index
 df = self._ensure_datetime_index(df)
+
+# Store to DuckDB
+if self.duckdb_storage:
+    try:
+        self.duckdb_storage.store_ohlcv_data(symbol, timeframe, df)
+    except Exception as e:
+        logger.warning(f"Could not store catch-up data to DuckDB: {e}")
 # Update cached data with lock
 with self.data_lock:
     current_df = self.cached_data[symbol][timeframe]
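
Reviewer note: the hunk above adds best-effort persistence. The DuckDB write is attempted only when a backend is configured, and any storage error is logged instead of raised, so the cache update under data_lock still runs. A minimal self-contained sketch of that pattern (FlakyStorage and cache_and_store are hypothetical stand-ins for the provider's duckdb_storage and its surrounding method, not the project's API):

import logging

import pandas as pd

logger = logging.getLogger(__name__)

class FlakyStorage:
    """Hypothetical stand-in for the provider's DuckDB storage backend."""
    def store_ohlcv_data(self, symbol: str, timeframe: str, df: pd.DataFrame) -> None:
        raise ConnectionError("database locked")  # simulate a storage failure

def cache_and_store(storage, symbol: str, timeframe: str, df: pd.DataFrame) -> pd.DataFrame:
    # Best-effort persistence: a DuckDB failure is logged, never raised,
    # so the caller's cache update always proceeds.
    if storage:
        try:
            storage.store_ohlcv_data(symbol, timeframe, df)
        except Exception as e:
            logger.warning(f"Could not store catch-up data to DuckDB: {e}")
    return df

# Even with a failing backend the DataFrame comes back intact:
df = pd.DataFrame({"close": [100.0, 101.5]})
assert cache_and_store(FlakyStorage(), "ETH/USDT", "1m", df) is df

Swallowing the exception here trades durability for availability: a down database degrades to cache-only operation instead of breaking the catch-up loop.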
@@ -1520,33 +1527,49 @@ class DataProvider:
 for timeframe in timeframes:
     try:
-        # Calculate how many candles we need for the time period
-        if timeframe == '1s':
-            limit = int((end_time - start_time).total_seconds()) + 100  # Extra buffer
-        elif timeframe == '1m':
-            limit = int((end_time - start_time).total_seconds() / 60) + 10
-        elif timeframe == '1h':
-            limit = int((end_time - start_time).total_seconds() / 3600) + 5
-        elif timeframe == '1d':
-            limit = int((end_time - start_time).total_seconds() / 86400) + 2
-        else:
-            limit = 1000
-
-        # Fetch historical data
-        df = self.get_historical_data(symbol, timeframe, limit=limit, refresh=True)
+        df = None
+
+        # Try DuckDB first with time range query (most efficient)
+        if self.duckdb_storage:
+            try:
+                df = self.duckdb_storage.get_ohlcv_data(
+                    symbol=symbol,
+                    timeframe=timeframe,
+                    start_time=start_time,
+                    end_time=end_time,
+                    limit=10000  # Large limit for historical queries
+                )
+                if df is not None and not df.empty:
+                    logger.debug(f" {timeframe}: {len(df)} candles from DuckDB")
+            except Exception as e:
+                logger.debug(f" {timeframe}: DuckDB query failed: {e}")
+
+        # Fallback: try memory cache or API
+        if df is None or df.empty:
+            # Calculate how many candles we need for the time period
+            if timeframe == '1s':
+                limit = int((end_time - start_time).total_seconds()) + 100  # Extra buffer
+            elif timeframe == '1m':
+                limit = int((end_time - start_time).total_seconds() / 60) + 10
+            elif timeframe == '1h':
+                limit = int((end_time - start_time).total_seconds() / 3600) + 5
+            elif timeframe == '1d':
+                limit = int((end_time - start_time).total_seconds() / 86400) + 2
+            else:
+                limit = 1000
+
+            # Fetch from cache or API (use cache when available)
+            df = self.get_historical_data(symbol, timeframe, limit=limit, refresh=False)
 
-        if df is not None and not df.empty:
-            # Filter to the exact time period
-            df = df[(df.index >= start_time) & (df.index <= end_time)]
-            replay_data[timeframe] = df
-            logger.info(f" {timeframe}: {len(df)} candles in replay period")
+        if df is not None and not df.empty:
+            # Filter to the exact time period
+            df_filtered = df[(df.index >= start_time) & (df.index <= end_time)]
+            if not df_filtered.empty:
+                replay_data[timeframe] = df_filtered
+                logger.info(f" {timeframe}: {len(df_filtered)} candles in replay period")
+            else:
+                logger.warning(f" {timeframe}: No data in replay period")
+                replay_data[timeframe] = pd.DataFrame()
         else:
-            logger.warning(f" {timeframe}: No data available")
+            logger.warning(f" {timeframe}: No data in replay period")
             replay_data[timeframe] = pd.DataFrame()
     except Exception as e:
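
Reviewer note: the second hunk inverts the read path for replay data. A time-range query against DuckDB is tried first; the per-timeframe limit estimate plus cache/API fetch (now refresh=False, so the cache is actually used) runs only as a fallback, and either way the result is clamped to the exact replay window. A condensed sketch of that control flow, assuming hypothetical query_range and fetch_recent callables in place of duckdb_storage.get_ohlcv_data and get_historical_data, with the per-timeframe buffer constants simplified to a flat +10:

from datetime import datetime

import pandas as pd

# Seconds per candle, used to size the fallback fetch (buffers simplified here).
TIMEFRAME_SECONDS = {'1s': 1, '1m': 60, '1h': 3600, '1d': 86400}

def load_replay_slice(query_range, fetch_recent, symbol: str, timeframe: str,
                      start_time: datetime, end_time: datetime) -> pd.DataFrame:
    df = None
    # 1) Preferred: indexed time-range query against DuckDB.
    try:
        df = query_range(symbol, timeframe, start_time, end_time)
    except Exception:
        df = None  # fall through to the cache/API path
    # 2) Fallback: estimate how many candles span the window, then hit cache/API.
    if df is None or df.empty:
        step = TIMEFRAME_SECONDS.get(timeframe)
        limit = int((end_time - start_time).total_seconds() / step) + 10 if step else 1000
        df = fetch_recent(symbol, timeframe, limit)
    # 3) Clamp to the exact replay window regardless of which path produced df.
    if df is not None and not df.empty:
        return df[(df.index >= start_time) & (df.index <= end_time)]
    return pd.DataFrame()

# Example wiring (both callables are assumptions, not the project's API):
# slice_1m = load_replay_slice(duckdb_query, binance_fetch, "ETH/USDT", "1m", t0, t1)

The behavioural fix rides on refresh=False: replaying a historical window no longer forces a fresh API pull when cached candles already cover it, and a window that filters to empty is now logged as a warning and stored as an explicit empty DataFrame.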