better pivots

Dobromir Popov 2025-05-31 00:33:07 +03:00
parent 7a0e468c3e
commit 3a748daff2
3 changed files with 175 additions and 108 deletions


@@ -646,95 +646,68 @@ class DataProvider:
     # === WILLIAMS MARKET STRUCTURE PIVOT SYSTEM ===
-    def _collect_monthly_1s_data(self, symbol: str) -> Optional[pd.DataFrame]:
-        """Collect 1 month of 1s candles using paginated API calls"""
+    def _collect_monthly_1m_data(self, symbol: str) -> Optional[pd.DataFrame]:
+        """Collect 30 days of 1m candles with smart gap-filling cache system"""
         try:
-            # Check if we have cached monthly data first
-            cached_monthly_data = self._load_monthly_data_from_cache(symbol)
-            if cached_monthly_data is not None:
-                logger.info(f"Using cached monthly 1s data for {symbol}: {len(cached_monthly_data)} candles")
-                return cached_monthly_data
-            logger.info(f"Collecting 1 month of 1s data for {symbol}...")
-            # Calculate time range (30 days)
+            # Check for cached data and determine what we need to fetch
+            cached_data = self._load_monthly_data_from_cache(symbol)
             end_time = datetime.now()
             start_time = end_time - timedelta(days=30)
-            all_candles = []
-            current_time = end_time
-            api_calls_made = 0
-            total_candles_collected = 0
-            # Binance rate limit: 1200 requests/minute = 20/second
-            rate_limit_delay = 0.05  # 50ms between requests
-            while current_time > start_time and api_calls_made < 3000:  # Safety limit
-                try:
-                    # Get 1000 candles working backwards
-                    batch_df = self._fetch_1s_batch_with_endtime(symbol, current_time, limit=1000)
-                    if batch_df is None or batch_df.empty:
-                        logger.warning(f"No data returned for batch ending at {current_time}")
-                        break
-                    api_calls_made += 1
-                    batch_size = len(batch_df)
-                    total_candles_collected += batch_size
-                    # Add batch to collection
-                    all_candles.append(batch_df)
-                    # Update current time to the earliest timestamp in this batch
-                    earliest_time = batch_df['timestamp'].min()
-                    if earliest_time >= current_time:
-                        logger.warning(f"No progress in time collection, breaking")
-                        break
-                    current_time = earliest_time - timedelta(seconds=1)
-                    # Rate limiting
-                    time.sleep(rate_limit_delay)
-                    # Progress logging every 100 requests
-                    if api_calls_made % 100 == 0:
-                        logger.info(f"Progress: {api_calls_made} API calls, {total_candles_collected} candles collected")
-                    # Break if we have enough data (about 2.6M candles for 30 days)
-                    if total_candles_collected >= 2500000:  # 30 days * 24 hours * 3600 seconds ≈ 2.6M
-                        logger.info(f"Collected sufficient data: {total_candles_collected} candles")
-                        break
-                except Exception as e:
-                    logger.error(f"Error in batch collection: {e}")
-                    time.sleep(1)  # Wait longer on error
-                    continue
-            if not all_candles:
-                logger.error(f"No monthly data collected for {symbol}")
-                return None
-            # Combine all batches
-            logger.info(f"Combining {len(all_candles)} batches...")
-            monthly_df = pd.concat(all_candles, ignore_index=True)
-            # Sort by timestamp and remove duplicates
-            monthly_df = monthly_df.sort_values('timestamp').drop_duplicates(subset=['timestamp']).reset_index(drop=True)
-            # Filter to exactly 30 days
-            cutoff_time = end_time - timedelta(days=30)
-            monthly_df = monthly_df[monthly_df['timestamp'] >= cutoff_time]
-            logger.info(f"Successfully collected {len(monthly_df)} 1s candles for {symbol} "
-                        f"({api_calls_made} API calls made)")
-            # Cache the monthly data
-            self._save_monthly_data_to_cache(symbol, monthly_df)
-            return monthly_df
+            if cached_data is not None and not cached_data.empty:
+                logger.info(f"Found cached monthly 1m data for {symbol}: {len(cached_data)} candles")
+                # Check cache data range
+                cache_start = cached_data['timestamp'].min()
+                cache_end = cached_data['timestamp'].max()
+                logger.info(f"Cache range: {cache_start} to {cache_end}")
+                # Remove data older than 30 days
+                cached_data = cached_data[cached_data['timestamp'] >= start_time]
+                # Check if we need to fill gaps
+                gap_start = cache_end + timedelta(minutes=1)
+                if gap_start < end_time:
+                    # Need to fill gap from cache_end to now
+                    logger.info(f"Filling gap from {gap_start} to {end_time}")
+                    gap_data = self._fetch_1m_data_range(symbol, gap_start, end_time)
+                    if gap_data is not None and not gap_data.empty:
+                        # Combine cached data with gap data
+                        monthly_df = pd.concat([cached_data, gap_data], ignore_index=True)
+                        monthly_df = monthly_df.sort_values('timestamp').drop_duplicates(subset=['timestamp']).reset_index(drop=True)
+                        logger.info(f"Combined cache + gap: {len(monthly_df)} total candles")
+                    else:
+                        monthly_df = cached_data
+                        logger.info(f"Using cached data only: {len(monthly_df)} candles")
+                else:
+                    monthly_df = cached_data
+                    logger.info(f"Cache is up to date: {len(monthly_df)} candles")
+            else:
+                # No cache - fetch full 30 days
+                logger.info(f"No cache found, collecting full 30 days of 1m data for {symbol}")
+                monthly_df = self._fetch_1m_data_range(symbol, start_time, end_time)
+            if monthly_df is not None and not monthly_df.empty:
+                # Final cleanup: ensure exactly 30 days
+                monthly_df = monthly_df[monthly_df['timestamp'] >= start_time]
+                monthly_df = monthly_df.sort_values('timestamp').reset_index(drop=True)
+                logger.info(f"Final dataset: {len(monthly_df)} 1m candles for {symbol}")
+                # Update cache
+                self._save_monthly_data_to_cache(symbol, monthly_df)
+                return monthly_df
+            else:
+                logger.error(f"No monthly 1m data collected for {symbol}")
+                return None
         except Exception as e:
-            logger.error(f"Error collecting monthly 1s data for {symbol}: {e}")
+            logger.error(f"Error collecting monthly 1m data for {symbol}: {e}")
             return None
     def _fetch_1s_batch_with_endtime(self, symbol: str, end_time: datetime, limit: int = 1000) -> Optional[pd.DataFrame]:
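The interesting part of this hunk is the cache handling: instead of discarding the whole monthly cache after 24 hours, the new method keeps whatever is cached, trims it to the 30-day window, and fetches only the missing tail. Below is a minimal standalone sketch of that merge logic, assuming pandas DataFrames with a 'timestamp' column; the names fill_gaps and fake_fetch_range are illustrative and not part of the repository.

# Minimal sketch of the gap-filling idea, with a synthetic fetcher standing in
# for the real Binance call (names are illustrative, not repository code).
from datetime import datetime, timedelta
import pandas as pd

def fake_fetch_range(start: datetime, end: datetime) -> pd.DataFrame:
    """Stand-in for _fetch_1m_data_range: returns flat 1m candles."""
    idx = pd.date_range(start, end, freq='1min')
    return pd.DataFrame({'timestamp': idx, 'open': 1.0, 'high': 1.0,
                         'low': 1.0, 'close': 1.0, 'volume': 0.0})

def fill_gaps(cached: pd.DataFrame, days: int = 30) -> pd.DataFrame:
    """Trim the cache to the rolling window, then fetch only the missing tail."""
    end_time = datetime.now()
    start_time = end_time - timedelta(days=days)
    cached = cached[cached['timestamp'] >= start_time]            # drop stale rows
    gap_start = cached['timestamp'].max() + timedelta(minutes=1)  # first missing minute
    if gap_start < end_time:
        gap = fake_fetch_range(gap_start, end_time)
        cached = pd.concat([cached, gap], ignore_index=True)
    return (cached.sort_values('timestamp')
                  .drop_duplicates(subset=['timestamp'])
                  .reset_index(drop=True))

# Example: a cache that ends two hours ago gets only the last two hours re-fetched.
old = fake_fetch_range(datetime.now() - timedelta(days=2), datetime.now() - timedelta(hours=2))
print(len(fill_gaps(old)))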
@@ -788,6 +761,105 @@ class DataProvider:
             logger.error(f"Error fetching 1s batch for {symbol}: {e}")
             return None
+    def _fetch_1m_data_range(self, symbol: str, start_time: datetime, end_time: datetime) -> Optional[pd.DataFrame]:
+        """Fetch 1m candles for a specific time range with efficient batching"""
+        try:
+            # Convert symbol format for Binance API
+            if '/' in symbol:
+                api_symbol = symbol.replace('/', '')
+            else:
+                api_symbol = symbol
+            logger.info(f"Fetching 1m data for {symbol} from {start_time} to {end_time}")
+            all_candles = []
+            current_start = start_time
+            batch_size = 1000  # Binance limit
+            api_calls_made = 0
+            while current_start < end_time and api_calls_made < 50:  # Safety limit for 30 days
+                try:
+                    # Calculate end time for this batch
+                    batch_end = min(current_start + timedelta(minutes=batch_size), end_time)
+                    # Convert to milliseconds
+                    start_timestamp = int(current_start.timestamp() * 1000)
+                    end_timestamp = int(batch_end.timestamp() * 1000)
+                    # Binance API call
+                    url = "https://api.binance.com/api/v3/klines"
+                    params = {
+                        'symbol': api_symbol,
+                        'interval': '1m',
+                        'startTime': start_timestamp,
+                        'endTime': end_timestamp,
+                        'limit': batch_size
+                    }
+                    headers = {
+                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                        'Accept': 'application/json'
+                    }
+                    response = requests.get(url, params=params, headers=headers, timeout=10)
+                    response.raise_for_status()
+                    data = response.json()
+                    api_calls_made += 1
+                    if not data:
+                        logger.warning(f"No data returned for batch {current_start} to {batch_end}")
+                        break
+                    # Convert to DataFrame
+                    batch_df = pd.DataFrame(data, columns=[
+                        'timestamp', 'open', 'high', 'low', 'close', 'volume',
+                        'close_time', 'quote_volume', 'trades', 'taker_buy_base',
+                        'taker_buy_quote', 'ignore'
+                    ])
+                    # Process columns
+                    batch_df['timestamp'] = pd.to_datetime(batch_df['timestamp'], unit='ms')
+                    for col in ['open', 'high', 'low', 'close', 'volume']:
+                        batch_df[col] = batch_df[col].astype(float)
+                    # Keep only OHLCV columns
+                    batch_df = batch_df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
+                    all_candles.append(batch_df)
+                    # Move to next batch (add 1 minute to avoid overlap)
+                    current_start = batch_end + timedelta(minutes=1)
+                    # Rate limiting (Binance allows 1200/min)
+                    time.sleep(0.05)  # 50ms delay
+                    # Progress logging
+                    if api_calls_made % 10 == 0:
+                        total_candles = sum(len(df) for df in all_candles)
+                        logger.info(f"Progress: {api_calls_made} API calls, {total_candles} candles collected")
+                except Exception as e:
+                    logger.error(f"Error in batch {current_start} to {batch_end}: {e}")
+                    current_start += timedelta(minutes=batch_size)
+                    time.sleep(1)  # Wait longer on error
+                    continue
+            if not all_candles:
+                logger.error(f"No data collected for {symbol}")
+                return None
+            # Combine all batches
+            df = pd.concat(all_candles, ignore_index=True)
+            df = df.sort_values('timestamp').drop_duplicates(subset=['timestamp']).reset_index(drop=True)
+            logger.info(f"Successfully fetched {len(df)} 1m candles for {symbol} ({api_calls_made} API calls)")
+            return df
+        except Exception as e:
+            logger.error(f"Error fetching 1m data range for {symbol}: {e}")
+            return None
     def _extract_pivot_bounds_from_monthly_data(self, symbol: str, monthly_data: pd.DataFrame) -> Optional[PivotBounds]:
         """Extract pivot bounds using Williams Market Structure analysis"""
         try:
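Why the 50-call safety limit above is enough: 30 days of 1-minute candles is 43,200 rows, and the klines endpoint returns at most 1,000 rows per request, so a full cold fetch needs about 44 calls (roughly 2 seconds of rate-limit sleep at 50 ms per call), while the old 1-second collection needed about 2.6 million rows and around 2,600 calls. A quick arithmetic check, not tied to the repository code:

# Back-of-the-envelope check of the batching budget (pure arithmetic, no API calls).
minutes_per_month = 30 * 24 * 60          # 43,200 one-minute candles
calls_1m = -(-minutes_per_month // 1000)  # ceiling division -> 44 requests
seconds_per_month = 30 * 24 * 3600        # 2,592,000 one-second candles
calls_1s = -(-seconds_per_month // 1000)  # ~2,592 requests under the old scheme
print(calls_1m, calls_1s)                 # 44 2592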
@@ -958,8 +1030,8 @@ class DataProvider:
     def _refresh_pivot_bounds_for_symbol(self, symbol: str):
         """Refresh pivot bounds for a specific symbol"""
         try:
-            # Collect monthly 1s data
-            monthly_data = self._collect_monthly_1s_data(symbol)
+            # Collect monthly 1m data
+            monthly_data = self._collect_monthly_1m_data(symbol)
             if monthly_data is None or monthly_data.empty:
                 logger.warning(f"Could not collect monthly data for {symbol}")
                 return
@@ -1074,17 +1146,13 @@ class DataProvider:
             logger.warning(f"Error saving pivot bounds to cache for {symbol}: {e}")
     def _load_monthly_data_from_cache(self, symbol: str) -> Optional[pd.DataFrame]:
-        """Load monthly 1s data from cache"""
+        """Load monthly 1m data from cache"""
         try:
-            cache_file = self.monthly_data_cache_dir / f"{symbol.replace('/', '')}_monthly_1s.parquet"
+            cache_file = self.monthly_data_cache_dir / f"{symbol.replace('/', '')}_monthly_1m.parquet"
             if cache_file.exists():
-                # Check if cache is recent (less than 1 day old)
-                cache_age = time.time() - cache_file.stat().st_mtime
-                if cache_age < 86400:  # 24 hours
-                    df = pd.read_parquet(cache_file)
-                    return df
-                else:
-                    logger.info(f"Monthly data cache for {symbol} is too old ({cache_age/3600:.1f}h)")
+                df = pd.read_parquet(cache_file)
+                logger.info(f"Loaded {len(df)} 1m candles from cache for {symbol}")
+                return df
             return None
@@ -1093,11 +1161,11 @@ class DataProvider:
             return None
     def _save_monthly_data_to_cache(self, symbol: str, df: pd.DataFrame):
-        """Save monthly 1s data to cache"""
+        """Save monthly 1m data to cache"""
         try:
-            cache_file = self.monthly_data_cache_dir / f"{symbol.replace('/', '')}_monthly_1s.parquet"
+            cache_file = self.monthly_data_cache_dir / f"{symbol.replace('/', '')}_monthly_1m.parquet"
             df.to_parquet(cache_file, index=False)
-            logger.info(f"Saved {len(df)} monthly 1s candles to cache for {symbol}")
+            logger.info(f"Saved {len(df)} monthly 1m candles to cache for {symbol}")
         except Exception as e:
             logger.warning(f"Error saving monthly data to cache for {symbol}: {e}")


@@ -37,7 +37,7 @@ def test_pivot_normalization_system():
     # Initialize data provider
     symbols = ['ETH/USDT']  # Test with ETH only
-    timeframes = ['1s', '1m', '1h']
+    timeframes = ['1s']
     logger.info("Initializing DataProvider with pivot-based normalization...")
     data_provider = DataProvider(symbols=symbols, timeframes=timeframes)
@@ -52,11 +52,11 @@ def test_pivot_normalization_system():
         try:
             # This will trigger monthly data collection and pivot analysis
             logger.info(f"Testing monthly data collection for {symbol}...")
-            monthly_data = data_provider._collect_monthly_1s_data(symbol)
+            monthly_data = data_provider._collect_monthly_1m_data(symbol)
             if monthly_data is not None:
                 print(f"✅ Monthly data collection SUCCESS")
-                print(f"   📊 Collected {len(monthly_data):,} 1s candles")
+                print(f"   📊 Collected {len(monthly_data):,} 1m candles")
                 print(f"   📅 Period: {monthly_data['timestamp'].min()} to {monthly_data['timestamp'].max()}")
                 print(f"   💰 Price range: ${monthly_data['low'].min():.2f} - ${monthly_data['high'].max():.2f}")
                 print(f"   📈 Volume range: {monthly_data['volume'].min():.2f} - {monthly_data['volume'].max():.2f}")


@@ -3010,10 +3010,9 @@ class TradingDashboard:
                 # Update internal model storage
                 for model_type, model_data in loaded_models.items():
                     model_info = model_data['info']
-                    logger.info(f"Using best {model_type} model: {model_info.model_name} "
-                                f"(Score: {model_info.metrics.get_composite_score():.3f})")
+                    logger.info(f"Using best {model_type} model: {model_info.model_name} (Score: {model_info.metrics.get_composite_score():.3f})")
             else:
                 logger.info("No managed models available, falling back to legacy loading")
                 # Fallback to original model loading logic
                 self._load_legacy_models()
@@ -3021,7 +3020,7 @@ class TradingDashboard:
         except ImportError:
             logger.warning("ModelManager not available, using legacy model loading")
             self._load_legacy_models()
         except Exception as e:
             logger.error(f"Error loading models via ModelManager: {e}")
             self._load_legacy_models()
@@ -3053,7 +3052,7 @@ class TradingDashboard:
                 with torch.no_grad():
                     if hasattr(feature_matrix, 'shape') and len(feature_matrix.shape) == 2:
                         feature_tensor = torch.FloatTensor(feature_matrix).unsqueeze(0)
                     else:
                         feature_tensor = torch.FloatTensor(feature_matrix)
                     prediction = self.model(feature_tensor)
@@ -3090,7 +3089,7 @@ class TradingDashboard:
                     })
                     logger.info(f"Loaded CNN model: {model_file}")
                 except Exception as e:
                     logger.warning(f"Failed to load CNN model {model_file}: {e}")
         # Check for RL models
@@ -3101,12 +3100,12 @@ class TradingDashboard:
                 try:
                     checkpoint_path = os.path.join(rl_models_dir, model_file)
                     class RLWrapper:
                         def __init__(self, checkpoint_path):
                             self.checkpoint_path = checkpoint_path
                             self.checkpoint = torch.load(checkpoint_path, map_location='cpu')
                         def predict(self, feature_matrix):
                             # Mock RL prediction
                             if hasattr(feature_matrix, 'shape'):
                                 state_sum = np.sum(feature_matrix) % 100
@@ -3117,7 +3116,7 @@ class TradingDashboard:
                                 action_probs = [0.1, 0.1, 0.8]  # BUY
                             elif state_sum < 30:
                                 action_probs = [0.8, 0.1, 0.1]  # SELL
                             else:
                                 action_probs = [0.2, 0.6, 0.2]  # HOLD
                             return np.array(action_probs)
@@ -3137,7 +3136,7 @@ class TradingDashboard:
                     })
                     logger.info(f"Loaded RL model: {model_file}")
                 except Exception as e:
                     logger.warning(f"Failed to load RL model {model_file}: {e}")
         total_models = sum(len(models) for models in self.available_models.values())