try fixing COB MA and COB data quality

This commit is contained in:
Dobromir Popov
2025-08-09 23:03:45 +03:00
parent 87193f3d6f
commit 31a41785d6
5 changed files with 174 additions and 95 deletions

View File

@@ -1059,20 +1059,43 @@ class DataProvider:
return df
def get_historical_data(self, symbol: str, timeframe: str, limit: int = 1000, refresh: bool = False) -> Optional[pd.DataFrame]:
    """Get historical OHLCV data.

    - Prefer cached data for low latency.
    - If cache is empty or refresh=True, fetch real data from exchanges.
    - Never generate synthetic data.

    Args:
        symbol: Trading pair, e.g. "ETH/USDT".
        timeframe: Candle timeframe key, e.g. "1m".
        limit: Maximum number of most-recent candles to return.
        refresh: When True, bypass the cache and fetch fresh data.

    Returns:
        A DataFrame of up to `limit` candles, or None when no real data
        is available (errors are logged, never raised to the caller).
    """
    try:
        # Serve from cache when available and a refresh was not requested
        if symbol in self.cached_data and timeframe in self.cached_data[symbol]:
            cached_df = self.cached_data[symbol][timeframe]
            if not cached_df.empty and not refresh:
                return cached_df.tail(limit)

        # Cache empty or refresh requested: fetch real data now.
        # Binance is the primary source; MEXC is the fallback.
        df = self._fetch_from_binance(symbol, timeframe, limit)
        if df is None or df.empty:
            df = self._fetch_from_mexc(symbol, timeframe, limit)

        if df is not None and not df.empty:
            df = self._ensure_datetime_index(df)

            # Store/merge into cache (cache is capped at 1500 candles per series)
            if symbol not in self.cached_data:
                self.cached_data[symbol] = {}
            if timeframe not in self.cached_data[symbol] or self.cached_data[symbol][timeframe].empty:
                self.cached_data[symbol][timeframe] = df.tail(1500)
            else:
                # Merge on the datetime index, keeping the newest row for
                # any duplicated timestamp, and keep the index sorted.
                combined_df = pd.concat([self.cached_data[symbol][timeframe], df], ignore_index=False)
                combined_df = combined_df[~combined_df.index.duplicated(keep='last')]
                combined_df = combined_df.sort_index()
                self.cached_data[symbol][timeframe] = combined_df.tail(1500)

            logger.info(f"Cached {len(self.cached_data[symbol][timeframe])} candles for {symbol} {timeframe}")
            return self.cached_data[symbol][timeframe].tail(limit)

        logger.warning(f"No real data available for {symbol} {timeframe} at request time")
        return None
    except Exception as e:
        logger.error(f"Error getting historical data for {symbol} {timeframe}: {e}")
        return None

View File

@@ -271,41 +271,70 @@ class StandardizedDataProvider(DataProvider):
with self.ma_calculation_lock:
# Add current imbalance data to history
self.cob_imbalance_history[symbol].append((timestamp, bid_ask_imbalance))
# Calculate MAs for different timeframes
ma_results = {'1s': {}, '5s': {}, '15s': {}, '60s': {}}
# Get current price for ±5 bucket calculation
current_price = self.current_prices.get(symbol.replace('/', '').upper(), 0.0)
if current_price <= 0:
return ma_results
bucket_size = 1.0 if 'ETH' in symbol else 10.0
# Helper: quantize any floating price to the nearest COB bucket center used in snapshots
def quantize_to_bucket(p: float) -> float:
try:
# Align bucket to integer multiples of bucket_size around the rounded current price
base = round(current_price / bucket_size) * bucket_size
steps = round((p - base) / bucket_size)
return base + steps * bucket_size
except Exception:
return p
# Calculate MAs for ±5 buckets around current price
for i in range(-5, 6):
price = current_price + (i * bucket_size)
raw_price = current_price + (i * bucket_size)
price = quantize_to_bucket(raw_price)
if price <= 0:
continue
# Get historical imbalance data for this price bucket
history = self.cob_imbalance_history[symbol]
# Calculate different MA periods
for period, period_name in [(1, '1s'), (5, '5s'), (15, '15s'), (60, '60s')]:
recent_data = []
cutoff_time = timestamp - timedelta(seconds=period)
for hist_timestamp, hist_imbalance in history:
if hist_timestamp >= cutoff_time and price in hist_imbalance:
if hist_timestamp < cutoff_time:
continue
# Attempt exact price key match; if not found, match nearest bucket key
if price in hist_imbalance:
recent_data.append(hist_imbalance[price])
else:
# Find nearest key within half a bucket
try:
nearest_key = None
min_diff = bucket_size / 2.0
for k in hist_imbalance.keys():
diff = abs(float(k) - price)
if diff <= min_diff:
min_diff = diff
nearest_key = k
if nearest_key is not None:
recent_data.append(hist_imbalance[nearest_key])
except Exception:
pass
# Calculate moving average
if recent_data:
ma_results[period_name][price] = sum(recent_data) / len(recent_data)
ma_results[period_name][price] = float(sum(recent_data) / len(recent_data))
else:
# Respect rule: no synthetic metadata; use 0.0 for unavailable
ma_results[period_name][price] = 0.0
return ma_results
except Exception as e: