stored

2025-10-23 00:21:22 +03:00
parent dbab0283c9
commit b0771ff34e
8 changed files with 451 additions and 125 deletions
--- a/core/data_provider.py
+++ b/core/data_provider.py
@@ -1269,6 +1269,110 @@ class DataProvider:
            logger.error(f"Error getting price range for {symbol}: {e}")
            return None

+    def get_historical_data_replay(self, symbol: str, start_time: datetime, end_time: datetime,
+                                 timeframes: Optional[List[str]] = None) -> Dict[str, pd.DataFrame]:
+        """
+        Get historical data for a specific time period for replay/training purposes.
+
+        Args:
+            symbol: Trading symbol (e.g., 'ETH/USDT')
+            start_time: Start of the time period (inclusive)
+            end_time: End of the time period (inclusive)
+            timeframes: List of timeframes to fetch (default: ['1s', '1m', '1h', '1d'])
+
+        Returns:
+            Dict mapping every requested timeframe to a DataFrame with the rows inside the
+            period; the DataFrame is empty when nothing matched or the fetch failed.
+        """
+        if timeframes is None:
+            timeframes = ['1s', '1m', '1h', '1d']
+
+        logger.info(f"Replaying historical data for {symbol} from {start_time} to {end_time}")
+
+        # timeframe -> (candle length in seconds, extra candles requested as a buffer)
+        candle_specs = {'1s': (1, 100), '1m': (60, 10), '1h': (3600, 5), '1d': (86400, 2)}
+        replay_data = {}
+
+        for timeframe in timeframes:
+            try:
+                window_seconds = (end_time - start_time).total_seconds()
+                if window_seconds < 0:
+                    # Inverted window: nothing can match, so skip the forced refresh that
+                    # would otherwise be requested with a negative or undersized limit.
+                    logger.warning(f"  {timeframe}: Replay window is inverted (end before start)")
+                    replay_data[timeframe] = pd.DataFrame()
+                    continue
+
+                # NOTE(review): limit only reflects the window length. If get_historical_data
+                # returns the most recent `limit` candles, a window far in the past is never
+                # reached and the filter below comes back empty - confirm against that method.
+                spec = candle_specs.get(timeframe)  # None for unknown timeframes
+                limit = (int(window_seconds / spec[0]) + spec[1]) if spec else 1000
+                df = self.get_historical_data(symbol, timeframe, limit=limit, refresh=True)
+                if df is None or df.empty:
+                    logger.warning(f"  {timeframe}: No data available")
+                    replay_data[timeframe] = pd.DataFrame()
+                    continue
+
+                # Filter to the exact time period (both bounds inclusive)
+                df_filtered = df[(df.index >= start_time) & (df.index <= end_time)]
+                if df_filtered.empty:
+                    logger.warning(f"  {timeframe}: No data in replay period")
+                    df_filtered = pd.DataFrame()
+                else:
+                    logger.info(f"  {timeframe}: {len(df_filtered)} candles in replay period")
+                replay_data[timeframe] = df_filtered
+            except Exception as e:
+                logger.error(f"Error fetching {timeframe} data for replay: {e}")
+                replay_data[timeframe] = pd.DataFrame()
+
+        logger.info(f"Historical replay data prepared: {sum(1 for df in replay_data.values() if not df.empty)} timeframes")
+        return replay_data
+    
+    def get_market_state_at_time(self, symbol: str, timestamp: datetime,
+                                context_window_minutes: int = 5) -> Dict[str, Any]:
+        """
+        Build the market state around one point in time, in training format.
+
+        The state covers the window from context_window_minutes before the
+        timestamp to context_window_minutes after it.
+
+        Args:
+            symbol: Trading symbol
+            timestamp: The point in time the window is centred on
+            context_window_minutes: Minutes before/after the timestamp to include
+
+        Returns:
+            Dict keyed 'ohlcv_<timeframe>' for each timeframe that returned rows; each
+            value holds parallel lists 'timestamps', 'open', 'high', 'low', 'close' and
+            'volume'. An empty dict is returned if anything raises.
+        """
+        try:
+            half_window = timedelta(minutes=context_window_minutes)
+            window_start, window_end = timestamp - half_window, timestamp + half_window
+
+            logger.info(f"Getting market state for {symbol} at {timestamp} (±{context_window_minutes}min)")
+
+            # No timeframes are passed, so the replay method applies its own default list
+            frames = self.get_historical_data_replay(symbol, window_start, window_end)
+
+            price_columns = ('open', 'high', 'low', 'close', 'volume')
+            market_state = {}
+            for tf, candles in frames.items():
+                if candles.empty:
+                    continue  # Timeframes without rows are left out of the state
+                # Timestamps are serialized with second resolution
+                entry = {'timestamps': candles.index.strftime('%Y-%m-%d %H:%M:%S').tolist()}
+                entry.update((column, candles[column].tolist()) for column in price_columns)
+                market_state[f'ohlcv_{tf}'] = entry
+
+            logger.info(f"Market state prepared with {len(market_state)} timeframes")
+            return market_state
+
+        except Exception as e:
+            logger.error(f"Error getting market state at time: {e}")
+            return {}
+
    def get_historical_data(self, symbol: str, timeframe: str, limit: int = 1000, refresh: bool = False) -> Optional[pd.DataFrame]:
        """Get historical OHLCV data.
        - Prefer cached data for low latency.