""" Report Data Crawler Sub-Module This module provides functionality to crawl and compile comprehensive trading reports in the specified format for different trading pairs. It extends the DataProvider with specialized methods for report generation. Enhanced with: - Polymarket bets/odds data for sentiment analysis - Multiple open interest data sources for improved reliability - Real-time market sentiment from prediction markets CRITICAL POLICY: NO SYNTHETIC DATA ALLOWED This module MUST ONLY use real market data from exchanges. NEVER use np.random.*, mock/fake/synthetic data, or placeholder values. If data is unavailable: return None/0/empty, log errors, raise exceptions. """ import logging import pandas as pd import numpy as np import requests import time import json from datetime import datetime, timedelta from typing import Dict, List, Optional, Tuple, Any from dataclasses import dataclass import ta import warnings # Suppress ta library deprecation warnings warnings.filterwarnings("ignore", category=FutureWarning, module="ta") logger = logging.getLogger(__name__) @dataclass class PolymarketData: """Polymarket prediction market data""" market_id: str question: str outcome: str probability: float volume_24h: float liquidity: float last_trade_time: datetime market_type: str # 'crypto', 'election', 'sports', etc. @dataclass class OpenInterestData: """Open interest data from multiple sources""" source: str symbol: str open_interest: float timestamp: datetime change_24h: float change_percent: float @dataclass class ReportData: """Data structure for comprehensive trading report""" symbol: str timestamp: datetime # Current values current_price: float current_ema20: float current_macd: float current_rsi_7: float # Enhanced Open Interest and Funding Rate (multiple sources) open_interest_data: List[OpenInterestData] funding_rate: float # Polymarket sentiment data polymarket_data: List[PolymarketData] market_sentiment_score: float # Intraday series (3-minute intervals, oldest → newest) mid_prices: List[float] ema_20_series: List[float] macd_series: List[float] rsi_7_series: List[float] rsi_14_series: List[float] # Longer-term context (4-hour timeframe) ema_20_4h: float ema_50_4h: float atr_3_4h: float atr_14_4h: float current_volume: float average_volume: float macd_4h_series: List[float] rsi_14_4h_series: List[float] class ReportDataCrawler: """Specialized crawler for compiling comprehensive trading reports""" def __init__(self, data_provider=None): """Initialize the report data crawler Args: data_provider: Instance of DataProvider to use for data fetching """ self.data_provider = data_provider self.session = requests.Session() self.session.headers.update({ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' }) # Cache for API responses to avoid rate limiting self._cache = {} self._cache_timeout = 30 # 30 seconds cache # Polymarket API configuration self.polymarket_base_url = "https://gamma-api.polymarket.com" self.polymarket_graphql_url = f"{self.polymarket_base_url}/graphql" # Open interest data sources self.open_interest_sources = { 'binance': 'https://fapi.binance.com/fapi/v1/openInterest', 'bybit': 'https://api.bybit.com/v5/market/open-interest', 'okx': 'https://www.okx.com/api/v5/public/open-interest', 'coinglass': 'https://open-api.coinglass.com/public/v2/open_interest' } def _get_cached_or_fetch(self, cache_key: str, fetch_func, *args, **kwargs): """Get data from cache or fetch if expired""" now = time.time() if cache_key in self._cache: data, timestamp = 

    def _fetch_polymarket_data(self, symbol: str) -> List[PolymarketData]:
        """Fetch Polymarket prediction market data for cryptocurrency-related markets

        Args:
            symbol: Trading pair symbol (e.g., 'BTC/USDT')

        Returns:
            List of PolymarketData objects
        """
        try:
            cache_key = f"polymarket_{symbol}"
            cached_data = self._get_cached_or_fetch(cache_key, self._fetch_polymarket_raw, symbol)

            if not cached_data:
                return []

            polymarket_data = []
            base_symbol = symbol.split('/')[0].lower()  # Extract BTC from BTC/USDT

            for market in cached_data:
                try:
                    # Filter for crypto-related markets
                    if any(keyword in market.get('question', '').lower()
                           for keyword in [base_symbol, 'bitcoin', 'ethereum', 'crypto', 'cryptocurrency']):
                        polymarket_data.append(PolymarketData(
                            market_id=market.get('id', ''),
                            question=market.get('question', ''),
                            outcome=market.get('outcome', ''),
                            probability=float(market.get('probability', 0)),
                            volume_24h=float(market.get('volume24h', 0)),
                            liquidity=float(market.get('liquidity', 0)),
                            last_trade_time=datetime.fromisoformat(market.get('lastTradeTime', datetime.now().isoformat())),
                            market_type='crypto'
                        ))
                except Exception as e:
                    logger.warning(f"Error parsing Polymarket market data: {e}")
                    continue

            logger.info(f"Fetched {len(polymarket_data)} Polymarket markets for {symbol}")
            return polymarket_data

        except Exception as e:
            logger.error(f"Error fetching Polymarket data for {symbol}: {e}")
            return []

    def _fetch_polymarket_raw(self, symbol: str) -> List[Dict]:
        """Fetch raw Polymarket data using GraphQL API"""
        try:
            # GraphQL query for active markets
            query = """
            query GetMarkets($limit: Int!, $offset: Int!) {
                markets(
                    limit: $limit
                    offset: $offset
                    orderBy: "volume"
                    orderDirection: "desc"
                    where: {
                        active: true
                        closed: false
                    }
                ) {
                    id
                    question
                    outcome
                    probability
                    volume24h
                    liquidity
                    lastTradeTime
                    endDate
                }
            }
            """

            variables = {
                "limit": 50,
                "offset": 0
            }

            payload = {
                "query": query,
                "variables": variables
            }

            response = self.session.post(
                self.polymarket_graphql_url,
                json=payload,
                timeout=10,
                headers={'Content-Type': 'application/json'}
            )

            if response.status_code == 200:
                data = response.json()
                if 'data' in data and 'markets' in data['data']:
                    return data['data']['markets']

            logger.warning(f"Polymarket API returned status {response.status_code}")
            return []

        except Exception as e:
            logger.error(f"Error fetching raw Polymarket data: {e}")
            return []

    def _calculate_market_sentiment_score(self, polymarket_data: List[PolymarketData], symbol: str) -> float:
        """Calculate market sentiment score from Polymarket data

        Args:
            polymarket_data: List of PolymarketData objects
            symbol: Trading pair symbol

        Returns:
            Sentiment score between -1 (bearish) and 1 (bullish)
        """
        try:
            if not polymarket_data:
                return 0.0

            base_symbol = symbol.split('/')[0].lower()
            sentiment_scores = []

            for data in polymarket_data:
                # Weight by volume and liquidity
                weight = (data.volume_24h + data.liquidity) / 1000000  # Normalize

                # Extract sentiment from probability
                if 'above' in data.question.lower() or 'higher' in data.question.lower():
                    # Bullish sentiment
                    sentiment = (data.probability - 0.5) * 2  # Convert 0-1 to -1 to 1
                elif 'below' in data.question.lower() or 'lower' in data.question.lower():
                    # Bearish sentiment
                    sentiment = (0.5 - data.probability) * 2  # Convert 0-1 to -1 to 1
                else:
                    # Neutral or unclear sentiment
                    sentiment = 0.0

                sentiment_scores.append(sentiment * weight)

            if sentiment_scores:
                # Weighted average sentiment
                total_weight = sum((data.volume_24h + data.liquidity) / 1000000 for data in polymarket_data)
                if total_weight > 0:
                    return sum(sentiment_scores) / total_weight

            return 0.0

        except Exception as e:
            logger.error(f"Error calculating market sentiment score: {e}")
            return 0.0
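
    # Worked example of the weighting above (illustrative numbers only, not market
    # data): a market asking "Will BTC close above $X?" with probability 0.70, $2M
    # 24h volume and $1M liquidity gets weight (2_000_000 + 1_000_000) / 1_000_000
    # = 3.0 and sentiment (0.70 - 0.5) * 2 = +0.4, contributing 3.0 * 0.4 = 1.2.
    # With that as the only market, total_weight = 3.0 and the final score is
    # 1.2 / 3.0 = +0.4 (bullish).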

    def _fetch_open_interest_from_multiple_sources(self, symbol: str) -> List[OpenInterestData]:
        """Fetch open interest data from multiple sources for improved reliability

        Args:
            symbol: Trading pair symbol (e.g., 'BTC/USDT')

        Returns:
            List of OpenInterestData objects from different sources
        """
        try:
            open_interest_data = []
            binance_symbol = symbol.replace('/', '')

            # Source 1: Binance
            try:
                cache_key = f"binance_oi_{symbol}"
                binance_data = self._get_cached_or_fetch(cache_key, self._fetch_binance_open_interest, binance_symbol)
                if binance_data:
                    open_interest_data.append(OpenInterestData(
                        source='binance',
                        symbol=symbol,
                        open_interest=binance_data['openInterest'],
                        timestamp=datetime.now(),
                        change_24h=binance_data.get('change24h', 0),
                        change_percent=binance_data.get('changePercent', 0)
                    ))
            except Exception as e:
                logger.warning(f"Failed to fetch Binance open interest: {e}")

            # Source 2: Bybit
            try:
                cache_key = f"bybit_oi_{symbol}"
                bybit_data = self._get_cached_or_fetch(cache_key, self._fetch_bybit_open_interest, symbol)
                if bybit_data:
                    open_interest_data.append(OpenInterestData(
                        source='bybit',
                        symbol=symbol,
                        open_interest=bybit_data['openInterest'],
                        timestamp=datetime.now(),
                        change_24h=bybit_data.get('change24h', 0),
                        change_percent=bybit_data.get('changePercent', 0)
                    ))
            except Exception as e:
                logger.warning(f"Failed to fetch Bybit open interest: {e}")

            # Source 3: OKX
            try:
                cache_key = f"okx_oi_{symbol}"
                okx_data = self._get_cached_or_fetch(cache_key, self._fetch_okx_open_interest, symbol)
                if okx_data:
                    open_interest_data.append(OpenInterestData(
                        source='okx',
                        symbol=symbol,
                        open_interest=okx_data['openInterest'],
                        timestamp=datetime.now(),
                        change_24h=okx_data.get('change24h', 0),
                        change_percent=okx_data.get('changePercent', 0)
                    ))
            except Exception as e:
                logger.warning(f"Failed to fetch OKX open interest: {e}")

            logger.info(f"Fetched open interest data from {len(open_interest_data)} sources for {symbol}")
            return open_interest_data

        except Exception as e:
            logger.error(f"Error fetching open interest from multiple sources for {symbol}: {e}")
            return []

    def _fetch_binance_open_interest(self, symbol: str) -> Dict:
        """Fetch open interest data from Binance"""
        try:
            url = "https://fapi.binance.com/fapi/v1/openInterest"
            params = {'symbol': symbol}

            response = self.session.get(url, params=params, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}

        except Exception as e:
            logger.error(f"Error fetching Binance open interest: {e}")
            return {}

    def _fetch_bybit_open_interest(self, symbol: str) -> Dict:
        """Fetch open interest data from Bybit"""
        try:
            # Convert BTC/USDT to BTCUSDT for Bybit
            bybit_symbol = symbol.replace('/', '')
            url = "https://api.bybit.com/v5/market/open-interest"
            params = {
                'category': 'linear',
                'symbol': bybit_symbol,
                'intervalTime': '5min'
            }

            response = self.session.get(url, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                if 'result' in data and 'list' in data['result'] and data['result']['list']:
                    latest = data['result']['list'][0]
                    return {
                        'openInterest': float(latest['openInterest']),
                        'change24h': float(latest.get('change24h', 0)),
                        'changePercent': float(latest.get('changePercent', 0))
                    }
            return {}

        except Exception as e:
            logger.error(f"Error fetching Bybit open interest: {e}")
            return {}

    def _fetch_okx_open_interest(self, symbol: str) -> Dict:
        """Fetch open interest data from OKX"""
        try:
            # Convert BTC/USDT to BTC-USDT-SWAP for OKX
            okx_symbol = symbol.replace('/', '-') + '-SWAP'
            url = "https://www.okx.com/api/v5/public/open-interest"
            params = {'instId': okx_symbol}

            response = self.session.get(url, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                if 'data' in data and data['data']:
                    latest = data['data'][0]
                    return {
                        'openInterest': float(latest['oi']),
                        # Caveat: OKX's 'oiCcy' field is open interest denominated in the
                        # base currency, not a 24h change; OKX does not return a change
                        # figure on this endpoint.
                        'change24h': float(latest.get('oiCcy', 0)),
                        'changePercent': 0  # OKX doesn't provide percentage change
                    }
            return {}

        except Exception as e:
            logger.error(f"Error fetching OKX open interest: {e}")
            return {}

    def _fetch_funding_rate(self, symbol: str) -> float:
        """Fetch funding rate from Binance"""
        try:
            binance_symbol = symbol.replace('/', '')
            url = "https://fapi.binance.com/fapi/v1/premiumIndex"
            params = {'symbol': binance_symbol}

            response = self.session.get(url, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                return float(data.get('lastFundingRate', 0))
            return 0.0

        except Exception as e:
            logger.error(f"Error fetching funding rate: {e}")
            return 0.0
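
    # Symbol conventions used by the fetchers above: Binance and Bybit expect the
    # concatenated pair (BTC/USDT -> BTCUSDT), while OKX expects the perpetual swap
    # instrument id (BTC/USDT -> BTC-USDT-SWAP). The funding rate comes from
    # Binance's premiumIndex endpoint only; the other exchanges are queried solely
    # for open interest, and the configured CoinGlass URL has no fetcher here.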

    def _calculate_technical_indicators(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Calculate technical indicators for the given dataframe"""
        try:
            if df.empty or len(df) < 5:
                logger.warning("Insufficient data for technical indicators")
                return {}

            indicators = {}

            # EMA 20 (need at least 20 periods)
            if len(df) >= 20:
                indicators['ema_20'] = ta.trend.ema_indicator(df['close'], window=20)
            else:
                indicators['ema_20'] = pd.Series(index=df.index, dtype=float)

            # MACD (need at least 26 periods)
            if len(df) >= 26:
                macd = ta.trend.MACD(df['close'])
                indicators['macd'] = macd.macd()
                indicators['macd_signal'] = macd.macd_signal()
                indicators['macd_histogram'] = macd.macd_diff()
            else:
                indicators['macd'] = pd.Series(index=df.index, dtype=float)
                indicators['macd_signal'] = pd.Series(index=df.index, dtype=float)
                indicators['macd_histogram'] = pd.Series(index=df.index, dtype=float)

            # RSI (7 and 14 period) - need at least 14 periods
            if len(df) >= 14:
                indicators['rsi_7'] = self._calculate_rsi(df['close'], period=7)
                indicators['rsi_14'] = self._calculate_rsi(df['close'], period=14)
            else:
                indicators['rsi_7'] = pd.Series(index=df.index, dtype=float)
                indicators['rsi_14'] = pd.Series(index=df.index, dtype=float)

            # ATR (need at least 14 periods)
            if len(df) >= 14:
                indicators['atr_3'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=3)
                indicators['atr_14'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=14)
            else:
                indicators['atr_3'] = pd.Series(index=df.index, dtype=float)
                indicators['atr_14'] = pd.Series(index=df.index, dtype=float)

            # EMA 50 (need at least 50 periods)
            if len(df) >= 50:
                indicators['ema_50'] = ta.trend.ema_indicator(df['close'], window=50)
            else:
                indicators['ema_50'] = pd.Series(index=df.index, dtype=float)

            return indicators

        except Exception as e:
            logger.error(f"Error calculating technical indicators: {e}")
            return {}

    def _calculate_rsi(self, prices: pd.Series, period: int = 14) -> pd.Series:
        """Calculate RSI using our own implementation"""
        try:
            delta = prices.diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
            rs = gain / loss
            rsi = 100 - (100 / (1 + rs))
            return rsi

        except Exception as e:
            logger.error(f"Error calculating RSI: {e}")
            return pd.Series(index=prices.index, dtype=float)
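
    # Note on the RSI above: averaging gains/losses with a simple rolling mean is
    # the Cutler (SMA) variant of RSI. Charting platforms that use Wilder's
    # exponential smoothing will report slightly different values for the same
    # candles, so small discrepancies against exchange charts are expected rather
    # than a data error.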

    def _get_intraday_series(self, symbol: str, limit: int = 10) -> Dict[str, List[float]]:
        """Get intraday series data (3-minute intervals)"""
        try:
            if not self.data_provider:
                logger.error("No data provider available")
                return {}

            # Get 1-minute data and resample to 3-minute intervals
            df_1m = self.data_provider.get_historical_data(symbol, '1m', limit=limit*3, refresh=True)
            if df_1m is None or df_1m.empty:
                logger.warning(f"No 1-minute data available for {symbol}")
                return {}

            # Resample to 3-minute intervals
            df_3m = df_1m.resample('3min').agg({
                'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum'
            }).dropna()

            if len(df_3m) < limit:
                logger.warning(f"Insufficient 3-minute data: {len(df_3m)} < {limit}")
                return {}

            # Take the last 'limit' periods
            df_3m = df_3m.tail(limit)

            # Calculate technical indicators
            indicators = self._calculate_technical_indicators(df_3m)

            # Extract series data
            series_data = {
                'mid_prices': df_3m['close'].tolist(),
                'ema_20_series': indicators.get('ema_20', pd.Series()).tolist(),
                'macd_series': indicators.get('macd', pd.Series()).tolist(),
                'rsi_7_series': indicators.get('rsi_7', pd.Series()).tolist(),
                'rsi_14_series': indicators.get('rsi_14', pd.Series()).tolist()
            }

            return series_data

        except Exception as e:
            logger.error(f"Error getting intraday series for {symbol}: {e}")
            return {}

    def _get_longer_term_context(self, symbol: str, limit: int = 10) -> Dict[str, Any]:
        """Get longer-term context data (4-hour timeframe)"""
        try:
            if not self.data_provider:
                logger.error("No data provider available")
                return {}

            # Get 4-hour data
            df_4h = self.data_provider.get_historical_data(symbol, '4h', limit=limit, refresh=True)
            if df_4h is None or df_4h.empty:
                logger.warning(f"No 4-hour data available for {symbol}")
                return {}

            # Calculate technical indicators
            indicators = self._calculate_technical_indicators(df_4h)

            # Calculate volume metrics
            current_volume = df_4h['volume'].iloc[-1] if not df_4h.empty else 0
            average_volume = df_4h['volume'].mean() if not df_4h.empty else 0

            context_data = {
                'ema_20_4h': indicators.get('ema_20', pd.Series()).iloc[-1] if not indicators.get('ema_20', pd.Series()).empty else 0,
                'ema_50_4h': indicators.get('ema_50', pd.Series()).iloc[-1] if not indicators.get('ema_50', pd.Series()).empty else 0,
                'atr_3_4h': indicators.get('atr_3', pd.Series()).iloc[-1] if not indicators.get('atr_3', pd.Series()).empty else 0,
                'atr_14_4h': indicators.get('atr_14', pd.Series()).iloc[-1] if not indicators.get('atr_14', pd.Series()).empty else 0,
                'current_volume': current_volume,
                'average_volume': average_volume,
                'macd_4h_series': indicators.get('macd', pd.Series()).tolist(),
                'rsi_14_4h_series': indicators.get('rsi_14', pd.Series()).tolist()
            }

            return context_data

        except Exception as e:
            logger.error(f"Error getting longer-term context for {symbol}: {e}")
            return {}
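
    # crawl_report_data() below stitches everything together: refresh OHLCV caches,
    # read the current price and 1-minute indicators, then gather multi-source open
    # interest, the Binance funding rate, Polymarket sentiment, the 3-minute
    # intraday series, and the 4-hour context into a single ReportData object.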

    def crawl_report_data(self, symbol: str) -> Optional[ReportData]:
        """Crawl comprehensive report data for a trading pair

        Args:
            symbol: Trading pair symbol (e.g., 'BTC/USDT')

        Returns:
            ReportData object with all required information, or None if failed
        """
        try:
            logger.info(f"Crawling report data for {symbol}")

            if not self.data_provider:
                logger.error("No data provider available for crawling")
                return None

            # Force refresh data to ensure we have current information
            logger.info(f"Refreshing data for {symbol}...")
            for timeframe in ['1m', '1h', '4h']:
                try:
                    self.data_provider.get_historical_data(symbol, timeframe, limit=100, refresh=True)
                except Exception as e:
                    logger.warning(f"Could not refresh {timeframe} data for {symbol}: {e}")

            # Get current price and basic data
            current_price = self.data_provider.get_current_price(symbol)
            if current_price is None:
                logger.error(f"Could not get current price for {symbol}")
                return None

            # Get 1-minute data for current indicators
            df_1m = self.data_provider.get_historical_data(symbol, '1m', limit=50, refresh=True)
            if df_1m is None or df_1m.empty:
                logger.error(f"Could not get 1-minute data for {symbol}")
                return None

            # Calculate current technical indicators
            indicators = self._calculate_technical_indicators(df_1m)

            # Get current indicator values
            current_ema20 = indicators.get('ema_20', pd.Series()).iloc[-1] if not indicators.get('ema_20', pd.Series()).empty else 0
            current_macd = indicators.get('macd', pd.Series()).iloc[-1] if not indicators.get('macd', pd.Series()).empty else 0
            current_rsi_7 = indicators.get('rsi_7', pd.Series()).iloc[-1] if not indicators.get('rsi_7', pd.Series()).empty else 0

            # Fetch enhanced open interest data from multiple sources
            open_interest_data = self._fetch_open_interest_from_multiple_sources(symbol)

            # Fetch funding rate (still using Binance as primary source)
            funding_rate = self._fetch_funding_rate(symbol)

            # Fetch Polymarket sentiment data
            polymarket_data = self._fetch_polymarket_data(symbol)
            market_sentiment_score = self._calculate_market_sentiment_score(polymarket_data, symbol)

            # Get intraday series data
            intraday_data = self._get_intraday_series(symbol, limit=10)

            # Get longer-term context
            longer_term_data = self._get_longer_term_context(symbol, limit=10)

            # Create ReportData object
            report_data = ReportData(
                symbol=symbol,
                timestamp=datetime.now(),
                current_price=current_price,
                current_ema20=current_ema20,
                current_macd=current_macd,
                current_rsi_7=current_rsi_7,
                open_interest_data=open_interest_data,
                funding_rate=funding_rate,
                polymarket_data=polymarket_data,
                market_sentiment_score=market_sentiment_score,
                mid_prices=intraday_data.get('mid_prices', []),
                ema_20_series=intraday_data.get('ema_20_series', []),
                macd_series=intraday_data.get('macd_series', []),
                rsi_7_series=intraday_data.get('rsi_7_series', []),
                rsi_14_series=intraday_data.get('rsi_14_series', []),
                ema_20_4h=longer_term_data.get('ema_20_4h', 0),
                ema_50_4h=longer_term_data.get('ema_50_4h', 0),
                atr_3_4h=longer_term_data.get('atr_3_4h', 0),
                atr_14_4h=longer_term_data.get('atr_14_4h', 0),
                current_volume=longer_term_data.get('current_volume', 0),
                average_volume=longer_term_data.get('average_volume', 0),
                macd_4h_series=longer_term_data.get('macd_4h_series', []),
                rsi_14_4h_series=longer_term_data.get('rsi_14_4h_series', [])
            )

            logger.info(f"Successfully crawled report data for {symbol}")
            return report_data

        except Exception as e:
            logger.error(f"Error crawling report data for {symbol}: {e}")
            return None

    def generate_report_text(self, report_data: ReportData) -> str:
        """Generate formatted report text in the specified format

        Args:
            report_data: ReportData object with all required information

        Returns:
            Formatted report string
        """
        try:
            if not report_data:
                return "Error: No report data available"

            # Format current values
            current_values = (
                f"current_price = {report_data.current_price:.1f}, "
                f"current_ema20 = {report_data.current_ema20:.3f}, "
                f"current_macd = {report_data.current_macd:.3f}, "
                f"current_rsi (7 period) = {report_data.current_rsi_7:.3f}"
            )

            # Format enhanced open interest data from multiple sources
            oi_section = "Open Interest (Multiple Sources):\n"
            if report_data.open_interest_data:
                for oi_data in report_data.open_interest_data:
                    try:
                        open_interest_val = float(oi_data.open_interest) if oi_data.open_interest else 0
                        change_percent_val = float(oi_data.change_percent) if oi_data.change_percent else 0

                        oi_section += f"  {oi_data.source.upper()}: {open_interest_val:,.2f} "
                        if change_percent_val != 0:
                            oi_section += f"(24h: {change_percent_val:+.2f}%)\n"
                        else:
                            oi_section += "\n"
                    except (ValueError, TypeError) as e:
                        logger.warning(f"Error formatting open interest data: {e}")
                        oi_section += f"  {oi_data.source.upper()}: Error formatting data\n"
            else:
                oi_section += "  No open interest data available\n"

            # Format funding rate
            funding_section = f"Funding Rate: {report_data.funding_rate:.2e}\n"

            # Format Polymarket sentiment data
            sentiment_section = ""
            if report_data.polymarket_data:
                sentiment_section = f"\nPolymarket Sentiment Analysis:\n"
                sentiment_section += f"Market Sentiment Score: {report_data.market_sentiment_score:.3f} "
                if report_data.market_sentiment_score > 0.1:
                    sentiment_section += "(Bullish)\n"
                elif report_data.market_sentiment_score < -0.1:
                    sentiment_section += "(Bearish)\n"
                else:
                    sentiment_section += "(Neutral)\n"

                sentiment_section += f"Active Markets: {len(report_data.polymarket_data)}\n"
                for market in report_data.polymarket_data[:3]:  # Show top 3 markets
                    sentiment_section += f"  - {market.question[:50]}... (Prob: {market.probability:.2f})\n"
            else:
                sentiment_section = "\nPolymarket Sentiment: No data available\n"

            # Format intraday series (the series are produced at 3-minute intervals)
            intraday_section = f"""Intraday series (3‑minute intervals, oldest → latest):
Mid prices: {report_data.mid_prices}
EMA indicators (20‑period): {report_data.ema_20_series}
MACD indicators: {report_data.macd_series}
RSI indicators (7‑Period): {report_data.rsi_7_series}
RSI indicators (14‑Period): {report_data.rsi_14_series}"""

            # Format longer-term context
            longer_term_section = f"""Longer‑term context (4‑hour timeframe):
20‑Period EMA: {report_data.ema_20_4h:.3f} vs. 50‑Period EMA: {report_data.ema_50_4h:.3f}
3‑Period ATR: {report_data.atr_3_4h:.3f} vs. 14‑Period ATR: {report_data.atr_14_4h:.3f}
Current Volume: {report_data.current_volume:.3f} vs. Average Volume: {report_data.average_volume:.3f}
MACD indicators: {report_data.macd_4h_series}
RSI indicators (14‑Period): {report_data.rsi_14_4h_series}"""

            # Combine all sections
            report_text = f"""ALL OF THE PRICE OR SIGNAL DATA BELOW IS ORDERED: OLDEST → NEWEST

Timeframes note: Unless stated otherwise in a section title, intraday series are provided at 3‑minute intervals. If a coin uses a different interval, it is explicitly stated in that coin's section.

{current_values}

In addition, here is the latest {report_data.symbol} open interest and funding rate for perps (the instrument you are trading):

{oi_section}
{funding_section}{sentiment_section}
{intraday_section}

{longer_term_section}"""

            return report_text

        except Exception as e:
            logger.error(f"Error generating report text: {e}")
            return f"Error generating report: {e}"

    def crawl_and_generate_report(self, symbol: str) -> Optional[str]:
        """Crawl data and generate formatted report in one call

        Args:
            symbol: Trading pair symbol (e.g., 'BTC/USDT')

        Returns:
            Formatted report string, or None if failed
        """
        try:
            report_data = self.crawl_report_data(symbol)

            if report_data:
                return self.generate_report_text(report_data)
            else:
                return None

        except Exception as e:
            logger.error(f"Error in crawl_and_generate_report for {symbol}: {e}")
            return None
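

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the original pipeline). The
    # DataProvider dependency is defined elsewhere in the project and is assumed
    # here; with data_provider=None the crawler can still query public exchange and
    # Polymarket endpoints, but crawl_and_generate_report() returns None because it
    # has no OHLCV history to work from.
    logging.basicConfig(level=logging.INFO)

    crawler = ReportDataCrawler(data_provider=None)  # pass a real DataProvider in production

    # Exchange-level data that does not require a DataProvider:
    oi_readings = crawler._fetch_open_interest_from_multiple_sources("BTC/USDT")
    funding = crawler._fetch_funding_rate("BTC/USDT")
    print(f"Open interest sources: {[d.source for d in oi_readings]}, funding rate: {funding}")

    # Full report generation requires OHLCV history from a DataProvider:
    report = crawler.crawl_and_generate_report("BTC/USDT")
    print(report if report else "Report unavailable (no DataProvider configured)")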