Files
gogo2/.kiro/specs/1.multi-modal-trading-system/DATA_PROVIDER_QUICK_REFERENCE.md
Dobromir Popov 0c28a0997c more cleanup
2025-10-13 16:11:06 +03:00

11 KiB

Data Provider Quick Reference Guide

Overview

Quick reference for using the multi-layered data provider system in the multi-modal trading system.

Architecture Layers

COBY System → Core DataProvider → StandardizedDataProvider → Models

Getting Started

Basic Usage

from core.standardized_data_provider import StandardizedDataProvider

# Initialize provider
provider = StandardizedDataProvider(
    symbols=['ETH/USDT', 'BTC/USDT'],
    timeframes=['1s', '1m', '1h', '1d']
)

# Start real-time processing
provider.start_real_time_processing()

# Get standardized input for models
base_input = provider.get_base_data_input('ETH/USDT')

# Validate data quality
if base_input and base_input.validate():
    # Use data for model inference
    pass

BaseDataInput Structure

@dataclass
class BaseDataInput:
    """Standardized model input for a single symbol.

    This is the object returned by ``provider.get_base_data_input(symbol)``
    in the usage examples of this guide. Those examples also call
    ``validate()`` on it; that method is not shown in this field listing.
    """
    symbol: str                                    # Trading pair, e.g. 'ETH/USDT'
    timestamp: datetime                            # Current time
    
    # OHLCV Data (300 frames each)
    ohlcv_1s: List[OHLCVBar]                      # 1-second bars
    ohlcv_1m: List[OHLCVBar]                      # 1-minute bars
    ohlcv_1h: List[OHLCVBar]                      # 1-hour bars
    ohlcv_1d: List[OHLCVBar]                      # 1-day bars
    btc_ohlcv_1s: List[OHLCVBar]                  # BTC reference (1-second bars)
    
    # COB Data
    cob_data: Optional[COBData]                    # Order book data; None when unavailable
    
    # Technical Analysis
    technical_indicators: Dict[str, float]         # Indicator name -> value (RSI, MACD, etc.)
    pivot_points: List[PivotPoint]                 # Williams pivots
    
    # Cross-Model Feeding
    last_predictions: Dict[str, ModelOutput]       # Other model outputs
    
    # Market Microstructure
    market_microstructure: Dict[str, Any]          # Order flow, etc.

Common Operations

Get Current Price

# Multiple fallback methods
price = provider.get_current_price('ETH/USDT')

# Direct API call with cache
price = provider.get_live_price_from_api('ETH/USDT')

Get Historical Data

# Get OHLCV data
df = provider.get_historical_data(
    symbol='ETH/USDT',
    timeframe='1h',
    limit=300
)

Get COB Data

# Get latest COB snapshot
cob_data = provider.get_latest_cob_data('ETH/USDT')

# Get COB imbalance metrics
imbalance = provider.get_current_cob_imbalance('ETH/USDT')

Get Pivot Points

# Get Williams Market Structure pivots
pivots = provider.calculate_williams_pivot_points('ETH/USDT')

Store Model Output

from datetime import datetime

from core.data_models import ModelOutput

# Create model output
output = ModelOutput(
    model_type='cnn',
    model_name='williams_cnn_v2',
    symbol='ETH/USDT',
    timestamp=datetime.now(),
    confidence=0.85,
    predictions={
        'action': 'BUY',
        'action_confidence': 0.85,
        'direction_vector': 0.7
    },
    # `tensor(...)` is a placeholder for the model's actual feature tensor
    hidden_states={'conv_features': tensor(...)},
    metadata={'version': '2.1'}
)

# Store for cross-model feeding
provider.store_model_output(output)

Get Model Outputs

# Get all model outputs for a symbol
outputs = provider.get_model_outputs('ETH/USDT')

# Access specific model output
cnn_output = outputs.get('williams_cnn_v2')

Data Validation

Validate BaseDataInput

base_input = provider.get_base_data_input('ETH/USDT')

if base_input:
    # Check validation
    is_valid = base_input.validate()
    
    # Check data completeness
    if len(base_input.ohlcv_1s) >= 100:
        # Sufficient data for inference
        pass

Check Data Quality

# Get data completeness metrics
if base_input:
    ohlcv_complete = all([
        len(base_input.ohlcv_1s) >= 100,
        len(base_input.ohlcv_1m) >= 100,
        len(base_input.ohlcv_1h) >= 100,
        len(base_input.ohlcv_1d) >= 100
    ])
    
    cob_complete = base_input.cob_data is not None
    
    # Overall quality score (implement in Task 2.3)
    # quality_score = base_input.data_quality_score()

COB Data Access

COB Data Structure

@dataclass
class COBData:
    """Order book (COB) data for one symbol at one point in time.

    Every per-price mapping below is keyed by a float price level.
    """
    symbol: str
    timestamp: datetime
    current_price: float
    bucket_size: float                             # $1 ETH, $10 BTC
    
    # Price Buckets (±20 around current price)
    # NOTE(review): the comment below names the inner keys bid_vol/ask_vol,
    # while the access example in this guide reads 'bid_volume'/'ask_volume'
    # - confirm the real key names against core/data_models.py.
    price_buckets: Dict[float, Dict[str, float]]   # {price: {bid_vol, ask_vol}}
    bid_ask_imbalance: Dict[float, float]          # {price: imbalance}
    
    # Moving Averages (±5 buckets)
    ma_1s_imbalance: Dict[float, float]            # {price: imbalance}, 1s window
    ma_5s_imbalance: Dict[float, float]            # {price: imbalance}, 5s window
    ma_15s_imbalance: Dict[float, float]           # {price: imbalance}, 15s window
    ma_60s_imbalance: Dict[float, float]           # {price: imbalance}, 60s window
    
    # Order Flow
    order_flow_metrics: Dict[str, float]           # Metric name -> value

Access COB Buckets

# Guard against a missing BaseDataInput as well as missing COB data
if base_input and base_input.cob_data:
    cob = base_input.cob_data

    # Get current price
    current_price = cob.current_price

    # Get bid/ask volumes for specific price
    # NOTE(review): price_buckets is keyed by float, so this lookup only
    # matches when current_price + bucket_size is exactly a bucket key -
    # confirm how bucket prices are rounded.
    price_level = current_price + cob.bucket_size  # One bucket up
    # Default to an empty bucket so both volumes are always defined
    bucket = cob.price_buckets.get(price_level, {})
    bid_volume = bucket.get('bid_volume', 0)
    ask_volume = bucket.get('ask_volume', 0)

    # Get imbalance for price level
    imbalance = cob.bid_ask_imbalance.get(price_level, 0)

    # Get moving averages
    ma_1s = cob.ma_1s_imbalance.get(price_level, 0)
    ma_5s = cob.ma_5s_imbalance.get(price_level, 0)
Subscriber Pattern

Subscribe to Data Updates

def my_data_callback(tick):
    """Print the symbol and price of each tick delivered to this subscriber."""
    message = f"Received tick: {tick.symbol} @ {tick.price}"
    print(message)

# Subscribe to data updates
subscriber_id = provider.subscribe_to_data(
    callback=my_data_callback,
    symbols=['ETH/USDT'],
    subscriber_name='my_model'
)

# Unsubscribe when done
provider.unsubscribe_from_data(subscriber_id)

Configuration

Key Configuration Options

# config.yaml
data_provider:
  symbols:
    - ETH/USDT
    - BTC/USDT
  
  timeframes:
    - 1s
    - 1m
    - 1h
    - 1d
  
  cache:
    enabled: true
    candles_per_timeframe: 1500
  
  cob:
    enabled: true
    bucket_sizes:
      ETH/USDT: 1.0    # $1 buckets
      BTC/USDT: 10.0   # $10 buckets
    price_ranges:
      ETH/USDT: 5.0    # ±$5 for imbalance
      BTC/USDT: 50.0   # ±$50 for imbalance
    
  websocket:
    update_speed: 100ms
    max_depth: 1000
    reconnect_delay: 1.0
    max_reconnect_delay: 60.0

Performance Tips

Optimize Data Access

# Cache BaseDataInput for multiple models
base_input = provider.get_base_data_input('ETH/USDT')

# Use cached data for all models
cnn_input = base_input  # CNN uses full data
rl_input = base_input   # RL uses full data + CNN outputs

# Avoid repeated calls
# BAD: base_input = provider.get_base_data_input('ETH/USDT')  # Called multiple times
# GOOD: Cache and reuse

Monitor Performance

# Check subscriber statistics
stats = provider.distribution_stats

print(f"Total ticks received: {stats['total_ticks_received']}")
print(f"Total ticks distributed: {stats['total_ticks_distributed']}")
print(f"Distribution errors: {stats['distribution_errors']}")

Troubleshooting

Common Issues

1. No Data Available

base_input = provider.get_base_data_input('ETH/USDT')

if base_input is None:
    # Check if data provider is started
    if not provider.data_maintenance_active:
        provider.start_automatic_data_maintenance()
    
    # Check if COB collection is started
    if not provider.cob_collection_active:
        provider.start_cob_collection()

2. Incomplete Data

import time

if base_input:
    # Check frame counts
    print(f"1s frames: {len(base_input.ohlcv_1s)}")
    print(f"1m frames: {len(base_input.ohlcv_1m)}")
    print(f"1h frames: {len(base_input.ohlcv_1h)}")
    print(f"1d frames: {len(base_input.ohlcv_1d)}")

    # Wait for data to accumulate
    if len(base_input.ohlcv_1s) < 100:
        print("Waiting for more data...")
        time.sleep(60)  # Wait 1 minute
        # Fetch again after waiting; the object fetched earlier is
        # presumably a snapshot that does not grow - verify
        base_input = provider.get_base_data_input('ETH/USDT')

3. COB Data Missing

if base_input and base_input.cob_data is None:
    # Check COB collection status
    if not provider.cob_collection_active:
        provider.start_cob_collection()

    # Check WebSocket status
    if hasattr(provider, 'enhanced_cob_websocket'):
        ws = provider.enhanced_cob_websocket
        status = ws.status.get('ETH/USDT')
        if status is None:
            # .get() returns None when there is no status entry for the symbol
            print("WebSocket status unavailable for ETH/USDT")
        else:
            print(f"WebSocket connected: {status.connected}")
            print(f"Last message: {status.last_message_time}")

4. Price Data Stale

# Force refresh price
price = provider.get_live_price_from_api('ETH/USDT')

# Check cache freshness
if 'ETH/USDT' in provider.live_price_cache:
    cached_price, timestamp = provider.live_price_cache['ETH/USDT']
    age = datetime.now() - timestamp
    print(f"Price cache age: {age.total_seconds()}s")

Best Practices

1. Always Validate Data

base_input = provider.get_base_data_input('ETH/USDT')

if base_input and base_input.validate():
    # Safe to use for inference
    model_output = model.predict(base_input)
else:
    # Log and skip inference
    logger.warning("Invalid or incomplete data, skipping inference")

2. Handle Missing Data Gracefully

# NOTE: this is a fragment of a function body - the `return` below is
# only valid inside a `def`.

# Never use synthetic data
if base_input is None:
    logger.error("No data available")
    return None  # Don't proceed with inference

# Check specific components
if base_input.cob_data is None:
    logger.warning("COB data unavailable, using OHLCV only")
    # Proceed with reduced features or skip

3. Store Model Outputs

# Always store outputs for cross-model feeding
output = model.predict(base_input)
provider.store_model_output(output)

# Other models can now access this output

4. Monitor Data Quality

# Implement quality checks
def check_data_quality(base_input):
    """Score the completeness and freshness of a BaseDataInput.

    The score is a weighted sum of four components:

    * OHLCV completeness (40%): ``len(ohlcv_1s) / 300``, capped at 1.0.
    * COB availability (30%): awarded when ``cob_data`` is truthy.
    * Pivot points (20%): awarded when ``pivot_points`` is non-empty.
    * Freshness (10%): decays linearly to zero over 60 seconds.

    Args:
        base_input: Object exposing ``ohlcv_1s``, ``cob_data``,
            ``pivot_points`` and ``timestamp``; may be ``None``.

    Returns:
        A float between 0.0 and 1.0 (up to floating-point rounding);
        0.0 when ``base_input`` is falsy.
    """
    if not base_input:
        return 0.0

    score = 0.0

    # OHLCV completeness (40%)
    score += min(1.0, len(base_input.ohlcv_1s) / 300) * 0.4

    # COB availability (30%)
    if base_input.cob_data:
        score += 0.3

    # Pivot points (20%)
    if base_input.pivot_points:
        score += 0.2

    # Freshness (10%)
    # Take "now" in the timestamp's own timezone (None for naive values) so
    # that aware and naive timestamps can both be subtracted without error.
    now = datetime.now(tz=base_input.timestamp.tzinfo)
    age = (now - base_input.timestamp).total_seconds()
    # Clamp to [0, 1]: a timestamp slightly in the future (clock skew) gives
    # a negative age and must not push the score above its 10% share.
    freshness = min(1.0, max(0.0, 1.0 - age / 60))  # Decay over 1 minute
    score += freshness * 0.1

    return score

# Use quality score
quality = check_data_quality(base_input)
if quality < 0.8:
    logger.warning(f"Low data quality: {quality:.2f}")

File Locations

  • Core DataProvider: core/data_provider.py
  • Standardized Provider: core/standardized_data_provider.py
  • Enhanced COB WebSocket: core/enhanced_cob_websocket.py
  • Williams Market Structure: core/williams_market_structure.py
  • Data Models: core/data_models.py
  • Model Output Manager: core/model_output_manager.py
  • COBY System: COBY/ directory

Additional Resources

  • Requirements: .kiro/specs/1.multi-modal-trading-system/requirements.md
  • Design: .kiro/specs/1.multi-modal-trading-system/design.md
  • Tasks: .kiro/specs/1.multi-modal-trading-system/tasks.md
  • Audit Summary: .kiro/specs/1.multi-modal-trading-system/AUDIT_SUMMARY.md

Last Updated: January 9, 2025
Version: 1.0