gogo2/COBY/tests/test_integration_pipeline.py

"""
Integration tests for complete data pipeline from exchanges to storage.
"""
import pytest
import asyncio
import time
from datetime import datetime, timezone
from unittest.mock import Mock, AsyncMock
from ..connectors.binance_connector import BinanceConnector
from ..processing.data_processor import DataProcessor
from ..aggregation.aggregation_engine import AggregationEngine
from ..storage.timescale_manager import TimescaleManager
from ..caching.redis_manager import RedisManager
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from ..utils.logging import get_logger
logger = get_logger(__name__)
class TestDataPipelineIntegration:
"""Test complete data pipeline integration"""
@pytest.fixture
def mock_components(self):
"""Setup mock components for testing"""
# Mock exchange connector
connector = Mock(spec=BinanceConnector)
connector.exchange_name = "binance"
connector.connect = AsyncMock(return_value=True)
connector.disconnect = AsyncMock()
connector.subscribe_orderbook = AsyncMock()
connector.subscribe_trades = AsyncMock()
# Mock data processor
processor = Mock(spec=DataProcessor)
processor.process_orderbook = Mock()
processor.process_trade = Mock()
processor.validate_data = Mock(return_value=True)
# Mock aggregation engine
aggregator = Mock(spec=AggregationEngine)
aggregator.aggregate_orderbook = Mock()
aggregator.create_heatmap = Mock()
# Mock storage manager
storage = Mock(spec=TimescaleManager)
storage.store_orderbook = AsyncMock(return_value=True)
storage.store_trade = AsyncMock(return_value=True)
storage.is_connected = Mock(return_value=True)
# Mock cache manager
cache = Mock(spec=RedisManager)
cache.set = AsyncMock(return_value=True)
cache.get = AsyncMock(return_value=None)
cache.is_connected = Mock(return_value=True)
return {
'connector': connector,
'processor': processor,
'aggregator': aggregator,
'storage': storage,
'cache': cache
}
@pytest.fixture
def sample_orderbook(self):
"""Create sample order book data"""
return OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[
PriceLevel(price=50000.0, size=1.5),
PriceLevel(price=49990.0, size=2.0),
PriceLevel(price=49980.0, size=1.0)
],
asks=[
PriceLevel(price=50010.0, size=1.2),
PriceLevel(price=50020.0, size=1.8),
PriceLevel(price=50030.0, size=0.8)
]
)
@pytest.fixture
def sample_trade(self):
"""Create sample trade data"""
return TradeEvent(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
price=50005.0,
size=0.5,
side="buy",
trade_id="12345"
)
@pytest.mark.asyncio
async def test_complete_orderbook_pipeline(self, mock_components, sample_orderbook):
"""Test complete order book processing pipeline"""
components = mock_components
# Setup processor to return processed data
components['processor'].process_orderbook.return_value = sample_orderbook
# Simulate pipeline flow
# 1. Receive data from exchange
raw_data = {"symbol": "BTCUSDT", "bids": [], "asks": []}
# 2. Process data
processed_data = components['processor'].process_orderbook(raw_data, "binance")
# 3. Validate data
is_valid = components['processor'].validate_data(processed_data)
assert is_valid
# 4. Aggregate data
components['aggregator'].aggregate_orderbook(processed_data)
# 5. Store in database
await components['storage'].store_orderbook(processed_data)
# 6. Cache latest data
await components['cache'].set(f"orderbook:BTCUSDT:binance", processed_data)
# Verify all components were called
components['processor'].process_orderbook.assert_called_once()
components['processor'].validate_data.assert_called_once()
components['aggregator'].aggregate_orderbook.assert_called_once()
components['storage'].store_orderbook.assert_called_once()
components['cache'].set.assert_called_once()
@pytest.mark.asyncio
async def test_complete_trade_pipeline(self, mock_components, sample_trade):
"""Test complete trade processing pipeline"""
components = mock_components
# Setup processor to return processed data
components['processor'].process_trade.return_value = sample_trade
# Simulate pipeline flow
raw_data = {"symbol": "BTCUSDT", "price": 50005.0, "quantity": 0.5}
# Process through pipeline
processed_data = components['processor'].process_trade(raw_data, "binance")
is_valid = components['processor'].validate_data(processed_data)
assert is_valid
await components['storage'].store_trade(processed_data)
await components['cache'].set(f"trade:BTCUSDT:binance:latest", processed_data)
# Verify calls
components['processor'].process_trade.assert_called_once()
components['storage'].store_trade.assert_called_once()
components['cache'].set.assert_called_once()
@pytest.mark.asyncio
async def test_multi_exchange_pipeline(self, mock_components):
"""Test pipeline with multiple exchanges"""
components = mock_components
exchanges = ["binance", "coinbase", "kraken"]
# Simulate data from multiple exchanges
for exchange in exchanges:
# Create exchange-specific data
orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange=exchange,
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50000.0, size=1.0)],
asks=[PriceLevel(price=50010.0, size=1.0)]
)
components['processor'].process_orderbook.return_value = orderbook
components['processor'].validate_data.return_value = True
# Process through pipeline
processed_data = components['processor'].process_orderbook({}, exchange)
is_valid = components['processor'].validate_data(processed_data)
assert is_valid
await components['storage'].store_orderbook(processed_data)
await components['cache'].set(f"orderbook:BTCUSDT:{exchange}", processed_data)
# Verify multiple calls
assert components['processor'].process_orderbook.call_count == len(exchanges)
assert components['storage'].store_orderbook.call_count == len(exchanges)
assert components['cache'].set.call_count == len(exchanges)
@pytest.mark.asyncio
async def test_pipeline_error_handling(self, mock_components, sample_orderbook):
"""Test pipeline error handling and recovery"""
components = mock_components
# Setup storage to fail initially
components['storage'].store_orderbook.side_effect = [
Exception("Database connection failed"),
True # Success on retry
]
components['processor'].process_orderbook.return_value = sample_orderbook
components['processor'].validate_data.return_value = True
# First attempt should fail
with pytest.raises(Exception):
await components['storage'].store_orderbook(sample_orderbook)
# Second attempt should succeed
result = await components['storage'].store_orderbook(sample_orderbook)
assert result is True
# Verify retry logic
assert components['storage'].store_orderbook.call_count == 2
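# Hedged sketch: the test above drives the retry by hand. A production
# wrapper encapsulating the same store-with-retry behaviour might look like
# the following (helper name and backoff values are assumptions, not COBY API):
#
#   async def store_with_retry(storage, orderbook, retries=3, delay=0.5):
#       for attempt in range(retries):
#           try:
#               return await storage.store_orderbook(orderbook)
#           except Exception:
#               if attempt == retries - 1:
#                   raise  # out of retries, surface the error
#               await asyncio.sleep(delay)  # fixed backoff between attempts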
@pytest.mark.asyncio
async def test_pipeline_performance(self, mock_components):
"""Test pipeline performance with high throughput"""
components = mock_components
# Setup fast responses
components['processor'].process_orderbook.return_value = Mock()
components['processor'].validate_data.return_value = True
components['storage'].store_orderbook.return_value = True
components['cache'].set.return_value = True
# Process multiple items quickly
start_time = time.time()
tasks = []
for i in range(100):
# Simulate processing 100 order books
task = asyncio.create_task(self._process_single_orderbook(components, i))
tasks.append(task)
await asyncio.gather(*tasks)
end_time = time.time()
processing_time = end_time - start_time
throughput = 100 / processing_time
# Should process at least 50 items per second
assert throughput > 50, f"Throughput too low: {throughput:.2f} items/sec"
# Verify all items were processed
assert components['processor'].process_orderbook.call_count == 100
assert components['storage'].store_orderbook.call_count == 100
async def _process_single_orderbook(self, components, index):
"""Helper method to process a single order book"""
raw_data = {"symbol": "BTCUSDT", "index": index}
processed_data = components['processor'].process_orderbook(raw_data, "binance")
is_valid = components['processor'].validate_data(processed_data)
if is_valid:
await components['storage'].store_orderbook(processed_data)
await components['cache'].set(f"orderbook:BTCUSDT:binance:{index}", processed_data)
@pytest.mark.asyncio
async def test_data_consistency_across_pipeline(self, mock_components, sample_orderbook):
"""Test data consistency throughout the pipeline"""
components = mock_components
# Track data transformations
original_data = {"symbol": "BTCUSDT", "timestamp": "2024-01-01T00:00:00Z"}
# The processor returns the sample snapshot; its symbol already matches
components['processor'].process_orderbook.return_value = sample_orderbook
components['processor'].validate_data.return_value = True
# Process data
processed_data = components['processor'].process_orderbook(original_data, "binance")
# Verify data consistency
assert processed_data.symbol == "BTCUSDT"
assert processed_data.exchange == "binance"
assert len(processed_data.bids) > 0
assert len(processed_data.asks) > 0
# Verify all price levels are valid
for bid in processed_data.bids:
assert bid.price > 0
assert bid.size > 0
for ask in processed_data.asks:
assert ask.price > 0
assert ask.size > 0
# Verify bid/ask ordering
bid_prices = [bid.price for bid in processed_data.bids]
ask_prices = [ask.price for ask in processed_data.asks]
assert bid_prices == sorted(bid_prices, reverse=True) # Bids descending
assert ask_prices == sorted(ask_prices) # Asks ascending
# Verify spread is positive
if bid_prices and ask_prices:
spread = min(ask_prices) - max(bid_prices)
assert spread >= 0, f"Negative spread detected: {spread}"
@pytest.mark.asyncio
async def test_pipeline_memory_usage(self, mock_components):
"""Test pipeline memory usage under load"""
psutil = pytest.importorskip("psutil")  # skip cleanly if psutil is absent
import gc
components = mock_components
process = psutil.Process()
# Get initial memory usage
initial_memory = process.memory_info().rss / 1024 / 1024 # MB
# Process large amount of data
for i in range(1000):
orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50000.0 + i, size=1.0)],
asks=[PriceLevel(price=50010.0 + i, size=1.0)]
)
components['processor'].process_orderbook.return_value = orderbook
components['processor'].validate_data.return_value = True
# Process data
processed_data = components['processor'].process_orderbook({}, "binance")
await components['storage'].store_orderbook(processed_data)
# Force garbage collection every 100 items
if i % 100 == 0:
gc.collect()
# Get final memory usage
final_memory = process.memory_info().rss / 1024 / 1024 # MB
memory_increase = final_memory - initial_memory
# Memory increase should be reasonable (less than 100MB for 1000 items)
assert memory_increase < 100, f"Memory usage increased by {memory_increase:.2f}MB"
logger.info(f"Memory usage: {initial_memory:.2f}MB -> {final_memory:.2f}MB (+{memory_increase:.2f}MB)")
class TestPipelineResilience:
"""Test pipeline resilience and fault tolerance"""
@pytest.mark.asyncio
async def test_database_reconnection(self):
"""Test database reconnection handling"""
storage = Mock(spec=TimescaleManager)
# Simulate connection failure then recovery
storage.is_connected.side_effect = [False, False, True]
storage.connect.return_value = True
storage.store_orderbook.return_value = True
# Should attempt reconnection
for attempt in range(3):
if not storage.is_connected():
storage.connect()
else:
break
# connect() is attempted once for each failed connectivity check (two here)
assert storage.connect.call_count == 2
assert storage.is_connected.call_count == 3
@pytest.mark.asyncio
async def test_cache_fallback(self):
"""Test cache fallback when Redis is unavailable"""
cache = Mock(spec=RedisManager)
# Simulate cache failure
cache.is_connected.return_value = False
cache.set.side_effect = Exception("Redis connection failed")
# Should handle cache failure gracefully
try:
await cache.set("test_key", "test_value")
except Exception:
# Should continue processing even if cache fails
pass
assert not cache.is_connected()
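# Hedged sketch of the graceful-degradation wrapper this test implies
# (helper name is an assumption, not part of RedisManager's API):
#
#   async def safe_cache_set(cache, key, value):
#       try:
#           return await cache.set(key, value)
#       except Exception as exc:
#           # A cache outage must never stall the pipeline: log and move on
#           logger.warning(f"Cache write skipped for {key}: {exc}")
#           return False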
@pytest.mark.asyncio
async def test_exchange_failover(self):
"""Test exchange failover when one exchange fails"""
exchanges = ["binance", "coinbase", "kraken"]
failed_exchange = "binance"
# Simulate one exchange failing: the pipeline should drop it and
# continue with the remaining exchanges
working_exchanges = [ex for ex in exchanges if ex != failed_exchange]
assert len(working_exchanges) == 2
assert "coinbase" in working_exchanges
assert "kraken" in working_exchanges
@pytest.mark.integration
class TestRealDataPipeline:
"""Integration tests with real components (requires running services)"""
# NOTE: pytest.config was removed in pytest 5.0; a string condition is
# evaluated by pytest with `config` in scope, so it works as a drop-in.
@pytest.mark.skipif("not config.getoption('--integration')",
reason="Integration tests require --integration flag")
@pytest.mark.asyncio
async def test_real_database_integration(self):
"""Test with real TimescaleDB instance"""
# This test requires a running TimescaleDB instance
# Skip if not available
try:
from ..storage.timescale_manager import TimescaleManager
storage = TimescaleManager()
await storage.connect()
# Test basic operations
assert storage.is_connected()
# Create test data
orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="test",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50000.0, size=1.0)],
asks=[PriceLevel(price=50010.0, size=1.0)]
)
# Store and verify
result = await storage.store_orderbook(orderbook)
assert result is True
await storage.disconnect()
except Exception as e:
pytest.skip(f"Real database not available: {e}")
@pytest.mark.skipif("not config.getoption('--integration')",
reason="Integration tests require --integration flag")
@pytest.mark.asyncio
async def test_real_cache_integration(self):
"""Test with real Redis instance"""
try:
from ..caching.redis_manager import RedisManager
cache = RedisManager()
await cache.connect()
assert cache.is_connected()
# Test basic operations
await cache.set("test_key", {"test": "data"})
result = await cache.get("test_key")
assert result is not None
await cache.disconnect()
except Exception as e:
pytest.skip(f"Real cache not available: {e}")
# NOTE: pytest only picks up pytest_configure/pytest_addoption from
# conftest.py (or installed plugins); these hooks belong in the suite's conftest.py.
def pytest_configure(config):
"""Configure pytest with custom markers"""
config.addinivalue_line("markers", "integration: mark test as integration test")
def pytest_addoption(parser):
"""Add custom command line options"""
parser.addoption(
"--integration",
action="store_true",
default=False,
help="run integration tests with real services"
)
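# Usage sketch (assumes pytest-asyncio is installed and the two hooks above
# have been moved into COBY/tests/conftest.py):
#
#   pytest COBY/tests/test_integration_pipeline.py                 # mocked pipeline tests
#   pytest COBY/tests/test_integration_pipeline.py --integration   # plus real-service tests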