"""
|
|
Integration tests for complete data pipeline from exchanges to storage.
|
|
"""
|
|
|
|
import asyncio
import time
from datetime import datetime, timezone
from unittest.mock import AsyncMock, Mock

import pytest

from ..connectors.binance_connector import BinanceConnector
from ..processing.data_processor import DataProcessor
from ..aggregation.aggregation_engine import AggregationEngine
from ..storage.timescale_manager import TimescaleManager
from ..caching.redis_manager import RedisManager
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from ..utils.logging import get_logger

logger = get_logger(__name__)

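# How to run (the file path shown is illustrative for this suite):
#   pytest path/to/test_data_pipeline.py                 # mocked pipeline tests only
#   pytest path/to/test_data_pipeline.py --integration   # also exercise real services
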
class TestDataPipelineIntegration:
    """Test the complete data pipeline integration."""

    @pytest.fixture
    def mock_components(self):
        """Set up mock components for testing.

        Nothing in this fixture is awaited, so a plain synchronous fixture is
        correct (an ``async def`` fixture would hand tests a coroutine under
        strict pytest-asyncio modes).
        """
        # Mock exchange connector
        connector = Mock(spec=BinanceConnector)
        connector.exchange_name = "binance"
        connector.connect = AsyncMock(return_value=True)
        connector.disconnect = AsyncMock()
        connector.subscribe_orderbook = AsyncMock()
        connector.subscribe_trades = AsyncMock()

        # Mock data processor
        processor = Mock(spec=DataProcessor)
        processor.process_orderbook = Mock()
        processor.process_trade = Mock()
        processor.validate_data = Mock(return_value=True)

        # Mock aggregation engine
        aggregator = Mock(spec=AggregationEngine)
        aggregator.aggregate_orderbook = Mock()
        aggregator.create_heatmap = Mock()

        # Mock storage manager
        storage = Mock(spec=TimescaleManager)
        storage.store_orderbook = AsyncMock(return_value=True)
        storage.store_trade = AsyncMock(return_value=True)
        storage.is_connected = Mock(return_value=True)

        # Mock cache manager
        cache = Mock(spec=RedisManager)
        cache.set = AsyncMock(return_value=True)
        cache.get = AsyncMock(return_value=None)
        cache.is_connected = Mock(return_value=True)

        return {
            'connector': connector,
            'processor': processor,
            'aggregator': aggregator,
            'storage': storage,
            'cache': cache,
        }

    @pytest.fixture
    def sample_orderbook(self):
        """Create a sample order book snapshot."""
        return OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[
                PriceLevel(price=50000.0, size=1.5),
                PriceLevel(price=49990.0, size=2.0),
                PriceLevel(price=49980.0, size=1.0),
            ],
            asks=[
                PriceLevel(price=50010.0, size=1.2),
                PriceLevel(price=50020.0, size=1.8),
                PriceLevel(price=50030.0, size=0.8),
            ],
        )

    @pytest.fixture
    def sample_trade(self):
        """Create a sample trade event."""
        return TradeEvent(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            price=50005.0,
            size=0.5,
            side="buy",
            trade_id="12345",
        )

    @pytest.mark.asyncio
    async def test_complete_orderbook_pipeline(self, mock_components, sample_orderbook):
        """Test the complete order book processing pipeline."""
        components = mock_components

        # Set up the processor to return processed data
        components['processor'].process_orderbook.return_value = sample_orderbook

        # Simulate the pipeline flow:
        # 1. Receive raw data from the exchange
        raw_data = {"symbol": "BTCUSDT", "bids": [], "asks": []}

        # 2. Process the data
        processed_data = components['processor'].process_orderbook(raw_data, "binance")

        # 3. Validate the data
        is_valid = components['processor'].validate_data(processed_data)
        assert is_valid

        # 4. Aggregate the data
        components['aggregator'].aggregate_orderbook(processed_data)

        # 5. Store it in the database
        await components['storage'].store_orderbook(processed_data)

        # 6. Cache the latest snapshot
        await components['cache'].set("orderbook:BTCUSDT:binance", processed_data)

        # Verify all components were called
        components['processor'].process_orderbook.assert_called_once()
        components['processor'].validate_data.assert_called_once()
        components['aggregator'].aggregate_orderbook.assert_called_once()
        components['storage'].store_orderbook.assert_called_once()
        components['cache'].set.assert_called_once()

    @pytest.mark.asyncio
    async def test_complete_trade_pipeline(self, mock_components, sample_trade):
        """Test the complete trade processing pipeline."""
        components = mock_components

        # Set up the processor to return processed data
        components['processor'].process_trade.return_value = sample_trade

        # Simulate the pipeline flow
        raw_data = {"symbol": "BTCUSDT", "price": 50005.0, "quantity": 0.5}

        # Process through the pipeline
        processed_data = components['processor'].process_trade(raw_data, "binance")
        is_valid = components['processor'].validate_data(processed_data)
        assert is_valid

        await components['storage'].store_trade(processed_data)
        await components['cache'].set("trade:BTCUSDT:binance:latest", processed_data)

        # Verify calls
        components['processor'].process_trade.assert_called_once()
        components['storage'].store_trade.assert_called_once()
        components['cache'].set.assert_called_once()

    @pytest.mark.asyncio
    async def test_multi_exchange_pipeline(self, mock_components):
        """Test the pipeline with multiple exchanges."""
        components = mock_components
        exchanges = ["binance", "coinbase", "kraken"]

        # Simulate data arriving from multiple exchanges
        for exchange in exchanges:
            # Create exchange-specific data
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange=exchange,
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0, size=1.0)],
                asks=[PriceLevel(price=50010.0, size=1.0)],
            )

            components['processor'].process_orderbook.return_value = orderbook
            components['processor'].validate_data.return_value = True

            # Process through the pipeline
            processed_data = components['processor'].process_orderbook({}, exchange)
            is_valid = components['processor'].validate_data(processed_data)
            assert is_valid

            await components['storage'].store_orderbook(processed_data)
            await components['cache'].set(f"orderbook:BTCUSDT:{exchange}", processed_data)

        # Verify one call per exchange
        assert components['processor'].process_orderbook.call_count == len(exchanges)
        assert components['storage'].store_orderbook.call_count == len(exchanges)
        assert components['cache'].set.call_count == len(exchanges)

    @pytest.mark.asyncio
    async def test_pipeline_error_handling(self, mock_components, sample_orderbook):
        """Test pipeline error handling and recovery."""
        components = mock_components

        # Set up storage to fail on the first call, then succeed
        components['storage'].store_orderbook.side_effect = [
            Exception("Database connection failed"),
            True,  # Success on retry
        ]

        components['processor'].process_orderbook.return_value = sample_orderbook
        components['processor'].validate_data.return_value = True

        # First attempt should fail
        with pytest.raises(Exception):
            await components['storage'].store_orderbook(sample_orderbook)

        # Second attempt should succeed
        result = await components['storage'].store_orderbook(sample_orderbook)
        assert result is True

        # Verify the retry happened
        assert components['storage'].store_orderbook.call_count == 2

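    # A minimal sketch of the retry pattern the test above simulates by hand.
    # The helper name and policy (fixed delay, fixed attempt count) are
    # illustrative assumptions, not the production pipeline's actual API.
    @staticmethod
    async def _store_with_retry(storage, snapshot, attempts: int = 3, delay: float = 0.1):
        """Hypothetical helper: retry store_orderbook, re-raising on final failure."""
        for attempt in range(attempts):
            try:
                return await storage.store_orderbook(snapshot)
            except Exception:
                if attempt == attempts - 1:
                    raise
                await asyncio.sleep(delay)
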
    @pytest.mark.asyncio
    async def test_pipeline_performance(self, mock_components):
        """Test pipeline throughput under high load."""
        components = mock_components

        # Set up fast mock responses
        components['processor'].process_orderbook.return_value = Mock()
        components['processor'].validate_data.return_value = True
        components['storage'].store_orderbook.return_value = True
        components['cache'].set.return_value = True

        # Process 100 order books concurrently
        start_time = time.time()
        tasks = [
            asyncio.create_task(self._process_single_orderbook(components, i))
            for i in range(100)
        ]
        await asyncio.gather(*tasks)
        end_time = time.time()

        processing_time = end_time - start_time
        throughput = 100 / processing_time

        # Should process at least 50 items per second
        assert throughput > 50, f"Throughput too low: {throughput:.2f} items/sec"

        # Verify all items were processed
        assert components['processor'].process_orderbook.call_count == 100
        assert components['storage'].store_orderbook.call_count == 100

    async def _process_single_orderbook(self, components, index):
        """Helper: push a single order book through the mocked pipeline."""
        raw_data = {"symbol": "BTCUSDT", "index": index}

        processed_data = components['processor'].process_orderbook(raw_data, "binance")
        is_valid = components['processor'].validate_data(processed_data)

        if is_valid:
            await components['storage'].store_orderbook(processed_data)
            await components['cache'].set(f"orderbook:BTCUSDT:binance:{index}", processed_data)

    @pytest.mark.asyncio
    async def test_data_consistency_across_pipeline(self, mock_components, sample_orderbook):
        """Test data consistency throughout the pipeline."""
        components = mock_components

        # Track data transformations
        original_data = {"symbol": "BTCUSDT", "timestamp": "2024-01-01T00:00:00Z"}

        # Have the processor return the sample snapshot (note: this aliases the
        # fixture object rather than copying it)
        modified_orderbook = sample_orderbook
        modified_orderbook.symbol = "BTCUSDT"  # Ensure consistency
        components['processor'].process_orderbook.return_value = modified_orderbook
        components['processor'].validate_data.return_value = True

        # Process the data
        processed_data = components['processor'].process_orderbook(original_data, "binance")

        # Verify data consistency
        assert processed_data.symbol == "BTCUSDT"
        assert processed_data.exchange == "binance"
        assert len(processed_data.bids) > 0
        assert len(processed_data.asks) > 0

        # Verify all price levels are valid
        for bid in processed_data.bids:
            assert bid.price > 0
            assert bid.size > 0

        for ask in processed_data.asks:
            assert ask.price > 0
            assert ask.size > 0

        # Verify bid/ask ordering
        bid_prices = [bid.price for bid in processed_data.bids]
        ask_prices = [ask.price for ask in processed_data.asks]

        assert bid_prices == sorted(bid_prices, reverse=True)  # Bids descending
        assert ask_prices == sorted(ask_prices)  # Asks ascending

        # Verify the spread (best ask minus best bid) is non-negative
        if bid_prices and ask_prices:
            spread = min(ask_prices) - max(bid_prices)
            assert spread >= 0, f"Negative spread detected: {spread}"

    @pytest.mark.asyncio
    async def test_pipeline_memory_usage(self, mock_components):
        """Test pipeline memory usage under load."""
        import gc

        psutil = pytest.importorskip("psutil")  # Skip cleanly if psutil is absent

        components = mock_components
        process = psutil.Process()

        # Get the initial memory usage
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        # Process a large amount of data
        for i in range(1000):
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="binance",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0 + i, size=1.0)],
                asks=[PriceLevel(price=50010.0 + i, size=1.0)],
            )

            components['processor'].process_orderbook.return_value = orderbook
            components['processor'].validate_data.return_value = True

            # Process the data
            processed_data = components['processor'].process_orderbook({}, "binance")
            await components['storage'].store_orderbook(processed_data)

            # Force garbage collection every 100 items
            if i % 100 == 0:
                gc.collect()

        # Get the final memory usage
        final_memory = process.memory_info().rss / 1024 / 1024  # MB
        memory_increase = final_memory - initial_memory

        # The increase should stay reasonable (under 100 MB for 1000 items)
        assert memory_increase < 100, f"Memory usage increased by {memory_increase:.2f}MB"

        logger.info(f"Memory usage: {initial_memory:.2f}MB -> {final_memory:.2f}MB (+{memory_increase:.2f}MB)")


class TestPipelineResilience:
    """Test pipeline resilience and fault tolerance."""

    @pytest.mark.asyncio
    async def test_database_reconnection(self):
        """Test database reconnection handling."""
        storage = Mock(spec=TimescaleManager)

        # Simulate two failed connectivity checks, then recovery.
        # connect() is async in the real manager, so it must be awaited.
        storage.is_connected = Mock(side_effect=[False, False, True])
        storage.connect = AsyncMock(return_value=True)
        storage.store_orderbook = AsyncMock(return_value=True)

        # Attempt reconnection until the connection is back
        for _ in range(3):
            if not storage.is_connected():
                await storage.connect()
            else:
                break

        # Two failed checks trigger two reconnect attempts; the third check passes
        assert storage.connect.call_count == 2
        assert storage.is_connected.call_count == 3

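    # A minimal sketch of reconnection with exponential backoff, which the loop
    # above approximates. The delay values and attempt cap are illustrative
    # assumptions about the production policy, not the manager's actual API.
    @staticmethod
    async def _reconnect_with_backoff(storage, max_attempts: int = 5, base_delay: float = 0.5):
        """Hypothetical helper: retry connect() with doubling delays."""
        for attempt in range(max_attempts):
            if storage.is_connected():
                return True
            try:
                await storage.connect()
            except Exception:
                await asyncio.sleep(base_delay * (2 ** attempt))
        return storage.is_connected()
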
    @pytest.mark.asyncio
    async def test_cache_fallback(self):
        """Test fallback behavior when Redis is unavailable."""
        cache = Mock(spec=RedisManager)

        # Simulate a cache failure
        cache.is_connected = Mock(return_value=False)
        cache.set = AsyncMock(side_effect=Exception("Redis connection failed"))

        # The pipeline should handle a cache failure gracefully
        try:
            await cache.set("test_key", "test_value")
        except Exception:
            # Processing should continue even if the cache write fails
            pass

        assert not cache.is_connected()

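    # A minimal sketch of the graceful-degradation wrapper this test implies.
    # The helper name and logging behavior are illustrative assumptions, not
    # the pipeline's actual API.
    @staticmethod
    async def _cache_set_safe(cache, key, value) -> bool:
        """Hypothetical helper: swallow cache errors so the pipeline keeps running."""
        try:
            return await cache.set(key, value)
        except Exception as exc:
            logger.warning(f"Cache write failed ({exc}); continuing without cache")
            return False
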
    @pytest.mark.asyncio
    async def test_exchange_failover(self):
        """Test exchange failover when one exchange fails."""
        exchanges = ["binance", "coinbase", "kraken"]
        failed_exchange = "binance"

        # Drop the failed exchange and continue with the remaining ones
        working_exchanges = [ex for ex in exchanges if ex != failed_exchange]

        assert len(working_exchanges) == 2
        assert "coinbase" in working_exchanges
        assert "kraken" in working_exchanges


@pytest.mark.integration
class TestRealDataPipeline:
    """Integration tests with real components (requires running services)."""

    # NOTE: the global `pytest.config` was removed in pytest 5.0. A string
    # condition is evaluated lazily by pytest with `config` in scope, so the
    # flag check still happens at collection time.
    @pytest.mark.skipif("not config.getoption('--integration')",
                        reason="Integration tests require the --integration flag")
    @pytest.mark.asyncio
    async def test_real_database_integration(self):
        """Test against a real TimescaleDB instance."""
        # Requires a running TimescaleDB instance; skip if unavailable
        try:
            storage = TimescaleManager()
            await storage.connect()

            # Test basic operations
            assert storage.is_connected()

            # Create test data
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="test",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0, size=1.0)],
                asks=[PriceLevel(price=50010.0, size=1.0)],
            )

            # Store and verify
            result = await storage.store_orderbook(orderbook)
            assert result is True

            await storage.disconnect()

        except Exception as e:
            pytest.skip(f"Real database not available: {e}")

    @pytest.mark.skipif("not config.getoption('--integration')",
                        reason="Integration tests require the --integration flag")
    @pytest.mark.asyncio
    async def test_real_cache_integration(self):
        """Test against a real Redis instance."""
        # Requires a running Redis instance; skip if unavailable
        try:
            cache = RedisManager()
            await cache.connect()

            assert cache.is_connected()

            # Test basic operations
            await cache.set("test_key", {"test": "data"})
            result = await cache.get("test_key")
            assert result is not None

            await cache.disconnect()

        except Exception as e:
            pytest.skip(f"Real cache not available: {e}")


def pytest_configure(config):
    """Configure pytest with custom markers."""
    config.addinivalue_line("markers", "integration: mark test as integration test")


def pytest_addoption(parser):
    """Add custom command-line options."""
    parser.addoption(
        "--integration",
        action="store_true",
        default=False,
        help="run integration tests with real services",
    )
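

# NOTE: pytest only collects hook functions such as pytest_addoption from
# conftest.py (or an installed plugin), not from a test module. For the
# --integration flag to actually register, the two hooks above should live in
# the suite's conftest.py, e.g.:
#
#   # conftest.py (sketch)
#   def pytest_addoption(parser):
#       parser.addoption("--integration", action="store_true", default=False,
#                        help="run integration tests with real services")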