18: tests, fixes

COBY/tests/test_integration_pipeline.py | 485 lines (new file)
@@ -0,0 +1,485 @@
"""
|
||||
Integration tests for complete data pipeline from exchanges to storage.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import Mock, AsyncMock, patch
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ..connectors.binance_connector import BinanceConnector
|
||||
from ..processing.data_processor import DataProcessor
|
||||
from ..aggregation.aggregation_engine import AggregationEngine
|
||||
from ..storage.timescale_manager import TimescaleManager
|
||||
from ..caching.redis_manager import RedisManager
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class TestDataPipelineIntegration:
    """Test complete data pipeline integration"""

    @pytest.fixture
    def mock_components(self):
        """Set up mock components for testing.

        Note: this is a plain (synchronous) fixture; nothing here needs
        awaiting, and declaring it ``async def`` under a bare
        ``@pytest.fixture`` would hand tests an un-awaited coroutine.
        """
        # Mock exchange connector
        connector = Mock(spec=BinanceConnector)
        connector.exchange_name = "binance"
        connector.connect = AsyncMock(return_value=True)
        connector.disconnect = AsyncMock()
        connector.subscribe_orderbook = AsyncMock()
        connector.subscribe_trades = AsyncMock()

        # Mock data processor
        processor = Mock(spec=DataProcessor)
        processor.process_orderbook = Mock()
        processor.process_trade = Mock()
        processor.validate_data = Mock(return_value=True)

        # Mock aggregation engine
        aggregator = Mock(spec=AggregationEngine)
        aggregator.aggregate_orderbook = Mock()
        aggregator.create_heatmap = Mock()

        # Mock storage manager
        storage = Mock(spec=TimescaleManager)
        storage.store_orderbook = AsyncMock(return_value=True)
        storage.store_trade = AsyncMock(return_value=True)
        storage.is_connected = Mock(return_value=True)

        # Mock cache manager
        cache = Mock(spec=RedisManager)
        cache.set = AsyncMock(return_value=True)
        cache.get = AsyncMock(return_value=None)
        cache.is_connected = Mock(return_value=True)

        return {
            'connector': connector,
            'processor': processor,
            'aggregator': aggregator,
            'storage': storage,
            'cache': cache
        }

    @pytest.fixture
    def sample_orderbook(self):
        """Create sample order book data"""
        return OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[
                PriceLevel(price=50000.0, size=1.5),
                PriceLevel(price=49990.0, size=2.0),
                PriceLevel(price=49980.0, size=1.0)
            ],
            asks=[
                PriceLevel(price=50010.0, size=1.2),
                PriceLevel(price=50020.0, size=1.8),
                PriceLevel(price=50030.0, size=0.8)
            ]
        )

    @pytest.fixture
    def sample_trade(self):
        """Create sample trade data"""
        return TradeEvent(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            price=50005.0,
            size=0.5,
            side="buy",
            trade_id="12345"
        )

    @pytest.mark.asyncio
    async def test_complete_orderbook_pipeline(self, mock_components, sample_orderbook):
        """Test complete order book processing pipeline"""
        components = mock_components

        # Setup processor to return processed data
        components['processor'].process_orderbook.return_value = sample_orderbook

        # Simulate pipeline flow
        # 1. Receive data from exchange
        raw_data = {"symbol": "BTCUSDT", "bids": [], "asks": []}

        # 2. Process data
        processed_data = components['processor'].process_orderbook(raw_data, "binance")

        # 3. Validate data
        is_valid = components['processor'].validate_data(processed_data)
        assert is_valid

        # 4. Aggregate data
        components['aggregator'].aggregate_orderbook(processed_data)

        # 5. Store in database
        await components['storage'].store_orderbook(processed_data)

        # 6. Cache latest data
        await components['cache'].set("orderbook:BTCUSDT:binance", processed_data)

        # Verify all components were called
        components['processor'].process_orderbook.assert_called_once()
        components['processor'].validate_data.assert_called_once()
        components['aggregator'].aggregate_orderbook.assert_called_once()
        components['storage'].store_orderbook.assert_called_once()
        components['cache'].set.assert_called_once()

    @pytest.mark.asyncio
    async def test_complete_trade_pipeline(self, mock_components, sample_trade):
        """Test complete trade processing pipeline"""
        components = mock_components

        # Setup processor to return processed data
        components['processor'].process_trade.return_value = sample_trade

        # Simulate pipeline flow
        raw_data = {"symbol": "BTCUSDT", "price": 50005.0, "quantity": 0.5}

        # Process through pipeline
        processed_data = components['processor'].process_trade(raw_data, "binance")
        is_valid = components['processor'].validate_data(processed_data)
        assert is_valid

        await components['storage'].store_trade(processed_data)
        await components['cache'].set("trade:BTCUSDT:binance:latest", processed_data)

        # Verify calls
        components['processor'].process_trade.assert_called_once()
        components['storage'].store_trade.assert_called_once()
        components['cache'].set.assert_called_once()

    @pytest.mark.asyncio
    async def test_multi_exchange_pipeline(self, mock_components):
        """Test pipeline with multiple exchanges"""
        components = mock_components
        exchanges = ["binance", "coinbase", "kraken"]

        # Simulate data from multiple exchanges
        for exchange in exchanges:
            # Create exchange-specific data
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange=exchange,
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0, size=1.0)],
                asks=[PriceLevel(price=50010.0, size=1.0)]
            )

            components['processor'].process_orderbook.return_value = orderbook
            components['processor'].validate_data.return_value = True

            # Process through pipeline
            processed_data = components['processor'].process_orderbook({}, exchange)
            is_valid = components['processor'].validate_data(processed_data)
            assert is_valid

            await components['storage'].store_orderbook(processed_data)
            await components['cache'].set(f"orderbook:BTCUSDT:{exchange}", processed_data)

        # Verify one call per exchange
        assert components['processor'].process_orderbook.call_count == len(exchanges)
        assert components['storage'].store_orderbook.call_count == len(exchanges)
        assert components['cache'].set.call_count == len(exchanges)

    @pytest.mark.asyncio
    async def test_pipeline_error_handling(self, mock_components, sample_orderbook):
        """Test pipeline error handling and recovery"""
        components = mock_components

        # Setup storage to fail on the first call, then succeed on retry
        components['storage'].store_orderbook.side_effect = [
            Exception("Database connection failed"),
            True  # Success on retry
        ]

        components['processor'].process_orderbook.return_value = sample_orderbook
        components['processor'].validate_data.return_value = True

        # First attempt should fail
        with pytest.raises(Exception):
            await components['storage'].store_orderbook(sample_orderbook)

        # Second attempt should succeed
        result = await components['storage'].store_orderbook(sample_orderbook)
        assert result is True

        # Verify retry logic
        assert components['storage'].store_orderbook.call_count == 2

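    # Illustrative sketch, not part of the original suite: the error-handling
    # test above drives the retry by hand; a production pipeline would wrap
    # the store call in a small helper along these lines. The helper name,
    # attempt count, and fixed delay are assumptions.
    async def _store_with_retry(self, storage, orderbook, attempts=3, delay=0.1):
        """Retry a store operation a few times before giving up"""
        for attempt in range(1, attempts + 1):
            try:
                return await storage.store_orderbook(orderbook)
            except Exception:
                if attempt == attempts:
                    raise  # out of attempts; surface the failure
                await asyncio.sleep(delay)  # brief pause before the next try
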
    @pytest.mark.asyncio
    async def test_pipeline_performance(self, mock_components):
        """Test pipeline performance with high throughput"""
        components = mock_components

        # Setup fast responses
        components['processor'].process_orderbook.return_value = Mock()
        components['processor'].validate_data.return_value = True
        components['storage'].store_orderbook.return_value = True
        components['cache'].set.return_value = True

        # Process multiple items quickly
        start_time = time.time()
        tasks = []

        # Simulate processing 100 order books concurrently
        for i in range(100):
            task = asyncio.create_task(self._process_single_orderbook(components, i))
            tasks.append(task)

        await asyncio.gather(*tasks)
        end_time = time.time()

        processing_time = end_time - start_time
        throughput = 100 / processing_time

        # Should process at least 50 items per second
        assert throughput > 50, f"Throughput too low: {throughput:.2f} items/sec"

        # Verify all items were processed
        assert components['processor'].process_orderbook.call_count == 100
        assert components['storage'].store_orderbook.call_count == 100

    async def _process_single_orderbook(self, components, index):
        """Helper method to process a single order book"""
        raw_data = {"symbol": "BTCUSDT", "index": index}

        processed_data = components['processor'].process_orderbook(raw_data, "binance")
        is_valid = components['processor'].validate_data(processed_data)

        if is_valid:
            await components['storage'].store_orderbook(processed_data)
            await components['cache'].set(f"orderbook:BTCUSDT:binance:{index}", processed_data)

    @pytest.mark.asyncio
    async def test_data_consistency_across_pipeline(self, mock_components, sample_orderbook):
        """Test data consistency throughout the pipeline"""
        components = mock_components

        # Track data transformations
        original_data = {"symbol": "BTCUSDT", "timestamp": "2024-01-01T00:00:00Z"}

        # Reuse the sample snapshot as the processor's output
        modified_orderbook = sample_orderbook
        modified_orderbook.symbol = "BTCUSDT"  # Ensure consistency
        components['processor'].process_orderbook.return_value = modified_orderbook
        components['processor'].validate_data.return_value = True

        # Process data
        processed_data = components['processor'].process_orderbook(original_data, "binance")

        # Verify data consistency
        assert processed_data.symbol == "BTCUSDT"
        assert processed_data.exchange == "binance"
        assert len(processed_data.bids) > 0
        assert len(processed_data.asks) > 0

        # Verify all price levels are valid
        for bid in processed_data.bids:
            assert bid.price > 0
            assert bid.size > 0

        for ask in processed_data.asks:
            assert ask.price > 0
            assert ask.size > 0

        # Verify bid/ask ordering
        bid_prices = [bid.price for bid in processed_data.bids]
        ask_prices = [ask.price for ask in processed_data.asks]

        assert bid_prices == sorted(bid_prices, reverse=True)  # Bids descending
        assert ask_prices == sorted(ask_prices)  # Asks ascending

        # Verify spread is non-negative (best ask at or above best bid)
        if bid_prices and ask_prices:
            spread = min(ask_prices) - max(bid_prices)
            assert spread >= 0, f"Negative spread detected: {spread}"

    @pytest.mark.asyncio
    async def test_pipeline_memory_usage(self, mock_components):
        """Test pipeline memory usage under load"""
        import gc

        # Skip cleanly if psutil is not installed rather than erroring out
        psutil = pytest.importorskip("psutil")

        components = mock_components
        process = psutil.Process()

        # Get initial memory usage
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        # Process a large amount of data
        for i in range(1000):
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="binance",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0 + i, size=1.0)],
                asks=[PriceLevel(price=50010.0 + i, size=1.0)]
            )

            components['processor'].process_orderbook.return_value = orderbook
            components['processor'].validate_data.return_value = True

            # Process data
            processed_data = components['processor'].process_orderbook({}, "binance")
            await components['storage'].store_orderbook(processed_data)

            # Force garbage collection every 100 items
            if i % 100 == 0:
                gc.collect()

        # Get final memory usage
        final_memory = process.memory_info().rss / 1024 / 1024  # MB
        memory_increase = final_memory - initial_memory

        # Memory increase should be reasonable (less than 100MB for 1000 items)
        assert memory_increase < 100, f"Memory usage increased by {memory_increase:.2f}MB"

        logger.info(f"Memory usage: {initial_memory:.2f}MB -> {final_memory:.2f}MB (+{memory_increase:.2f}MB)")


class TestPipelineResilience:
    """Test pipeline resilience and fault tolerance"""

    @pytest.mark.asyncio
    async def test_database_reconnection(self):
        """Test database reconnection handling"""
        storage = Mock(spec=TimescaleManager)

        # Simulate two failed connection checks, then recovery
        storage.is_connected.side_effect = [False, False, True]
        storage.connect.return_value = True
        storage.store_orderbook.return_value = True

        # Should attempt reconnection after each failed check
        for attempt in range(3):
            if not storage.is_connected():
                storage.connect()
            else:
                break

        # Two failed checks each trigger a connect; the third check succeeds
        assert storage.connect.call_count == 2
        assert storage.is_connected.call_count == 3

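    # Illustrative sketch, not part of the original suite: the loop above,
    # factored into a reusable helper. The name, attempt cap, and delay are
    # assumptions, and the real TimescaleManager API may differ (e.g. an
    # async connect()).
    async def _ensure_connected(self, storage, max_attempts=3, delay=0.5):
        """Reconnect to storage, retrying a few times before giving up"""
        for _ in range(max_attempts):
            if storage.is_connected():
                return True
            storage.connect()
            await asyncio.sleep(delay)  # give the connection a moment to settle
        return storage.is_connected()
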
    @pytest.mark.asyncio
    async def test_cache_fallback(self):
        """Test cache fallback when Redis is unavailable"""
        cache = Mock(spec=RedisManager)

        # Simulate cache failure
        cache.is_connected.return_value = False
        cache.set.side_effect = Exception("Redis connection failed")

        # Should handle cache failure gracefully
        try:
            await cache.set("test_key", "test_value")
        except Exception:
            # Should continue processing even if cache fails
            pass

        assert not cache.is_connected()

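    # Illustrative sketch, not part of the original suite: callers typically
    # guard cache writes like this so a Redis outage degrades to "no cache"
    # instead of failing the pipeline. The helper name is an assumption.
    async def _cache_set_safe(self, cache, key, value):
        """Write-through cache set that swallows cache outages"""
        try:
            return await cache.set(key, value)
        except Exception:
            logger.warning("Cache unavailable; skipping cache write for %s", key)
            return False
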
    @pytest.mark.asyncio
    async def test_exchange_failover(self):
        """Test exchange failover when one exchange fails"""
        exchanges = ["binance", "coinbase", "kraken"]
        failed_exchange = "binance"

        # Simulate one exchange failing: drop it and continue with the rest
        working_exchanges = [ex for ex in exchanges if ex != failed_exchange]

        assert len(working_exchanges) == 2
        assert "coinbase" in working_exchanges
        assert "kraken" in working_exchanges


@pytest.mark.integration
class TestRealDataPipeline:
    """Integration tests with real components (requires running services)"""

    @pytest.mark.asyncio
    async def test_real_database_integration(self, request):
        """Test with real TimescaleDB instance"""
        # pytest.config was removed in modern pytest; read the flag from the
        # built-in request fixture instead.
        if not request.config.getoption("--integration"):
            pytest.skip("Integration tests require --integration flag")

        # This test requires a running TimescaleDB instance; skip if unavailable
        try:
            storage = TimescaleManager()
            await storage.connect()

            # Test basic operations
            assert storage.is_connected()

            # Create test data
            orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="test",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0, size=1.0)],
                asks=[PriceLevel(price=50010.0, size=1.0)]
            )

            # Store and verify
            result = await storage.store_orderbook(orderbook)
            assert result is True

            await storage.disconnect()

        except Exception as e:
            pytest.skip(f"Real database not available: {e}")

    @pytest.mark.asyncio
    async def test_real_cache_integration(self, request):
        """Test with real Redis instance"""
        if not request.config.getoption("--integration"):
            pytest.skip("Integration tests require --integration flag")

        try:
            cache = RedisManager()
            await cache.connect()

            assert cache.is_connected()

            # Test basic operations
            await cache.set("test_key", {"test": "data"})
            result = await cache.get("test_key")
            assert result is not None

            await cache.disconnect()

        except Exception as e:
            pytest.skip(f"Real cache not available: {e}")


# Note: pytest only honors these hooks when they live in conftest.py (or a
# plugin); they are kept here to document the options this suite expects.
def pytest_configure(config):
    """Configure pytest with custom markers"""
    config.addinivalue_line("markers", "integration: mark test as integration test")


def pytest_addoption(parser):
    """Add custom command line options"""
    parser.addoption(
        "--integration",
        action="store_true",
        default=False,
        help="run integration tests with real services"
    )
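

# Illustrative sketch, not in the original commit: once the hooks above are
# moved to conftest.py, this standard hook can auto-skip integration-marked
# tests whenever --integration is absent, replacing per-test option checks.
def pytest_collection_modifyitems(config, items):
    """Skip tests marked 'integration' unless --integration was passed"""
    if config.getoption("--integration"):
        return  # flag present: run everything as collected
    skip_integration = pytest.mark.skip(reason="need --integration option to run")
    for item in items:
        if "integration" in item.keywords:
            item.add_marker(skip_integration)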