"""
|
|
Data serialization for Redis caching.
|
|
"""
|
|
|
|
import gzip
import json
import pickle
import zlib
from datetime import datetime
from typing import Any, Union, Dict, List

from ..models.core import (
    OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
    OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.exceptions import ProcessingError
from ..utils.logging import get_logger
|
|
|
|
# Module-level logger, named after this module for filterable log output.
logger = get_logger(__name__)
|
|
|
|
|
|
class DataSerializer:
    """
    Handles serialization and deserialization of data for Redis storage.

    Supports multiple serialization formats:
    - JSON for simple data
    - Pickle for complex objects
    - Compressed formats for large data
    """

    def __init__(self, use_compression: bool = True):
        """
        Initialize data serializer.

        Args:
            use_compression: Whether to use gzip compression
        """
        self.use_compression = use_compression
        # Lifetime counters; 'compression_ratio' holds a running mean of
        # compressed/original size across serialize() calls.
        self.serialization_stats = {
            'serialized': 0,
            'deserialized': 0,
            'compression_ratio': 0.0,
            'errors': 0
        }

        logger.info(f"Data serializer initialized (compression: {use_compression})")
|
|
|
|
def serialize(self, data: Any, format_type: str = 'auto') -> bytes:
|
|
"""
|
|
Serialize data for Redis storage.
|
|
|
|
Args:
|
|
data: Data to serialize
|
|
format_type: Serialization format ('json', 'pickle', 'auto')
|
|
|
|
Returns:
|
|
bytes: Serialized data
|
|
"""
|
|
try:
|
|
# Determine format
|
|
if format_type == 'auto':
|
|
format_type = self._determine_format(data)
|
|
|
|
# Serialize based on format
|
|
if format_type == 'json':
|
|
serialized = self._serialize_json(data)
|
|
elif format_type == 'pickle':
|
|
serialized = self._serialize_pickle(data)
|
|
else:
|
|
raise ValueError(f"Unsupported format: {format_type}")
|
|
|
|
# Apply compression if enabled
|
|
if self.use_compression:
|
|
original_size = len(serialized)
|
|
serialized = gzip.compress(serialized)
|
|
compressed_size = len(serialized)
|
|
|
|
# Update compression ratio
|
|
if original_size > 0:
|
|
ratio = compressed_size / original_size
|
|
self.serialization_stats['compression_ratio'] = (
|
|
(self.serialization_stats['compression_ratio'] *
|
|
self.serialization_stats['serialized'] + ratio) /
|
|
(self.serialization_stats['serialized'] + 1)
|
|
)
|
|
|
|
self.serialization_stats['serialized'] += 1
|
|
return serialized
|
|
|
|
except Exception as e:
|
|
self.serialization_stats['errors'] += 1
|
|
logger.error(f"Serialization error: {e}")
|
|
raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR")
|
|
|
|
def deserialize(self, data: bytes, format_type: str = 'auto') -> Any:
|
|
"""
|
|
Deserialize data from Redis storage.
|
|
|
|
Args:
|
|
data: Serialized data
|
|
format_type: Expected format ('json', 'pickle', 'auto')
|
|
|
|
Returns:
|
|
Any: Deserialized data
|
|
"""
|
|
try:
|
|
# Decompress if needed
|
|
if self.use_compression:
|
|
try:
|
|
data = gzip.decompress(data)
|
|
except gzip.BadGzipFile:
|
|
# Data might not be compressed
|
|
pass
|
|
|
|
# Determine format if auto
|
|
if format_type == 'auto':
|
|
format_type = self._detect_format(data)
|
|
|
|
# Deserialize based on format
|
|
if format_type == 'json':
|
|
result = self._deserialize_json(data)
|
|
elif format_type == 'pickle':
|
|
result = self._deserialize_pickle(data)
|
|
else:
|
|
raise ValueError(f"Unsupported format: {format_type}")
|
|
|
|
self.serialization_stats['deserialized'] += 1
|
|
return result
|
|
|
|
except Exception as e:
|
|
self.serialization_stats['errors'] += 1
|
|
logger.error(f"Deserialization error: {e}")
|
|
raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR")
|
|
|
|
def _determine_format(self, data: Any) -> str:
|
|
"""Determine best serialization format for data"""
|
|
# Use JSON for simple data types
|
|
if isinstance(data, (dict, list, str, int, float, bool)) or data is None:
|
|
return 'json'
|
|
|
|
# Use pickle for complex objects
|
|
return 'pickle'
|
|
|
|
def _detect_format(self, data: bytes) -> str:
|
|
"""Detect serialization format from data"""
|
|
try:
|
|
# Try JSON first
|
|
json.loads(data.decode('utf-8'))
|
|
return 'json'
|
|
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
# Assume pickle
|
|
return 'pickle'
|
|
|
|
def _serialize_json(self, data: Any) -> bytes:
|
|
"""Serialize data as JSON"""
|
|
# Convert complex objects to dictionaries
|
|
if hasattr(data, '__dict__'):
|
|
data = self._object_to_dict(data)
|
|
elif isinstance(data, list):
|
|
data = [self._object_to_dict(item) if hasattr(item, '__dict__') else item
|
|
for item in data]
|
|
|
|
json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False)
|
|
return json_str.encode('utf-8')
|
|
|
|
def _deserialize_json(self, data: bytes) -> Any:
|
|
"""Deserialize JSON data"""
|
|
json_str = data.decode('utf-8')
|
|
return json.loads(json_str, object_hook=self._json_deserializer)
|
|
|
|
def _serialize_pickle(self, data: Any) -> bytes:
|
|
"""Serialize data as pickle"""
|
|
return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
|
def _deserialize_pickle(self, data: bytes) -> Any:
|
|
"""Deserialize pickle data"""
|
|
return pickle.loads(data)
|
|
|
|
def _object_to_dict(self, obj: Any) -> Dict:
|
|
"""Convert object to dictionary for JSON serialization"""
|
|
if isinstance(obj, (OrderBookSnapshot, TradeEvent, HeatmapData,
|
|
PriceBuckets, OrderBookMetrics, ImbalanceMetrics,
|
|
ConsolidatedOrderBook)):
|
|
result = {
|
|
'__type__': obj.__class__.__name__,
|
|
'__data__': {}
|
|
}
|
|
|
|
# Convert object attributes
|
|
for key, value in obj.__dict__.items():
|
|
if isinstance(value, datetime):
|
|
result['__data__'][key] = {
|
|
'__datetime__': value.isoformat()
|
|
}
|
|
elif isinstance(value, list):
|
|
result['__data__'][key] = [
|
|
self._object_to_dict(item) if hasattr(item, '__dict__') else item
|
|
for item in value
|
|
]
|
|
elif hasattr(value, '__dict__'):
|
|
result['__data__'][key] = self._object_to_dict(value)
|
|
else:
|
|
result['__data__'][key] = value
|
|
|
|
return result
|
|
else:
|
|
return obj.__dict__ if hasattr(obj, '__dict__') else obj
|
|
|
|
def _json_serializer(self, obj: Any) -> Any:
|
|
"""Custom JSON serializer for special types"""
|
|
if isinstance(obj, datetime):
|
|
return {'__datetime__': obj.isoformat()}
|
|
elif hasattr(obj, '__dict__'):
|
|
return self._object_to_dict(obj)
|
|
else:
|
|
return str(obj)
|
|
|
|
def _json_deserializer(self, obj: Dict) -> Any:
|
|
"""Custom JSON deserializer for special types"""
|
|
if '__datetime__' in obj:
|
|
return datetime.fromisoformat(obj['__datetime__'])
|
|
elif '__type__' in obj and '__data__' in obj:
|
|
return self._reconstruct_object(obj['__type__'], obj['__data__'])
|
|
else:
|
|
return obj
|
|
|
|
def _reconstruct_object(self, type_name: str, data: Dict) -> Any:
|
|
"""Reconstruct object from serialized data"""
|
|
# Import required classes
|
|
from ..models.core import (
|
|
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
|
|
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook,
|
|
PriceLevel, HeatmapPoint
|
|
)
|
|
|
|
# Map type names to classes
|
|
type_map = {
|
|
'OrderBookSnapshot': OrderBookSnapshot,
|
|
'TradeEvent': TradeEvent,
|
|
'HeatmapData': HeatmapData,
|
|
'PriceBuckets': PriceBuckets,
|
|
'OrderBookMetrics': OrderBookMetrics,
|
|
'ImbalanceMetrics': ImbalanceMetrics,
|
|
'ConsolidatedOrderBook': ConsolidatedOrderBook,
|
|
'PriceLevel': PriceLevel,
|
|
'HeatmapPoint': HeatmapPoint
|
|
}
|
|
|
|
if type_name in type_map:
|
|
cls = type_map[type_name]
|
|
|
|
# Recursively deserialize nested objects
|
|
processed_data = {}
|
|
for key, value in data.items():
|
|
if isinstance(value, dict) and '__datetime__' in value:
|
|
processed_data[key] = datetime.fromisoformat(value['__datetime__'])
|
|
elif isinstance(value, dict) and '__type__' in value:
|
|
processed_data[key] = self._reconstruct_object(
|
|
value['__type__'], value['__data__']
|
|
)
|
|
elif isinstance(value, list):
|
|
processed_data[key] = [
|
|
self._reconstruct_object(item['__type__'], item['__data__'])
|
|
if isinstance(item, dict) and '__type__' in item
|
|
else item
|
|
for item in value
|
|
]
|
|
else:
|
|
processed_data[key] = value
|
|
|
|
try:
|
|
return cls(**processed_data)
|
|
except Exception as e:
|
|
logger.warning(f"Failed to reconstruct {type_name}: {e}")
|
|
return processed_data
|
|
else:
|
|
logger.warning(f"Unknown type for reconstruction: {type_name}")
|
|
return data
|
|
|
|
def serialize_heatmap(self, heatmap: HeatmapData) -> bytes:
|
|
"""Specialized serialization for heatmap data"""
|
|
try:
|
|
# Create optimized representation
|
|
heatmap_dict = {
|
|
'symbol': heatmap.symbol,
|
|
'timestamp': heatmap.timestamp.isoformat(),
|
|
'bucket_size': heatmap.bucket_size,
|
|
'points': [
|
|
{
|
|
'p': point.price, # price
|
|
'v': point.volume, # volume
|
|
'i': point.intensity, # intensity
|
|
's': point.side # side
|
|
}
|
|
for point in heatmap.data
|
|
]
|
|
}
|
|
|
|
return self.serialize(heatmap_dict, 'json')
|
|
|
|
except Exception as e:
|
|
logger.error(f"Heatmap serialization error: {e}")
|
|
# Fallback to standard serialization
|
|
return self.serialize(heatmap, 'pickle')
|
|
|
|
def deserialize_heatmap(self, data: bytes) -> HeatmapData:
|
|
"""Specialized deserialization for heatmap data"""
|
|
try:
|
|
# Try optimized format first
|
|
heatmap_dict = self.deserialize(data, 'json')
|
|
|
|
if isinstance(heatmap_dict, dict) and 'points' in heatmap_dict:
|
|
from ..models.core import HeatmapData, HeatmapPoint
|
|
|
|
# Reconstruct heatmap points
|
|
points = []
|
|
for point_data in heatmap_dict['points']:
|
|
point = HeatmapPoint(
|
|
price=point_data['p'],
|
|
volume=point_data['v'],
|
|
intensity=point_data['i'],
|
|
side=point_data['s']
|
|
)
|
|
points.append(point)
|
|
|
|
# Create heatmap
|
|
heatmap = HeatmapData(
|
|
symbol=heatmap_dict['symbol'],
|
|
timestamp=datetime.fromisoformat(heatmap_dict['timestamp']),
|
|
bucket_size=heatmap_dict['bucket_size']
|
|
)
|
|
heatmap.data = points
|
|
|
|
return heatmap
|
|
else:
|
|
# Fallback to standard deserialization
|
|
return self.deserialize(data, 'pickle')
|
|
|
|
except Exception as e:
|
|
logger.error(f"Heatmap deserialization error: {e}")
|
|
# Final fallback
|
|
return self.deserialize(data, 'pickle')
|
|
|
|
def get_stats(self) -> Dict[str, Any]:
|
|
"""Get serialization statistics"""
|
|
return self.serialization_stats.copy()
|
|
|
|
def reset_stats(self) -> None:
|
|
"""Reset serialization statistics"""
|
|
self.serialization_stats = {
|
|
'serialized': 0,
|
|
'deserialized': 0,
|
|
'compression_ratio': 0.0,
|
|
'errors': 0
|
|
}
|
|
logger.info("Serialization statistics reset") |