This commit is contained in:
Dobromir Popov
2025-08-04 17:55:00 +03:00
parent 8ee9b7a90c
commit ff75af566c
6 changed files with 1687 additions and 0 deletions

View File

@ -0,0 +1,355 @@
"""
Data serialization for Redis caching.
"""
import json
import pickle
import gzip
from typing import Any, Union, Dict, List
from datetime import datetime
from ..models.core import (
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.logging import get_logger
from ..utils.exceptions import ProcessingError
logger = get_logger(__name__)
class DataSerializer:
    """
    Handles serialization and deserialization of data for Redis storage.

    Supports multiple serialization formats:
    - JSON for simple data
    - Pickle for complex objects
    - Compressed formats for large data

    JSON payloads encode ``datetime`` values as ``{"__datetime__": iso}`` and
    known model objects as ``{"__type__": name, "__data__": attributes}`` so
    they can be rebuilt on the way back.

    SECURITY NOTE: the pickle path (and the ``'auto'`` fallback to pickle for
    any payload that is not valid JSON) executes arbitrary code embedded in the
    payload. Only use it when every writer to the Redis instance is trusted.
    """

    # Every gzip stream starts with these two bytes, so compressed payloads can
    # be recognised without attempting (and failing) a decompression.
    _GZIP_MAGIC = b'\x1f\x8b'

    # Pickle protocol >= 2 starts with the PROTO opcode (0x80). That byte can
    # never begin a valid UTF-8 document, so such a payload cannot be JSON.
    _PICKLE_PROTO_MARKER = b'\x80'

    # Lazily built ``type name -> model class`` registry shared by the JSON
    # tagging (_object_to_dict) and rebuilding (_reconstruct_object) sides.
    _model_types = None

    def __init__(self, use_compression: bool = True):
        """
        Initialize data serializer.

        Args:
            use_compression: Whether to use gzip compression
        """
        self.use_compression = use_compression
        self.serialization_stats = self._new_stats()
        logger.info(f"Data serializer initialized (compression: {use_compression})")

    @staticmethod
    def _new_stats() -> Dict[str, Any]:
        """Return a fresh, zeroed statistics dictionary."""
        return {
            'serialized': 0,
            'deserialized': 0,
            'compression_ratio': 0.0,
            'errors': 0
        }

    @classmethod
    def _model_type_map(cls) -> Dict[str, type]:
        """
        Return the registry of model classes that are tagged when written as
        JSON and rebuilt when read back.

        The import is deferred to first use (as the reconstruction code always
        did) and the result is cached on the class.
        """
        if cls._model_types is None:
            from ..models.core import (
                OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
                OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook,
                PriceLevel, HeatmapPoint
            )
            cls._model_types = {
                'OrderBookSnapshot': OrderBookSnapshot,
                'TradeEvent': TradeEvent,
                'HeatmapData': HeatmapData,
                'PriceBuckets': PriceBuckets,
                'OrderBookMetrics': OrderBookMetrics,
                'ImbalanceMetrics': ImbalanceMetrics,
                'ConsolidatedOrderBook': ConsolidatedOrderBook,
                'PriceLevel': PriceLevel,
                'HeatmapPoint': HeatmapPoint
            }
        return cls._model_types

    def serialize(self, data: Any, format_type: str = 'auto') -> bytes:
        """
        Serialize data for Redis storage.

        Args:
            data: Data to serialize
            format_type: Serialization format ('json', 'pickle', 'auto')

        Returns:
            bytes: Serialized (and, if enabled, gzip-compressed) data

        Raises:
            ProcessingError: If the format is unsupported or encoding fails
        """
        try:
            if format_type == 'auto':
                format_type = self._determine_format(data)

            if format_type == 'json':
                serialized = self._serialize_json(data)
            elif format_type == 'pickle':
                serialized = self._serialize_pickle(data)
            else:
                raise ValueError(f"Unsupported format: {format_type}")

            if self.use_compression:
                original_size = len(serialized)
                serialized = gzip.compress(serialized)
                if original_size > 0:
                    # Running mean of compressed/original size over all
                    # payloads serialized so far.
                    ratio = len(serialized) / original_size
                    count = self.serialization_stats['serialized']
                    previous = self.serialization_stats['compression_ratio']
                    self.serialization_stats['compression_ratio'] = (
                        (previous * count + ratio) / (count + 1)
                    )

            self.serialization_stats['serialized'] += 1
            return serialized
        except Exception as e:
            self.serialization_stats['errors'] += 1
            logger.error(f"Serialization error: {e}")
            raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR") from e

    def deserialize(self, data: bytes, format_type: str = 'auto') -> Any:
        """
        Deserialize data from Redis storage.

        A payload that starts with the gzip magic bytes is decompressed first,
        whatever ``use_compression`` is set to, so entries written by a
        compressing serializer stay readable after compression is turned off
        (and uncompressed entries stay readable after it is turned on).

        Args:
            data: Serialized data
            format_type: Expected format ('json', 'pickle', 'auto')

        Returns:
            Any: Deserialized data

        Raises:
            ProcessingError: If the payload cannot be decoded
        """
        try:
            if data[:2] == self._GZIP_MAGIC:
                data = gzip.decompress(data)

            if format_type == 'auto':
                format_type = self._detect_format(data)

            if format_type == 'json':
                result = self._deserialize_json(data)
            elif format_type == 'pickle':
                result = self._deserialize_pickle(data)
            else:
                raise ValueError(f"Unsupported format: {format_type}")

            self.serialization_stats['deserialized'] += 1
            return result
        except Exception as e:
            self.serialization_stats['errors'] += 1
            logger.error(f"Deserialization error: {e}")
            raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR") from e

    def _determine_format(self, data: Any) -> str:
        """Determine best serialization format for data"""
        # JSON for plain built-in values, pickle for everything else.
        if data is None or isinstance(data, (dict, list, str, int, float, bool)):
            return 'json'
        return 'pickle'

    def _detect_format(self, data: bytes) -> str:
        """Detect serialization format from (already decompressed) data"""
        # Fast path: binary pickles can never be valid UTF-8 JSON, so there is
        # no point in running the JSON parser over them.
        if data[:1] == self._PICKLE_PROTO_MARKER:
            return 'pickle'
        try:
            json.loads(data.decode('utf-8'))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Anything that is not JSON is assumed to be pickle.
            return 'pickle'
        return 'json'

    def _serialize_json(self, data: Any) -> bytes:
        """Serialize data as UTF-8 encoded JSON"""
        # Convert top-level objects (or a top-level list of objects) up front;
        # anything nested deeper is handled by the ``default`` hook.
        if hasattr(data, '__dict__'):
            data = self._object_to_dict(data)
        elif isinstance(data, list):
            data = [
                self._object_to_dict(item) if hasattr(item, '__dict__') else item
                for item in data
            ]
        json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False)
        return json_str.encode('utf-8')

    def _deserialize_json(self, data: bytes) -> Any:
        """Deserialize JSON data, reviving tagged datetimes and model objects"""
        return json.loads(data.decode('utf-8'), object_hook=self._json_deserializer)

    def _serialize_pickle(self, data: Any) -> bytes:
        """Serialize data as pickle"""
        return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

    def _deserialize_pickle(self, data: bytes) -> Any:
        """Deserialize pickle data"""
        # SECURITY: pickle.loads runs arbitrary code from the payload. This is
        # only acceptable while every writer to the cache is trusted.
        return pickle.loads(data)

    def _encode_value(self, value: Any) -> Any:
        """Convert one attribute value of a model object for JSON output"""
        if isinstance(value, datetime):
            return {'__datetime__': value.isoformat()}
        if isinstance(value, list):
            return [
                self._object_to_dict(item) if hasattr(item, '__dict__') else item
                for item in value
            ]
        if hasattr(value, '__dict__'):
            return self._object_to_dict(value)
        return value

    def _object_to_dict(self, obj: Any) -> Dict:
        """
        Convert object to dictionary for JSON serialization.

        Known model objects become ``{'__type__': ..., '__data__': ...}`` so
        that they can be rebuilt on deserialization. The set of tagged types is
        the same registry used for rebuilding; in particular nested
        ``PriceLevel`` / ``HeatmapPoint`` objects are tagged too, instead of
        being flattened into plain dicts that cannot be turned back into
        objects. Any other object is represented by its attribute dictionary,
        and values without a ``__dict__`` are returned unchanged.
        """
        if not hasattr(obj, '__dict__'):
            return obj

        attributes = vars(obj)
        if not isinstance(obj, tuple(self._model_type_map().values())):
            return attributes

        return {
            '__type__': obj.__class__.__name__,
            '__data__': {
                key: self._encode_value(value)
                for key, value in attributes.items()
            }
        }

    def _json_serializer(self, obj: Any) -> Any:
        """Custom JSON serializer (``default`` hook) for special types"""
        if isinstance(obj, datetime):
            return {'__datetime__': obj.isoformat()}
        if hasattr(obj, '__dict__'):
            return self._object_to_dict(obj)
        # Last resort: store the string form (lossy, but never fails).
        return str(obj)

    def _json_deserializer(self, obj: Dict) -> Any:
        """Custom JSON deserializer (``object_hook``) for special types"""
        if '__datetime__' in obj:
            return datetime.fromisoformat(obj['__datetime__'])
        if '__type__' in obj and '__data__' in obj:
            return self._reconstruct_object(obj['__type__'], obj['__data__'])
        return obj

    def _revive(self, value: Any) -> Any:
        """Recursively rebuild tagged datetimes/objects inside a decoded value"""
        if isinstance(value, dict):
            if '__datetime__' in value:
                return datetime.fromisoformat(value['__datetime__'])
            if '__type__' in value and '__data__' in value:
                return self._reconstruct_object(value['__type__'], value['__data__'])
            return value
        if isinstance(value, list):
            return [self._revive(item) for item in value]
        return value

    def _reconstruct_object(self, type_name: str, data: Dict) -> Any:
        """
        Reconstruct object from serialized data.

        Args:
            type_name: Value of the ``__type__`` tag
            data: Value of the ``__data__`` tag (attribute name -> value)

        Returns:
            An instance of the named model class. If the type is unknown the
            raw ``data`` is returned, and if the constructor rejects the
            attributes the processed attribute dictionary is returned; both
            cases log a warning.
        """
        cls = self._model_type_map().get(type_name)
        if cls is None:
            logger.warning(f"Unknown type for reconstruction: {type_name}")
            return data

        processed_data = {key: self._revive(value) for key, value in data.items()}
        try:
            return cls(**processed_data)
        except Exception as e:
            # Best effort: hand back the attributes rather than failing the
            # whole deserialization.
            logger.warning(f"Failed to reconstruct {type_name}: {e}")
            return processed_data

    def serialize_heatmap(self, heatmap: "HeatmapData") -> bytes:
        """
        Specialized serialization for heatmap data.

        Writes a compact JSON document with one-letter keys per point
        (``p`` price, ``v`` volume, ``i`` intensity, ``s`` side). If that
        fails for any reason the heatmap is pickled instead.

        Args:
            heatmap: Heatmap to serialize

        Returns:
            bytes: Serialized heatmap
        """
        try:
            heatmap_dict = {
                'symbol': heatmap.symbol,
                'timestamp': heatmap.timestamp.isoformat(),
                'bucket_size': heatmap.bucket_size,
                'points': [
                    {
                        'p': point.price,      # price
                        'v': point.volume,     # volume
                        'i': point.intensity,  # intensity
                        's': point.side        # side
                    }
                    for point in heatmap.data
                ]
            }
            return self.serialize(heatmap_dict, 'json')
        except Exception as e:
            logger.error(f"Heatmap serialization error: {e}")
            # Fallback to standard serialization
            return self.serialize(heatmap, 'pickle')

    def deserialize_heatmap(self, data: bytes) -> "HeatmapData":
        """
        Specialized deserialization for heatmap data.

        The payload is decoded once with format auto-detection, so a heatmap
        stored through the pickle fallback is read directly instead of first
        failing (and being logged and counted as an error) as JSON.

        Args:
            data: Bytes produced by ``serialize_heatmap``

        Returns:
            HeatmapData: The rebuilt heatmap when the payload is the compact
            JSON form; otherwise whatever the payload decodes to (the pickle
            fallback case).

        Raises:
            ProcessingError: If the payload cannot be decoded, or the compact
                form is missing fields or holds invalid values
        """
        payload = self.deserialize(data, 'auto')
        if not (isinstance(payload, dict) and 'points' in payload):
            return payload

        from ..models.core import HeatmapData, HeatmapPoint

        try:
            points = [
                HeatmapPoint(
                    price=point_data['p'],
                    volume=point_data['v'],
                    intensity=point_data['i'],
                    side=point_data['s']
                )
                for point_data in payload['points']
            ]
            heatmap = HeatmapData(
                symbol=payload['symbol'],
                timestamp=datetime.fromisoformat(payload['timestamp']),
                bucket_size=payload['bucket_size']
            )
        except (KeyError, TypeError, ValueError) as e:
            self.serialization_stats['errors'] += 1
            logger.error(f"Heatmap deserialization error: {e}")
            raise ProcessingError(
                f"Heatmap deserialization failed: {e}", "DESERIALIZE_ERROR"
            ) from e

        heatmap.data = points
        return heatmap

    def get_stats(self) -> Dict[str, Any]:
        """Get a copy of the serialization statistics"""
        return self.serialization_stats.copy()

    def reset_stats(self) -> None:
        """Reset serialization statistics"""
        self.serialization_stats = self._new_stats()
        logger.info("Serialization statistics reset")