caching

COBY/caching/data_serializer.py (new file, 355 lines)

@@ -0,0 +1,355 @@
"""
Data serialization for Redis caching.
"""

import json
import pickle
import gzip
from typing import Any, Union, Dict, List
from datetime import datetime
from ..models.core import (
    OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
    OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.logging import get_logger
from ..utils.exceptions import ProcessingError

logger = get_logger(__name__)


class DataSerializer:
    """
    Handles serialization and deserialization of data for Redis storage.

    Supports multiple serialization formats:
    - JSON for simple data
    - Pickle for complex objects
    - Compressed formats for large data
    """

    def __init__(self, use_compression: bool = True):
        """
        Initialize data serializer.

        Args:
            use_compression: Whether to use gzip compression
        """
        self.use_compression = use_compression
        self.serialization_stats = {
            'serialized': 0,
            'deserialized': 0,
            'compression_ratio': 0.0,
            'errors': 0
        }

        logger.info(f"Data serializer initialized (compression: {use_compression})")

    def serialize(self, data: Any, format_type: str = 'auto') -> bytes:
        """
        Serialize data for Redis storage.

        Args:
            data: Data to serialize
            format_type: Serialization format ('json', 'pickle', 'auto')

        Returns:
            bytes: Serialized data
        """
        try:
            # Determine format
            if format_type == 'auto':
                format_type = self._determine_format(data)

            # Serialize based on format
            if format_type == 'json':
                serialized = self._serialize_json(data)
            elif format_type == 'pickle':
                serialized = self._serialize_pickle(data)
            else:
                raise ValueError(f"Unsupported format: {format_type}")

            # Apply compression if enabled
            if self.use_compression:
                original_size = len(serialized)
                serialized = gzip.compress(serialized)
                compressed_size = len(serialized)

                # Update compression ratio
                if original_size > 0:
                    ratio = compressed_size / original_size
                    self.serialization_stats['compression_ratio'] = (
                        (self.serialization_stats['compression_ratio'] *
                         self.serialization_stats['serialized'] + ratio) /
                        (self.serialization_stats['serialized'] + 1)
                    )
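                    # Illustrative worked example (not in the original commit): with two
                    # prior serializations averaging 0.50 and a new ratio of 0.30, the
                    # running average becomes (0.50 * 2 + 0.30) / 3 ≈ 0.43.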

            self.serialization_stats['serialized'] += 1
            return serialized

        except Exception as e:
            self.serialization_stats['errors'] += 1
            logger.error(f"Serialization error: {e}")
            raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR")

    def deserialize(self, data: bytes, format_type: str = 'auto') -> Any:
        """
        Deserialize data from Redis storage.

        Args:
            data: Serialized data
            format_type: Expected format ('json', 'pickle', 'auto')

        Returns:
            Any: Deserialized data
        """
        try:
            # Decompress if needed
            if self.use_compression:
                try:
                    data = gzip.decompress(data)
                except gzip.BadGzipFile:
                    # Data might not be compressed
                    pass

            # Determine format if auto
            if format_type == 'auto':
                format_type = self._detect_format(data)

            # Deserialize based on format
            if format_type == 'json':
                result = self._deserialize_json(data)
            elif format_type == 'pickle':
                result = self._deserialize_pickle(data)
            else:
                raise ValueError(f"Unsupported format: {format_type}")

            self.serialization_stats['deserialized'] += 1
            return result

        except Exception as e:
            self.serialization_stats['errors'] += 1
            logger.error(f"Deserialization error: {e}")
            raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR")

    def _determine_format(self, data: Any) -> str:
        """Determine best serialization format for data"""
        # Use JSON for simple data types
        if isinstance(data, (dict, list, str, int, float, bool)) or data is None:
            return 'json'

        # Use pickle for complex objects
        return 'pickle'

    def _detect_format(self, data: bytes) -> str:
        """Detect serialization format from data"""
        try:
            # Try JSON first
            json.loads(data.decode('utf-8'))
            return 'json'
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Assume pickle
            return 'pickle'

    def _serialize_json(self, data: Any) -> bytes:
        """Serialize data as JSON"""
        # Convert complex objects to dictionaries
        if hasattr(data, '__dict__'):
            data = self._object_to_dict(data)
        elif isinstance(data, list):
            data = [self._object_to_dict(item) if hasattr(item, '__dict__') else item
                    for item in data]

        json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False)
        return json_str.encode('utf-8')

    def _deserialize_json(self, data: bytes) -> Any:
        """Deserialize JSON data"""
        json_str = data.decode('utf-8')
        return json.loads(json_str, object_hook=self._json_deserializer)

    def _serialize_pickle(self, data: Any) -> bytes:
        """Serialize data as pickle"""
        return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

    def _deserialize_pickle(self, data: bytes) -> Any:
        """Deserialize pickle data"""
        return pickle.loads(data)

    def _object_to_dict(self, obj: Any) -> Dict:
        """Convert object to dictionary for JSON serialization"""
        if isinstance(obj, (OrderBookSnapshot, TradeEvent, HeatmapData,
                            PriceBuckets, OrderBookMetrics, ImbalanceMetrics,
                            ConsolidatedOrderBook)):
            result = {
                '__type__': obj.__class__.__name__,
                '__data__': {}
            }

            # Convert object attributes
            for key, value in obj.__dict__.items():
                if isinstance(value, datetime):
                    result['__data__'][key] = {
                        '__datetime__': value.isoformat()
                    }
                elif isinstance(value, list):
                    result['__data__'][key] = [
                        self._object_to_dict(item) if hasattr(item, '__dict__') else item
                        for item in value
                    ]
                elif hasattr(value, '__dict__'):
                    result['__data__'][key] = self._object_to_dict(value)
                else:
                    result['__data__'][key] = value

            return result
        else:
            return obj.__dict__ if hasattr(obj, '__dict__') else obj
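
    # Illustrative note (not in the original commit): _object_to_dict produces a tagged
    # mapping such as {'__type__': 'TradeEvent', '__data__': {'timestamp':
    # {'__datetime__': '2024-01-01T00:00:00'}, ...}}, which _json_deserializer and
    # _reconstruct_object below use to rebuild the original object and its datetimes.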

    def _json_serializer(self, obj: Any) -> Any:
        """Custom JSON serializer for special types"""
        if isinstance(obj, datetime):
            return {'__datetime__': obj.isoformat()}
        elif hasattr(obj, '__dict__'):
            return self._object_to_dict(obj)
        else:
            return str(obj)

    def _json_deserializer(self, obj: Dict) -> Any:
        """Custom JSON deserializer for special types"""
        if '__datetime__' in obj:
            return datetime.fromisoformat(obj['__datetime__'])
        elif '__type__' in obj and '__data__' in obj:
            return self._reconstruct_object(obj['__type__'], obj['__data__'])
        else:
            return obj

    def _reconstruct_object(self, type_name: str, data: Dict) -> Any:
        """Reconstruct object from serialized data"""
        # Import required classes
        from ..models.core import (
            OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
            OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook,
            PriceLevel, HeatmapPoint
        )

        # Map type names to classes
        type_map = {
            'OrderBookSnapshot': OrderBookSnapshot,
            'TradeEvent': TradeEvent,
            'HeatmapData': HeatmapData,
            'PriceBuckets': PriceBuckets,
            'OrderBookMetrics': OrderBookMetrics,
            'ImbalanceMetrics': ImbalanceMetrics,
            'ConsolidatedOrderBook': ConsolidatedOrderBook,
            'PriceLevel': PriceLevel,
            'HeatmapPoint': HeatmapPoint
        }

        if type_name in type_map:
            cls = type_map[type_name]

            # Recursively deserialize nested objects
            processed_data = {}
            for key, value in data.items():
                if isinstance(value, dict) and '__datetime__' in value:
                    processed_data[key] = datetime.fromisoformat(value['__datetime__'])
                elif isinstance(value, dict) and '__type__' in value:
                    processed_data[key] = self._reconstruct_object(
                        value['__type__'], value['__data__']
                    )
                elif isinstance(value, list):
                    processed_data[key] = [
                        self._reconstruct_object(item['__type__'], item['__data__'])
                        if isinstance(item, dict) and '__type__' in item
                        else item
                        for item in value
                    ]
                else:
                    processed_data[key] = value

            try:
                return cls(**processed_data)
            except Exception as e:
                logger.warning(f"Failed to reconstruct {type_name}: {e}")
                return processed_data
        else:
            logger.warning(f"Unknown type for reconstruction: {type_name}")
            return data

    def serialize_heatmap(self, heatmap: HeatmapData) -> bytes:
        """Specialized serialization for heatmap data"""
        try:
            # Create optimized representation
            heatmap_dict = {
                'symbol': heatmap.symbol,
                'timestamp': heatmap.timestamp.isoformat(),
                'bucket_size': heatmap.bucket_size,
                'points': [
                    {
                        'p': point.price,      # price
                        'v': point.volume,     # volume
                        'i': point.intensity,  # intensity
                        's': point.side        # side
                    }
                    for point in heatmap.data
                ]
            }

            return self.serialize(heatmap_dict, 'json')

        except Exception as e:
            logger.error(f"Heatmap serialization error: {e}")
            # Fallback to standard serialization
            return self.serialize(heatmap, 'pickle')

    def deserialize_heatmap(self, data: bytes) -> HeatmapData:
        """Specialized deserialization for heatmap data"""
        try:
            # Try optimized format first
            heatmap_dict = self.deserialize(data, 'json')

            if isinstance(heatmap_dict, dict) and 'points' in heatmap_dict:
                from ..models.core import HeatmapData, HeatmapPoint

                # Reconstruct heatmap points
                points = []
                for point_data in heatmap_dict['points']:
                    point = HeatmapPoint(
                        price=point_data['p'],
                        volume=point_data['v'],
                        intensity=point_data['i'],
                        side=point_data['s']
                    )
                    points.append(point)

                # Create heatmap
                heatmap = HeatmapData(
                    symbol=heatmap_dict['symbol'],
                    timestamp=datetime.fromisoformat(heatmap_dict['timestamp']),
                    bucket_size=heatmap_dict['bucket_size']
                )
                heatmap.data = points

                return heatmap
            else:
                # Fallback to standard deserialization
                return self.deserialize(data, 'pickle')

        except Exception as e:
            logger.error(f"Heatmap deserialization error: {e}")
            # Final fallback
            return self.deserialize(data, 'pickle')

    def get_stats(self) -> Dict[str, Any]:
        """Get serialization statistics"""
        return self.serialization_stats.copy()

    def reset_stats(self) -> None:
        """Reset serialization statistics"""
        self.serialization_stats = {
            'serialized': 0,
            'deserialized': 0,
            'compression_ratio': 0.0,
            'errors': 0
        }
        logger.info("Serialization statistics reset")
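
A minimal usage sketch (illustrative only, not part of the committed file; it assumes the COBY package is importable and that Redis reads/writes happen elsewhere):

    from COBY.caching.data_serializer import DataSerializer

    serializer = DataSerializer(use_compression=True)

    # Simple dict data takes the JSON path and is gzip-compressed.
    payload = {'symbol': 'BTCUSDT', 'bids': [[50000.0, 1.5]], 'asks': [[50010.0, 2.0]]}
    raw = serializer.serialize(payload)      # bytes, suitable for redis.set(key, raw)
    restored = serializer.deserialize(raw)   # round-trips back to the original dict
    assert restored == payload

    print(serializer.get_stats())            # counts plus the running compression ratio

For heatmaps, serialize_heatmap() / deserialize_heatmap() use the compact per-point keys ('p', 'v', 'i', 's') shown above instead of the generic object encoding.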