timeseries storage

This commit is contained in:
Dobromir Popov 2025-03-18 23:03:30 +02:00
parent c1ad6cddd6
commit 4aefca2d6c

View File

@ -52,16 +52,38 @@ class TradeTickStorage:
def get_ticks_as_df(self) -> pd.DataFrame:
    """Return the stored ticks as a DataFrame with datetime timestamps.

    The ``timestamp`` column is converted with ``unit='ms'``, i.e. the
    stored values are interpreted as epoch milliseconds.

    Returns:
        A DataFrame built from ``self.ticks``. An empty DataFrame is
        returned when there are no ticks, when the ``timestamp`` column
        is missing, or when the timestamp conversion fails.
    """
    if not self.ticks:
        logger.warning("No ticks available for DataFrame conversion")
        return pd.DataFrame()

    df = pd.DataFrame(self.ticks)
    if df.empty:
        # Nothing to convert (e.g. ticks without any fields); hand the
        # frame back untouched.
        return df

    logger.debug(f"Converting timestamps for {len(df)} ticks")

    if 'timestamp' not in df.columns:
        logger.error("Tick data missing timestamp column")
        return pd.DataFrame()

    # Log one raw value so a wrong unit/dtype is visible in the debug log.
    sample_ts = df['timestamp'].iloc[0]
    logger.debug(f"Sample timestamp before conversion: {sample_ts}, type: {type(sample_ts)}")

    try:
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"Error converting timestamps: {e}")
        return pd.DataFrame()

    logger.debug("Timestamps converted to datetime successfully")
    logger.debug(f"Sample converted timestamp: {df['timestamp'].iloc[0]}")
    return df
def get_candles(self, interval_seconds: int = 1) -> pd.DataFrame:
"""Convert ticks to OHLCV candles at specified interval"""
if not self.ticks:
logger.warning("No ticks available for candle formation")
return pd.DataFrame()
# Ensure ticks are up to date
@ -70,29 +92,53 @@ class TradeTickStorage:
# Convert to DataFrame
df = self.get_ticks_as_df()
if df.empty:
logger.warning("Tick DataFrame is empty after conversion")
return pd.DataFrame()
logger.info(f"Preparing to create candles from {len(df)} ticks")
try:
# Use timestamp column for resampling
df = df.set_index('timestamp')
# Create interval string for resampling
interval_str = f'{interval_seconds}S'
# Create interval string for resampling - use 's' instead of deprecated 'S'
interval_str = f'{interval_seconds}s'
# Resample to create OHLCV candles
logger.debug(f"Resampling with interval: {interval_str}")
candles = df.resample(interval_str).agg({
'price': ['first', 'max', 'min', 'last'],
'volume': 'sum'
})
# Check if resampling produced any data
if candles.empty:
logger.warning("Resampling produced empty dataframe - check timestamp distribution")
# Show timestamp ranges to diagnose potential resampling issues
if not df.empty:
min_time = df.index.min()
max_time = df.index.max()
logger.info(f"Tick timestamp range: {min_time} to {max_time}")
return pd.DataFrame()
# Flatten MultiIndex columns
candles.columns = ['open', 'high', 'low', 'close', 'volume']
# Reset index to get timestamp as column
candles = candles.reset_index()
# Ensure no NaN values
candles = candles.dropna()
logger.debug(f"Generated {len(candles)} candles from {len(self.ticks)} ticks")
return candles
except Exception as e:
logger.error(f"Error in candle formation: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return pd.DataFrame()
class CandlestickData:
def __init__(self, max_length: int = 300):
self.timestamps = deque(maxlen=max_length)
@ -508,7 +554,9 @@ class RealTimeChart:
Input('interval-store', 'data')]
)
def update_chart(n, interval_data):
try:
interval = interval_data.get('interval', 1)
logger.info(f"Updating chart for {self.symbol} with interval {interval}s")
fig = make_subplots(
rows=2, cols=1,
@ -521,7 +569,14 @@ class RealTimeChart:
# Get candlesticks from tick storage
df = self.tick_storage.get_candles(interval_seconds=interval)
if not df.empty:
# Debug information about the dataframe
logger.info(f"Candles dataframe empty: {df.empty}, tick count: {len(self.tick_storage.ticks)}")
if not df.empty and len(df) > 0:
logger.info(f"Candles dataframe shape: {df.shape}")
logger.info(f"Candles dataframe columns: {df.columns.tolist()}")
logger.info(f"Candles dataframe first row: {df.iloc[0].to_dict() if len(df) > 0 else 'No rows'}")
# Add candlestick chart
fig.add_trace(
go.Candlestick(
@ -551,8 +606,7 @@ class RealTimeChart:
row=2, col=1
)
# Add latest price line
if len(df) > 0:
# Add latest price line and annotation
latest_price = df['close'].iloc[-1]
fig.add_shape(
type="line",
@ -564,7 +618,6 @@ class RealTimeChart:
row=1, col=1
)
# Add price label
fig.add_annotation(
x=df['timestamp'].max(),
y=latest_price,
@ -574,6 +627,20 @@ class RealTimeChart:
xshift=50,
row=1, col=1
)
else:
# If no data, add a text annotation to the chart
logger.warning(f"No data to display for {self.symbol} - tick count: {len(self.tick_storage.ticks)}")
if self.tick_storage.ticks:
logger.info(f"Sample tick: {self.tick_storage.ticks[0]}")
# Add a message to the empty chart
fig.add_annotation(
x=0.5, y=0.5,
text=f"Waiting for {self.symbol} data...",
showarrow=False,
font=dict(size=20, color="white"),
xref="paper", yref="paper"
)
# Update layout with improved styling
fig.update_layout(
@ -600,6 +667,28 @@ class RealTimeChart:
return fig
except Exception as e:
logger.error(f"Error updating chart: {str(e)}")
import traceback
logger.error(traceback.format_exc())
# Create a minimal figure with error message
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error updating chart: {str(e)}",
showarrow=False,
font=dict(size=14, color="red"),
xref="paper", yref="paper"
)
fig.update_layout(
height=800,
template='plotly_dark',
paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)'
)
return fig
async def start_websocket(self):
ws = ExchangeWebSocket(self.symbol)
@ -610,6 +699,10 @@ class RealTimeChart:
continue
try:
logger.info(f"WebSocket connected for {self.symbol}, beginning data collection")
tick_count = 0
last_tick_count_log = time.time()
while True:
if not ws.running:
logger.warning("WebSocket not running, breaking loop")
@ -627,6 +720,7 @@ class RealTimeChart:
'high': data['high'],
'low': data['low']
}
logger.debug(f"Received kline data: {data}")
else:
# Use trade data
trade_data = {
@ -634,14 +728,26 @@ class RealTimeChart:
'price': data['price'],
'volume': data['volume']
}
logger.debug(f"Updating candlestick with data: {trade_data}")
# Store raw tick in the tick storage
self.tick_storage.add_tick(trade_data)
tick_count += 1
# Also update the old candlestick data for backward compatibility
self.candlestick_data.update_from_trade(trade_data)
# Log tick counts periodically
current_time = time.time()
if current_time - last_tick_count_log >= 10: # Log every 10 seconds
logger.info(f"{self.symbol}: Collected {tick_count} ticks in last {current_time - last_tick_count_log:.1f}s, total: {len(self.tick_storage.ticks)}")
last_tick_count_log = current_time
tick_count = 0
# Check if ticks are being converted to candles
if len(self.tick_storage.ticks) > 0:
sample_df = self.tick_storage.get_candles(interval_seconds=1)
logger.info(f"{self.symbol}: Sample candle count: {len(sample_df)}")
await asyncio.sleep(0.01)
except Exception as e:
logger.error(f"Error in WebSocket loop: {str(e)}")
@ -652,8 +758,23 @@ class RealTimeChart:
await asyncio.sleep(5)
def run(self, host='localhost', port=8050):
    """Run the Dash server, retrying once on ``port + 100`` if the port is busy.

    Args:
        host: Interface passed to ``self.app.run``.
        port: Primary port passed to ``self.app.run``.

    Raises:
        Exception: Any error from the primary ``self.app.run`` call that is
            not an "address already in use" condition is re-raised. A
            failure on the alternative port is logged and not re-raised.
    """
    import errno

    try:
        logger.info(f"Starting Dash server for {self.symbol} on {host}:{port}")
        self.app.run(debug=False, host=host, port=port)
    except Exception as e:
        logger.error(f"Error running Dash server on port {port}: {str(e)}")

        # Prefer the errno check: the message text varies by platform and
        # locale. The substring match is kept as a fallback for errors
        # that only carry the text.
        port_in_use = (
            (isinstance(e, OSError) and e.errno == errno.EADDRINUSE)
            or "Address already in use" in str(e)
        )
        if not port_in_use:
            # Re-raise other exceptions
            raise

        alt_port = port + 100
        logger.warning(f"Port {port} is busy, trying alternative port {alt_port}")
        try:
            self.app.run(debug=False, host=host, port=alt_port)
        except Exception as alt_e:
            logger.error(f"Error running Dash server on alternative port {alt_port}: {str(alt_e)}")
async def main():
symbols = ["ETH/USDT", "BTC/USDT"]
@ -672,10 +793,12 @@ async def main():
server_threads = []
for i, chart in enumerate(charts):
port = 8050 + i # Use different ports for each chart
thread = Thread(target=lambda c=chart, p=8050+i: c.run(port=p)) # Fix lambda capture
logger.info(f"Starting chart for {chart.symbol} on port {port}")
thread = Thread(target=lambda c=chart, p=port: c.run(port=p)) # Ensure correct port is passed
thread.daemon = True
thread.start()
server_threads.append(thread)
logger.info(f"Thread started for {chart.symbol} on port {port}")
try:
# Keep the main task running