LLM proxy integration

This commit is contained in:
Dobromir Popov
2025-08-26 18:37:00 +03:00
parent 9a76624904
commit b404191ffa
5 changed files with 572 additions and 21 deletions

View File

@@ -41,7 +41,7 @@ class TextDataExporter:
def __init__(self,
data_provider=None,
export_dir: str = "data/text_exports",
export_dir: str = "NN/training/samples/txt",
main_symbol: str = "ETH/USDT",
ref1_symbol: str = "BTC/USDT",
ref2_symbol: str = "SPX"):
@@ -116,7 +116,7 @@ class TextDataExporter:
# Check if we need a new file (new minute)
if self.current_minute != current_minute_key:
self.current_minute = current_minute_key
self.current_filename = f"market_data_{current_minute_key}.csv"
self.current_filename = f"market_data_{current_minute_key}.txt"
logger.info(f"Starting new export file: {self.current_filename}")
# Gather data for all symbols and timeframes
@@ -193,7 +193,7 @@ class TextDataExporter:
return None
def _write_csv_file(self, export_data: List[Dict[str, Any]]):
"""Write data to CSV file"""
"""Write data to TXT file in tab-separated format"""
if not export_data:
return
@@ -201,25 +201,17 @@ class TextDataExporter:
with self.export_lock:
try:
with open(filepath, 'w', newline='', encoding='utf-8') as csvfile:
# Create header based on the format specification
fieldnames = self._create_csv_header()
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# Write header
writer.writeheader()
# Group data by symbol type for organized output
grouped_data = self._group_data_by_symbol(export_data)
# Write data rows
for row in self._format_csv_rows(grouped_data):
writer.writerow(row)
# Group data by symbol type for organized output
grouped_data = self._group_data_by_symbol(export_data)
with open(filepath, 'w', encoding='utf-8') as txtfile:
# Write in the format specified in readme.md sample
self._write_tab_format(txtfile, grouped_data)
logger.debug(f"Exported {len(export_data)} data points to {filepath}")
except Exception as e:
logger.error(f"Error writing CSV file {filepath}: {e}")
logger.error(f"Error writing TXT file {filepath}: {e}")
def _create_csv_header(self) -> List[str]:
"""Create CSV header based on specification"""
@@ -288,6 +280,57 @@ class TextDataExporter:
rows.append(row)
return rows
def _write_tab_format(self, txtfile, grouped_data: Dict[str, Dict[str, Dict[str, Any]]]):
"""Write data in tab-separated format like readme.md sample"""
# Write header structure
txtfile.write("symbol\tMAIN SYMBOL (ETH)\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tREF1 (BTC)\t\t\t\t\t\tREF2 (SPX)\t\t\t\t\t\tREF3 (SOL)\n")
txtfile.write("timeframe\t1s\t\t\t\t\t\t1m\t\t\t\t\t\t1h\t\t\t\t\t\t1d\t\t\t\t\t\t1s\t\t\t\t\t\t1s\t\t\t\t\t\t1s\n")
txtfile.write("datapoint\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\n")
# Write data row
row_parts = []
current_time = datetime.now()
# Timestamp first
row_parts.append(current_time.strftime("%Y-%m-%dT%H:%M:%SZ"))
# ETH data for all timeframes (1s, 1m, 1h, 1d)
main_data = grouped_data.get('MAIN', {})
for timeframe in ['1s', '1m', '1h', '1d']:
data_point = main_data.get(timeframe)
if data_point:
row_parts.extend([
f"{data_point['open']:.2f}",
f"{data_point['high']:.2f}",
f"{data_point['low']:.2f}",
f"{data_point['close']:.2f}",
f"{data_point['volume']:.1f}",
data_point['timestamp'].strftime("%Y-%m-%dT%H:%M:%SZ")
])
else:
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
# REF1 (BTC), REF2 (SPX), REF3 (SOL) - 1s timeframe only
for ref_type in ['REF1', 'REF2']: # REF3 will be added by LLM proxy
ref_data = grouped_data.get(ref_type, {})
data_point = ref_data.get('1s')
if data_point:
row_parts.extend([
f"{data_point['open']:.2f}",
f"{data_point['high']:.2f}",
f"{data_point['low']:.2f}",
f"{data_point['close']:.2f}",
f"{data_point['volume']:.1f}",
data_point['timestamp'].strftime("%Y-%m-%dT%H:%M:%SZ")
])
else:
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
# Add placeholder for REF3 (SOL) - will be filled by LLM proxy
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
txtfile.write("\t".join(row_parts) + "\n")
def get_current_filename(self) -> Optional[str]:
    """Return the name of the export file currently being written, or None
    if no file has been started yet."""
    filename = self.current_filename
    return filename
@@ -308,7 +351,7 @@ class TextDataExporter:
# Add file count
try:
files = [f for f in os.listdir(self.export_dir) if f.endswith('.csv')]
files = [f for f in os.listdir(self.export_dir) if f.endswith('.txt')]
stats['total_files'] = len(files)
except:
stats['total_files'] = 0