LLM proxy integration

This commit is contained in:
Dobromir Popov
2025-08-26 18:37:00 +03:00
parent 9a76624904
commit b404191ffa
5 changed files with 572 additions and 21 deletions

View File

@@ -41,7 +41,7 @@ class TextDataExporter:
def __init__(self,
data_provider=None,
export_dir: str = "data/text_exports",
export_dir: str = "NN/training/samples/txt",
main_symbol: str = "ETH/USDT",
ref1_symbol: str = "BTC/USDT",
ref2_symbol: str = "SPX"):
@@ -116,7 +116,7 @@ class TextDataExporter:
# Check if we need a new file (new minute)
if self.current_minute != current_minute_key:
self.current_minute = current_minute_key
self.current_filename = f"market_data_{current_minute_key}.csv"
self.current_filename = f"market_data_{current_minute_key}.txt"
logger.info(f"Starting new export file: {self.current_filename}")
# Gather data for all symbols and timeframes
@@ -193,7 +193,7 @@ class TextDataExporter:
return None
def _write_csv_file(self, export_data: List[Dict[str, Any]]):
"""Write data to CSV file"""
"""Write data to TXT file in tab-separated format"""
if not export_data:
return
@@ -201,25 +201,17 @@ class TextDataExporter:
with self.export_lock:
try:
with open(filepath, 'w', newline='', encoding='utf-8') as csvfile:
# Create header based on the format specification
fieldnames = self._create_csv_header()
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# Write header
writer.writeheader()
# Group data by symbol type for organized output
grouped_data = self._group_data_by_symbol(export_data)
# Write data rows
for row in self._format_csv_rows(grouped_data):
writer.writerow(row)
# Group data by symbol type for organized output
grouped_data = self._group_data_by_symbol(export_data)
with open(filepath, 'w', encoding='utf-8') as txtfile:
# Write in the format specified in readme.md sample
self._write_tab_format(txtfile, grouped_data)
logger.debug(f"Exported {len(export_data)} data points to {filepath}")
except Exception as e:
logger.error(f"Error writing CSV file {filepath}: {e}")
logger.error(f"Error writing TXT file {filepath}: {e}")
def _create_csv_header(self) -> List[str]:
"""Create CSV header based on specification"""
@@ -288,6 +280,57 @@ class TextDataExporter:
rows.append(row)
return rows
def _write_tab_format(self, txtfile, grouped_data: Dict[str, Dict[str, Dict[str, Any]]]):
"""Write data in tab-separated format like readme.md sample"""
# Write header structure
txtfile.write("symbol\tMAIN SYMBOL (ETH)\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tREF1 (BTC)\t\t\t\t\t\tREF2 (SPX)\t\t\t\t\t\tREF3 (SOL)\n")
txtfile.write("timeframe\t1s\t\t\t\t\t\t1m\t\t\t\t\t\t1h\t\t\t\t\t\t1d\t\t\t\t\t\t1s\t\t\t\t\t\t1s\t\t\t\t\t\t1s\n")
txtfile.write("datapoint\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\tO\tH\tL\tC\tV\tTimestamp\n")
# Write data row
row_parts = []
current_time = datetime.now()
# Timestamp first
row_parts.append(current_time.strftime("%Y-%m-%dT%H:%M:%SZ"))
# ETH data for all timeframes (1s, 1m, 1h, 1d)
main_data = grouped_data.get('MAIN', {})
for timeframe in ['1s', '1m', '1h', '1d']:
data_point = main_data.get(timeframe)
if data_point:
row_parts.extend([
f"{data_point['open']:.2f}",
f"{data_point['high']:.2f}",
f"{data_point['low']:.2f}",
f"{data_point['close']:.2f}",
f"{data_point['volume']:.1f}",
data_point['timestamp'].strftime("%Y-%m-%dT%H:%M:%SZ")
])
else:
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
# REF1 (BTC), REF2 (SPX), REF3 (SOL) - 1s timeframe only
for ref_type in ['REF1', 'REF2']: # REF3 will be added by LLM proxy
ref_data = grouped_data.get(ref_type, {})
data_point = ref_data.get('1s')
if data_point:
row_parts.extend([
f"{data_point['open']:.2f}",
f"{data_point['high']:.2f}",
f"{data_point['low']:.2f}",
f"{data_point['close']:.2f}",
f"{data_point['volume']:.1f}",
data_point['timestamp'].strftime("%Y-%m-%dT%H:%M:%SZ")
])
else:
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
# Add placeholder for REF3 (SOL) - will be filled by LLM proxy
row_parts.extend(["0", "0", "0", "0", "0", current_time.strftime("%Y-%m-%dT%H:%M:%SZ")])
txtfile.write("\t".join(row_parts) + "\n")
def get_current_filename(self) -> Optional[str]:
    """Return the name of the export file currently being written, or None
    if no file has been started yet."""
    filename = self.current_filename
    return filename
@@ -308,7 +351,7 @@ class TextDataExporter:
# Add file count
try:
files = [f for f in os.listdir(self.export_dir) if f.endswith('.csv')]
files = [f for f in os.listdir(self.export_dir) if f.endswith('.txt')]
stats['total_files'] = len(files)
except:
stats['total_files'] = 0