using LLM for sentiment analysis
setup_advanced_hf_runner.sh (new file, 366 lines added)
@@ -0,0 +1,366 @@
#!/bin/bash

# Advanced Hugging Face Model Runner with Parallelism
# This script sets up a Docker-based solution that mimics Docker Model Runner functionality
# Specifically designed for HF models not available in LM Studio

set -e

echo "=== Advanced Hugging Face Model Runner Setup ==="
echo "Designed for models not available in LM Studio with parallelism support"
echo ""

# Create project directory
PROJECT_DIR="$HOME/hf-model-runner"
mkdir -p "$PROJECT_DIR"
cd "$PROJECT_DIR"

echo "Project directory: $PROJECT_DIR"

# Create Docker Compose configuration with GPU support and parallelism
cat > docker-compose.yml << 'EOF'
version: '3.8'

services:
  # Main model server with GPU support and parallelism
  llama-cpp-server:
    image: ghcr.io/ggerganov/llama.cpp:server
    container_name: hf-model-server
    ports:
      - "8080:8080"
    volumes:
      - ./models:/models
      - ./config:/config
    environment:
      - MODEL_PATH=/models
      - GPU_LAYERS=35    # Adjust based on your GPU memory
      - THREADS=8        # CPU threads for parallelism
      - BATCH_SIZE=512   # Batch size for parallel processing
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    command: >
      --model /models/current_model.gguf
      --host 0.0.0.0
      --port 8080
      --n-gpu-layers 35
      --threads 8
      --batch-size 512
      --parallel 4
      --cont-batching
      --ctx-size 4096
      --keep-alive 300
      --log-format json
    restart: unless-stopped

  # Alternative: vLLM server for even better parallelism
  vllm-server:
    image: vllm/vllm-openai:latest
    container_name: hf-vllm-server
    ports:
      - "8000:8000"
    volumes:
      - ./models:/models
    environment:
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    command: >
      --model /models/current_model
      --host 0.0.0.0
      --port 8000
      --tensor-parallel-size 1
      --gpu-memory-utilization 0.9
      --max-model-len 4096
      --trust-remote-code
    restart: unless-stopped
    profiles:
      - vllm

  # Model management service
  model-manager:
    image: python:3.11-slim
    container_name: hf-model-manager
    volumes:
      - ./models:/models
      - ./scripts:/scripts
      - ./config:/config
    working_dir: /scripts
    # python:3.11-slim does not ship huggingface_hub/requests, so install them first
    command: sh -c "pip install --quiet huggingface_hub requests && python model_manager.py"
    restart: unless-stopped
    depends_on:
      - llama-cpp-server

EOF
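
# Note on the services above: the vLLM alternative sits behind a compose profile,
# so a plain "docker-compose up" starts only the llama.cpp path. Rough sketches,
# assuming a model has already been placed at models/current_model.gguf and that
# the image exposes llama.cpp's /health route (recent server builds do):
#   docker-compose --profile vllm up -d vllm-server   # opt in to the vLLM server
#   curl http://localhost:8080/health                 # llama.cpp liveness check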

# Create model management script
mkdir -p scripts
cat > scripts/model_manager.py << 'EOF'
#!/usr/bin/env python3
"""
Hugging Face Model Manager
Downloads and manages HF models with GGUF format support
"""

import os
import json
import requests
import subprocess
from pathlib import Path
from huggingface_hub import hf_hub_download, list_repo_files
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class HFModelManager:
    def __init__(self, models_dir="/models"):
        self.models_dir = Path(models_dir)
        self.models_dir.mkdir(exist_ok=True)
        self.config_file = Path("/config/models.json")

    def list_available_models(self, repo_id):
        """List available GGUF models in a HF repository"""
        try:
            files = list_repo_files(repo_id)
            gguf_files = [f for f in files if f.endswith('.gguf')]
            return gguf_files
        except Exception as e:
            logger.error(f"Error listing models for {repo_id}: {e}")
            return []

    def download_model(self, repo_id, filename=None):
        """Download a GGUF model from Hugging Face"""
        try:
            if filename is None:
                # No filename given: auto-select a GGUF file from the repo
                files = self.list_available_models(repo_id)
                if not files:
                    raise ValueError(f"No GGUF files found in {repo_id}")

                # Rough heuristic: sort filenames in reverse alphabetical order
                # and take the first one (this does not inspect actual file sizes)
                gguf_files = sorted(files, key=lambda x: x.lower(), reverse=True)
                filename = gguf_files[0]
                logger.info(f"Auto-selected model: {filename}")

            logger.info(f"Downloading {repo_id}/{filename}...")

            # Download the model
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=self.models_dir,
                local_dir_use_symlinks=False
            )

            # Create symlink for current model
            current_model_path = self.models_dir / "current_model.gguf"
            if current_model_path.exists():
                current_model_path.unlink()
            current_model_path.symlink_to(Path(model_path).name)

            logger.info(f"Model downloaded to: {model_path}")
            logger.info(f"Current model symlink: {current_model_path}")

            return model_path

        except Exception as e:
            logger.error(f"Error downloading model: {e}")
            raise

    def get_model_info(self, repo_id):
        """Get information about a model repository"""
        try:
            # This would typically use the HF API
            return {
                "repo_id": repo_id,
                "available_files": self.list_available_models(repo_id),
                "status": "available"
            }
        except Exception as e:
            logger.error(f"Error getting model info: {e}")
            return None


def main():
    manager = HFModelManager()

    # Example: Download a specific model
    # You can modify this to download any HF model
    repo_id = "microsoft/DialoGPT-medium"  # Example model

    print(f"Managing models in: {manager.models_dir}")
    print(f"Available models: {manager.list_available_models(repo_id)}")

    # Uncomment to download a model:
    # manager.download_model(repo_id)


if __name__ == "__main__":
    main()
EOF
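
# One-off sketch for poking at a repository with the manager class above. The
# repo name is only an illustration, and the ad-hoc pip install mirrors what
# download_model.sh does below, since python:3.11-slim ships neither
# huggingface_hub nor requests:
#   docker-compose run --rm model-manager sh -c "pip install --quiet huggingface_hub requests && python -c \"from model_manager import HFModelManager; print(HFModelManager().list_available_models('TheBloke/Mistral-7B-Instruct-v0.2-GGUF'))\""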

# Create configuration directory
mkdir -p config
cat > config/models.json << 'EOF'
{
  "available_models": {
    "microsoft/DialoGPT-medium": {
      "description": "Microsoft DialoGPT Medium",
      "size": "345M",
      "format": "gguf"
    },
    "microsoft/DialoGPT-large": {
      "description": "Microsoft DialoGPT Large",
      "size": "774M",
      "format": "gguf"
    }
  },
  "current_model": null,
  "settings": {
    "gpu_layers": 35,
    "threads": 8,
    "batch_size": 512,
    "context_size": 4096
  }
}
EOF
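
# The settings above are informational defaults kept alongside the models; they
# are not injected into docker-compose.yml automatically. A small sketch for
# inspecting them (jq is already assumed by test_api.sh below):
#   jq '.settings' config/models.json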

# Create model download script
cat > download_model.sh << 'EOF'
#!/bin/bash

# Download specific Hugging Face model
# Usage: ./download_model.sh <repo_id> [filename]

REPO_ID=${1:-"microsoft/DialoGPT-medium"}
FILENAME=${2:-""}

echo "=== Downloading Hugging Face Model ==="
echo "Repository: $REPO_ID"
echo "Filename: ${FILENAME:-"auto-select a GGUF file"}"
echo ""

# Install the required Python packages inside the manager container and run the
# download there (python:3.11-slim does not ship huggingface_hub or requests)
docker-compose run --rm model-manager sh -c "pip install --quiet huggingface_hub requests && python -c \"
from model_manager import HFModelManager
import sys

manager = HFModelManager()
try:
    if '$FILENAME':
        manager.download_model('$REPO_ID', '$FILENAME')
    else:
        manager.download_model('$REPO_ID')
    print('Model downloaded successfully!')
except Exception as e:
    print(f'Error: {e}')
    sys.exit(1)
\""

echo ""
echo "=== Model Download Complete ==="
echo "You can now start the server with: docker-compose up"
EOF

chmod +x download_model.sh
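
# Usage sketches (the repository and filename below are only illustrations; any
# Hugging Face repo that publishes .gguf files will work):
#   ./download_model.sh TheBloke/Mistral-7B-Instruct-v0.2-GGUF
#   ./download_model.sh TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf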

# Create API test script
cat > test_api.sh << 'EOF'
#!/bin/bash

# Test the model API
# Usage: ./test_api.sh [prompt]

PROMPT=${1:-"Hello, how are you?"}
API_URL="http://localhost:8080/completion"

echo "=== Testing Model API ==="
echo "Prompt: $PROMPT"
echo "API URL: $API_URL"
echo ""

# Test the API
curl -X POST "$API_URL" \
  -H "Content-Type: application/json" \
  -d "{
    \"prompt\": \"$PROMPT\",
    \"n_predict\": 100,
    \"temperature\": 0.7,
    \"top_p\": 0.9,
    \"stream\": false
  }" | jq '.'

echo ""
echo "=== API Test Complete ==="
EOF

chmod +x test_api.sh
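
# test_api.sh exercises llama.cpp's native /completion route. A rough equivalent
# against the OpenAI-compatible surface (assuming the image exposes the /v1
# routes, which current llama.cpp server builds do):
#   curl -X POST http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello, how are you?"}], "max_tokens": 100}'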

# Create startup script
cat > start_server.sh << 'EOF'
#!/bin/bash

echo "=== Starting Hugging Face Model Server ==="
echo ""

# Check if NVIDIA GPU is available
if command -v nvidia-smi &> /dev/null; then
    echo "NVIDIA GPU detected:"
    nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader,nounits
    echo ""
    echo "Starting with GPU acceleration..."
    docker-compose up llama-cpp-server
else
    echo "No NVIDIA GPU detected, starting with CPU only..."
    # Generate a CPU-only compose file by disabling GPU offload.
    # Note: this only sets --n-gpu-layers to 0; on hosts without the NVIDIA
    # container runtime the deploy/resources GPU reservation must also be removed.
    sed 's/n-gpu-layers 35/n-gpu-layers 0/' docker-compose.yml > docker-compose-cpu.yml
    docker-compose -f docker-compose-cpu.yml up llama-cpp-server
fi
EOF

chmod +x start_server.sh
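
# start_server.sh keeps the server in the foreground; a detached start plus log
# tailing is an equivalent alternative (sketch):
#   docker-compose up -d llama-cpp-server
#   docker-compose logs -f llama-cpp-server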

echo ""
echo "=== Setup Complete! ==="
echo ""
echo "Project directory: $PROJECT_DIR"
echo ""
echo "=== Next Steps ==="
echo "1. Download a model:"
echo "   ./download_model.sh microsoft/DialoGPT-medium"
echo ""
echo "2. Start the server:"
echo "   ./start_server.sh"
echo ""
echo "3. Test the API:"
echo "   ./test_api.sh 'Hello, how are you?'"
echo ""
echo "=== Available Commands ==="
echo "- Download model: ./download_model.sh <repo_id> [filename]"
echo "- Start server: ./start_server.sh"
echo "- Test API: ./test_api.sh [prompt]"
echo "- View logs: docker-compose logs -f llama-cpp-server"
echo "- Stop server: docker-compose down"
echo ""
echo "=== Parallelism Features ==="
echo "- GPU acceleration with NVIDIA support"
echo "- Multi-threading for CPU processing"
echo "- Batch processing for efficiency"
echo "- Continuous batching for multiple requests"
echo ""
echo "=== OpenAI-Compatible API ==="
echo "The server exposes the following endpoints:"
echo "- POST /completion - Native llama.cpp text completion"
echo "- POST /v1/chat/completions - OpenAI-compatible chat completions"
echo "- GET /v1/models - List available models"
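
# Many OpenAI-compatible clients only need their base URL pointed at this server.
# A sketch using the environment variables the official OpenAI SDKs read (the key
# value is arbitrary, since the local server does not check it):
#   export OPENAI_BASE_URL="http://localhost:8080/v1"
#   export OPENAI_API_KEY="sk-local"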