# nim_vlm_blueprint

#!/usr/bin/env python3
"""
VSS (Video Search and Summarization) Docker Setup Script
This script sets up the complete VSS environment using Docker containers
including NIM models for LLM, reranking, and embeddings.
"""

import os
import json
import subprocess
import time
import sys
from pathlib import Path

def setup_environment_variables():
    """Set up all required environment variables for VSS.

    ``NGC_API_KEY`` is only filled with the placeholder when the caller has
    not already exported a non-empty value, so a real key coming from the
    shell is never clobbered.  Every other variable is assigned
    unconditionally because the rest of this script relies on these exact
    values (published ports, cache location, model selection).
    """
    # NGC API key for NVIDIA containers: keep a key the user already exported.
    if not os.environ.get("NGC_API_KEY"):
        os.environ["NGC_API_KEY"] = "my key"

    # LLM endpoints for VIA engine to consume
    os.environ["LLM_ENDPOINT"] = "http://localhost:8000"
    os.environ["RERANK_ENDPOINT"] = "http://localhost:8007"
    os.environ["EMBEDDING_ENDPOINT"] = "http://localhost:8006"

    # Port configurations
    os.environ["BACKEND_PORT"] = "60000"     # VSS Backend internal port
    os.environ["FRONTEND_PORT"] = "9100"     # VSS Frontend port

    # Database credentials
    os.environ["GRAPH_DB_USERNAME"] = "neo4j"
    os.environ["GRAPH_DB_PASSWORD"] = "password"

    # GPU configuration
    os.environ["NVIDIA_VISIBLE_DEVICES"] = "0"  # Single H100 GPU

    # Model configurations
    os.environ["VLM_MODEL_TO_USE"] = "vila-1.5"  # or choose nvila
    os.environ["MODEL_PATH"] = "ngc:nim/nvidia/vila-1.5-40b:vila-yi-34b-siglip-stage3_1003_video_v8"
    # For nvila alternative: "git:https://huggingface.co/Efficient-Large-Model/NVILA-15B"

    # Cache configuration
    os.environ["LOCAL_NIM_CACHE"] = os.path.expanduser("~/.cache/nim")

    print("✓ Environment variables configured")

def change_to_working_directory():
    """Switch the process into the single-GPU local deployment directory.

    The path is relative to the current working directory.  When it does
    not exist, an error is printed and the script exits with status 1.
    """
    # Author's note lists two candidate folders:
    # local_deployment_single_gpu, launchables
    deploy_dir = "./docker/local_deployment_single_gpu"
    try:
        os.chdir(deploy_dir)
        print(f"✓ Changed directory to: {os.getcwd()}")
    except FileNotFoundError:
        print(f"❌ Error: Directory {deploy_dir} not found")
        sys.exit(1)

def setup_nim_cache():
    """Make sure the directory named by ``LOCAL_NIM_CACHE`` exists.

    Missing parent directories are created as well; an already existing
    directory is left untouched.
    """
    nim_cache = os.environ["LOCAL_NIM_CACHE"]
    # exist_ok=True keeps repeated runs of the script from failing here.
    os.makedirs(nim_cache, exist_ok=True)
    print(f"✓ NIM cache directory created: {nim_cache}")

def docker_login():
    """Log in to the NVIDIA Container Registry (nvcr.io).

    The API key is read from ``NGC_API_KEY`` and written to the stdin of
    ``docker login --password-stdin``.  No shell is involved, so a key
    containing quotes or ``$`` cannot break or inject into the command, and
    the key never appears in the command line or in anything printed here.

    Exits the script with status 1 when the ``docker`` executable is
    missing or the login is rejected.

    Raises:
        KeyError: if ``NGC_API_KEY`` is not set.
    """
    print("🔐 Logging into NVIDIA Container Registry...")
    # '$oauthtoken' is the literal user name passed to the registry; with an
    # argv list it needs no shell quoting.
    login_cmd = ["docker", "login", "nvcr.io", "-u", "$oauthtoken", "--password-stdin"]
    try:
        result = subprocess.run(
            login_cmd,
            input=os.environ["NGC_API_KEY"],
            check=True,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        print("❌ Docker login failed: 'docker' executable not found")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        # Surface docker's own diagnostics; the command line no longer
        # contains the key, so printing the exception is safe as a fallback.
        detail = (e.stderr or "").strip()
        print(f"❌ Docker login failed: {detail or e}")
        sys.exit(1)
    print("✓ Successfully logged into NVIDIA Container Registry ->", result.stdout.strip())

def configure_docker_storage():
    """Prepare (but do not apply) a Docker ``data-root`` setting.

    Loads /etc/docker/daemon.json when it exists, sets its "data-root"
    entry to ./docker in memory and reports what would be configured.
    Nothing is written back and the daemon is not restarted: those steps
    need elevated privileges and are left as commented-out commands.
    Problems while reading the file are reported as warnings only.
    """
    daemon_file = "/etc/docker/daemon.json"
    storage_path = "./docker"

    try:
        # Start from the current daemon settings, or from scratch if the
        # file is absent.
        if os.path.exists(daemon_file):
            with open(daemon_file, 'r') as handle:
                settings = json.load(handle)
        else:
            settings = {}

        # Point the data root at the new location.
        settings["data-root"] = storage_path
        # Only consumed by the commented-out commands below.
        config_str = json.dumps(settings, indent=4)

        print(f"✓ Docker storage configuration prepared (root: {storage_path})")
        print("⚠️  Note: Docker daemon restart required manually with elevated privileges")

        # Note: These commands require sudo privileges and are commented out
        # subprocess.run(f"echo '{config_str}' | sudo tee {daemon_file} > /dev/null", shell=True, check=True)
        # subprocess.run("sudo systemctl restart docker", shell=True, check=True)

    except PermissionError:
        print("⚠️  Cannot read docker daemon file. Elevated privileges required for storage configuration.")
    except Exception as err:
        print(f"⚠️  Docker storage configuration warning: {err}")

def verify_docker_storage():
    """Print the 'Docker Root Dir' line reported by ``docker info``.

    Prints nothing when the command yields no matching line; any
    exception raised along the way is reported as a warning.
    """
    try:
        # Give the daemon a moment in case it was restarted just before.
        time.sleep(5)
        probe = subprocess.run(
            "docker info | grep 'Docker Root Dir'",
            shell=True,
            capture_output=True,
            text=True,
        )
        root_line = probe.stdout
        if not root_line:
            return
        print(f"✓ Docker storage info: {root_line.strip()}")
    except Exception as err:
        print(f"⚠️  Could not verify docker storage: {err}")

def start_llm_container():
    """Start the LLaMA 3.1 70B Instruct NIM container in detached mode.

    The container's port 8000 is published on host port 8000, the
    directory named by ``LOCAL_NIM_CACHE`` is mounted at /opt/nim/.cache,
    and the container runs under the current effective user id.

    Exits the script with status 1 when ``docker`` is missing or
    ``docker run`` fails; docker's stderr is printed so the cause is
    visible.

    Raises:
        KeyError: if ``LOCAL_NIM_CACHE`` is not set.
    """
    print("🚀 Starting LLaMA 3.1 70B Instruct container (port 8000)...")
    cmd = [
        "docker", "run", "-it", "--rm",
        "--gpus", "device=0",
        "--shm-size=16GB",
        # Name only: docker forwards the value from this process's environment.
        "-e", "NGC_API_KEY",
        "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
        # Same value `id -u` prints, without spawning a process.
        "-u", str(os.geteuid()),
        "-p", "8000:8000",
        "-d",
        "nvcr.io/nim/meta/llama-3.1-70b-instruct:latest",
    ]

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        print("❌ Failed to start LLM container: 'docker' executable not found")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to start LLM container: {e}")
        # stderr was captured, so echo it; otherwise the real reason is lost.
        if e.stderr:
            print(e.stderr.strip())
        sys.exit(1)
    print(f"✓ LLM container started: {result.stdout.strip()}")

def start_rerank_container():
    """Start the LLaMA 3.2 Rerank NIM container in detached mode.

    The container's port 8000 is published on host port 8007, the
    directory named by ``LOCAL_NIM_CACHE`` is mounted at /opt/nim/.cache,
    and the container runs under the current effective user id.

    Exits the script with status 1 when ``docker`` is missing or
    ``docker run`` fails; docker's stderr is printed so the cause is
    visible.

    Raises:
        KeyError: if ``LOCAL_NIM_CACHE`` is not set.
    """
    # The announced port must match the "-p 8007:8000" mapping below.
    print("🚀 Starting LLaMA 3.2 Rerank container (port 8007)...")
    cmd = [
        "docker", "run", "-it", "--rm",
        "--gpus", "device=0",
        "--shm-size=16GB",
        # Name only: docker forwards the value from this process's environment.
        "-e", "NGC_API_KEY",
        "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
        # Same value `id -u` prints, without spawning a process.
        "-u", str(os.geteuid()),
        "-p", "8007:8000",
        "-d",
        "nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:latest",
    ]

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        print("❌ Failed to start Rerank container: 'docker' executable not found")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to start Rerank container: {e}")
        # stderr was captured, so echo it; otherwise the real reason is lost.
        if e.stderr:
            print(e.stderr.strip())
        sys.exit(1)
    print(f"✓ Rerank container started: {result.stdout.strip()}")

def start_embed_container():
    """Start the LLaMA 3.2 Embedding NIM container in detached mode.

    The container's port 8000 is published on host port 8006, the
    directory named by ``LOCAL_NIM_CACHE`` is mounted at /opt/nim/.cache,
    and the container runs under the current effective user id.

    Exits the script with status 1 when ``docker`` is missing or
    ``docker run`` fails; docker's stderr is printed so the cause is
    visible.

    Raises:
        KeyError: if ``LOCAL_NIM_CACHE`` is not set.
    """
    # The announced port must match the "-p 8006:8000" mapping below.
    print("🚀 Starting LLaMA 3.2 Embedding container (port 8006)...")
    cmd = [
        "docker", "run", "-it", "--rm",
        "--gpus", "device=0",
        "--shm-size=16GB",
        # Name only: docker forwards the value from this process's environment.
        "-e", "NGC_API_KEY",
        "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
        # Same value `id -u` prints, without spawning a process.
        "-u", str(os.geteuid()),
        "-p", "8006:8000",
        "-d",
        "nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:latest",
    ]

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        print("❌ Failed to start Embedding container: 'docker' executable not found")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to start Embedding container: {e}")
        # stderr was captured, so echo it; otherwise the real reason is lost.
        if e.stderr:
            print(e.stderr.strip())
        sys.exit(1)
    print(f"✓ Embedding container started: {result.stdout.strip()}")

def check_docker_compose():
    """Check that ``docker compose`` works and install the plugin if not.

    Runs ``docker compose version``.  A non-zero exit status triggers
    install_docker_compose(); otherwise the reported version is printed.
    """
    try:
        # check=True is what turns a failing command into
        # CalledProcessError; without it the install branch can never run.
        result = subprocess.run(
            ["docker", "compose", "version"],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError:
        print("📦 Installing Docker Compose...")
        install_docker_compose()
    else:
        print(f"✓ Docker Compose available: {result.stdout.strip()}")

def install_docker_compose():
    """Install the Docker Compose CLI plugin for the current user.

    Downloads the latest linux-x86_64 release binary with curl into
    ~/.docker/cli-plugins/docker-compose and marks it executable.  Exits
    the script with status 1 when curl is missing, the download fails
    (including HTTP errors) or the file system operations fail.
    """
    plugins_dir = os.path.expanduser("~/.docker/cli-plugins")
    compose_url = "https://github.com/docker/compose/releases/latest/download/docker-compose-linux-x86_64"
    compose_path = os.path.join(plugins_dir, "docker-compose")

    try:
        # Create plugins directory
        os.makedirs(plugins_dir, exist_ok=True)

        # --fail makes curl exit non-zero on HTTP errors instead of saving
        # the server's error page as if it were the binary.
        subprocess.run(
            ["curl", "--fail", "-SL", compose_url, "-o", compose_path],
            check=True,
        )

        # Add the execute bits in-process rather than spawning `chmod +x`.
        os.chmod(compose_path, os.stat(compose_path).st_mode | 0o111)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"❌ Failed to install Docker Compose: {e}")
        sys.exit(1)

    print("✓ Docker Compose installed successfully")

def stop_existing_services():
    """Bring down whatever Docker Compose services are currently up.

    Runs ``docker compose down`` in the current directory.  A non-zero
    exit status is reported as "nothing to stop" rather than as an error.
    """
    print("🛑 Stopping existing services...")
    try:
        subprocess.run(
            ["docker", "compose", "down"],
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError:
        print("ℹ️  No existing services to stop")
    else:
        print("✓ Existing services stopped")

def start_vss_services():
    """Bring up the VSS stack with Docker Compose and follow its startup.

    Runs ``docker compose up --quiet-pull -d`` in the current directory,
    then hands a list of log keywords to monitor_service_logs().  Exits
    the script with status 1 when a command raises CalledProcessError.
    """
    print("🚀 Starting VSS services...")

    # Only log lines containing one of these substrings are shown while
    # the services come up.
    keywords = [
        "Milvus server started",
        "Downloading model",
        "Downloaded model",
        "VILA Embeddings",
        "VILA TRT model load execution time",
        "Starting quantization",
        "Quantization done",
        "Engine generation completed",
        "TRT engines generated",
        "Uvicorn",
        "VIA Server loaded",
        "Backend",
        "Frontend",
        "****",
    ]
    compose_up = ['docker', 'compose', 'up', '--quiet-pull', '-d']

    try:
        # Detached start: the command returns once containers are created.
        subprocess.run(compose_up, check=True)
        print("✓ VSS services started in background")

        # Follow the logs until readiness is reported.
        monitor_service_logs(keywords)
    except subprocess.CalledProcessError as err:
        print(f"❌ Failed to start VSS services: {err}")
        sys.exit(1)

def filter_logs(logs, keywords):
    """Return the lines of *logs* that contain at least one of *keywords*.

    Lines are produced by ``str.splitlines`` and kept in their original
    order; matching is a plain, case-sensitive substring test.
    """
    matched = []
    for entry in logs.splitlines():
        for needle in keywords:
            if needle in entry:
                matched.append(entry)
                break  # one hit is enough for this line
    return matched

def monitor_service_logs(keywords, poll_interval=1, timeout=None):
    """Poll Docker Compose logs until the frontend reports in.

    Repeatedly runs ``docker compose logs --no-color``, prints each line
    that matches one of *keywords* exactly once, and stops as soon as a
    printed line contains "Frontend".

    Args:
        keywords: substrings selecting which log lines are shown.  The
            readiness check only sees lines that pass this filter, so
            "Frontend" (or a substring of it) has to be among them.
        poll_interval: seconds to sleep between polls.
        timeout: give up after this many seconds; ``None`` (the default)
            waits indefinitely, as before.

    Returns:
        True when a "Frontend" line was seen; False when monitoring
        stopped for any other reason (Ctrl-C, timeout, failing
        ``docker compose logs``).
    """
    printed_lines = set()
    deadline = None if timeout is None else time.monotonic() + timeout

    print("📋 Monitoring service startup logs...")

    try:
        while True:
            # The full log is fetched on every poll; the set below filters
            # out what has already been shown.
            result = subprocess.run(['docker', 'compose', 'logs', '--no-color'],
                                    capture_output=True, text=True, check=True)

            for line in filter_logs(result.stdout, keywords):
                # Record the line before printing so a line repeated inside
                # the same poll is not printed twice either.
                if line in printed_lines:
                    continue
                printed_lines.add(line)
                print(f"  {line}")

                # Check if frontend is ready
                if "Frontend" in line:
                    print("\n🎉 VSS Server started successfully!")
                    print("🌐 Access VSS Frontend UI from Brev portal tunnels section")
                    print(f"🔗 Frontend should be available on port {os.environ['FRONTEND_PORT']}")
                    print(f"🔗 Backend should be available on port {os.environ['BACKEND_PORT']}")
                    return True

            if deadline is not None and time.monotonic() >= deadline:
                print(f"⚠️  Timed out after {timeout} seconds waiting for the frontend")
                return False

            time.sleep(poll_interval)

    except KeyboardInterrupt:
        print("\n⏸️  Log monitoring stopped by user")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error monitoring logs: {e}")
    return False

def main():
    """Run the whole VSS setup from start to finish.

    Order of operations: environment variables, working directory and
    cache directory; registry login and Docker storage checks; the three
    NIM containers; then the Docker Compose services.  Ends by printing a
    summary of the host ports in use.
    """
    print("🔧 Starting VSS Docker Setup...")
    print("=" * 50)

    # Setup phase
    setup_environment_variables()
    change_to_working_directory()
    setup_nim_cache()

    # Docker configuration phase
    docker_login()
    configure_docker_storage()
    verify_docker_storage()

    # Container startup phase
    start_llm_container()
    start_rerank_container()
    start_embed_container()

    # VSS services phase
    check_docker_compose()
    stop_existing_services()
    start_vss_services()

    print("\n✅ VSS setup completed successfully!")
    print("\nPort Configuration Summary:")
    # Host ports below mirror the "-p" mappings used when the NIM
    # containers are started (8000:8000, 8007:8000, 8006:8000).
    print("  - LLM Service: 8000")
    print("  - Rerank Service: 8007")
    print("  - Embedding Service: 8006")
    print(f"  - VSS Backend: {os.environ['BACKEND_PORT']}")
    print(f"  - VSS Frontend: {os.environ['FRONTEND_PORT']}")

# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()