Guest User

nim_vlm_blueprint

a guest
Jun 18th, 2025
39
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.29 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """
  3. VSS (Video Search System) Docker Setup Script
  4. This script sets up the complete VSS environment using Docker containers
  5. including NIM models for LLM, reranking, and embeddings.
  6. """
  7.  
  8. import os
  9. import json
  10. import subprocess
  11. import time
  12. import sys
  13. from pathlib import Path
  14.  
  15. def setup_environment_variables():
  16.     """Set up all required environment variables for VSS"""
  17.     # NGC API Key for NVIDIA containers
  18.     os.environ["NGC_API_KEY"] = "my key"
  19.    
  20.     # LLM endpoints for VIA engine to consume
  21.     os.environ["LLM_ENDPOINT"] = "http://localhost:8000"
  22.     os.environ["RERANK_ENDPOINT"] = "http://localhost:8007"
  23.     os.environ["EMBEDDING_ENDPOINT"] = "http://localhost:8006"
  24.  
  25.     # Port configurations - No conflicts detected
  26.     os.environ["BACKEND_PORT"] = "60000"     # VSS Backend internal port
  27.     os.environ["FRONTEND_PORT"] = "9100"     # VSS Frontend port
  28.    
  29.     # Database credentials
  30.     os.environ["GRAPH_DB_USERNAME"] = "neo4j"
  31.     os.environ["GRAPH_DB_PASSWORD"] = "password"
  32.    
  33.     # GPU configuration
  34.     os.environ["NVIDIA_VISIBLE_DEVICES"] = "0"  # Single H100 GPU
  35.    
  36.     # Model configurations
  37.     os.environ["VLM_MODEL_TO_USE"] = "vila-1.5"  # or choose nvila
  38.     os.environ["MODEL_PATH"] = "ngc:nim/nvidia/vila-1.5-40b:vila-yi-34b-siglip-stage3_1003_video_v8"
  39.     # For nvila alternative: "git:https://huggingface.co/Efficient-Large-Model/NVILA-15B"
  40.    
  41.     # Cache configuration
  42.     os.environ["LOCAL_NIM_CACHE"] = os.path.expanduser("~/.cache/nim")
  43.    
  44.     print("βœ“ Environment variables configured")
  45.  
  46. def change_to_working_directory():
  47.     """Change to the docker launchables directory"""
  48.     try:
  49.         target_dir = "./docker/local_deployment_single_gpu"#local_deployment_single_gpu, launchables
  50.         os.chdir(target_dir)
  51.         print(f"βœ“ Changed directory to: {os.getcwd()}")
  52.     except FileNotFoundError:
  53.         print(f"❌ Error: Directory {target_dir} not found")
  54.         sys.exit(1)
  55.  
  56. def setup_nim_cache():
  57.     """Create NIM cache directory if it doesn't exist"""
  58.     cache_dir = os.environ["LOCAL_NIM_CACHE"]
  59.     os.makedirs(cache_dir, exist_ok=True)
  60.     print(f"βœ“ NIM cache directory created: {cache_dir}")
  61.  
  62. def docker_login():
  63.     """Login to NVIDIA Container Registry"""
  64.     try:
  65.         print("πŸ” Logging into NVIDIA Container Registry...")
  66.         # Use echo to pipe the API key to docker login
  67.         cmd = f'echo "{os.environ["NGC_API_KEY"]}" | docker login nvcr.io -u \'$oauthtoken\' --password-stdin'
  68.         result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
  69.         print("βœ“ Successfully logged into NVIDIA Container Registry ->",result)
  70.     except subprocess.CalledProcessError as e:
  71.         print(f"❌ Docker login failed: {e}")
  72.         sys.exit(1)
  73.  
  74. def configure_docker_storage():
  75.     """Configure Docker storage location"""
  76.     storage_path = "./docker"
  77.     daemon_file = "/etc/docker/daemon.json"
  78.    
  79.     try:
  80.         # Read existing daemon configuration
  81.         config = {}
  82.         if os.path.exists(daemon_file):
  83.             with open(daemon_file, 'r') as f:
  84.                 config = json.load(f)
  85.        
  86.         # Update data root path
  87.         config["data-root"] = storage_path
  88.         config_str = json.dumps(config, indent=4)
  89.        
  90.         print(f"βœ“ Docker storage configuration prepared (root: {storage_path})")
  91.         print("⚠️  Note: Docker daemon restart required manually with elevated privileges")
  92.        
  93.         # Note: These commands require sudo privileges and are commented out
  94.         # subprocess.run(f"echo '{config_str}' | sudo tee {daemon_file} > /dev/null", shell=True, check=True)
  95.         # subprocess.run("sudo systemctl restart docker", shell=True, check=True)
  96.        
  97.     except PermissionError:
  98.         print("⚠️  Cannot read docker daemon file. Elevated privileges required for storage configuration.")
  99.     except Exception as e:
  100.         print(f"⚠️  Docker storage configuration warning: {e}")
  101.  
  102. def verify_docker_storage():
  103.     """Verify Docker storage location"""
  104.     try:
  105.         time.sleep(5)  # Wait for any potential docker restart
  106.         result = subprocess.run("docker info | grep 'Docker Root Dir'",
  107.                                shell=True, capture_output=True, text=True)
  108.         if result.stdout:
  109.             print(f"βœ“ Docker storage info: {result.stdout.strip()}")
  110.     except Exception as e:
  111.         print(f"⚠️  Could not verify docker storage: {e}")
  112.  
  113. def start_llm_container():
  114.     """Start LLaMA 3.1 70B Instruct NIM container"""
  115.     print("πŸš€ Starting LLaMA 3.1 70B Instruct container (port 8000)...")
  116.     cmd = [
  117.         "docker", "run", "-it", "--rm",
  118.         "--gpus", "device=0",
  119.         "--shm-size=16GB",
  120.         "-e", "NGC_API_KEY",
  121.         "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
  122.         "-u", subprocess.check_output(["id", "-u"], text=True).strip(),
  123.         "-p", "8000:8000",
  124.         "-d",
  125.         "nvcr.io/nim/meta/llama-3.1-70b-instruct:latest"
  126.     ]
  127.    
  128.     try:
  129.         result = subprocess.run(cmd, check=True, capture_output=True, text=True)
  130.         print(f"βœ“ LLM container started: {result.stdout.strip()}")
  131.     except subprocess.CalledProcessError as e:
  132.         print(f"❌ Failed to start LLM container: {e}")
  133.         sys.exit(1)
  134.  
  135. def start_rerank_container():
  136.     """Start LLaMA 3.2 Rerank NIM container"""
  137.     print("πŸš€ Starting LLaMA 3.2 Rerank container (port 9235)...")
  138.     cmd = [
  139.         "docker", "run", "-it", "--rm",
  140.         "--gpus", "device=0",
  141.         "--shm-size=16GB",
  142.         "-e", "NGC_API_KEY",
  143.         "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
  144.         "-u", subprocess.check_output(["id", "-u"], text=True).strip(),
  145.         "-p", "8007:8000",
  146.         "-d",
  147.         "nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:latest"
  148.     ]
  149.    
  150.     try:
  151.         result = subprocess.run(cmd, check=True, capture_output=True, text=True)
  152.         print(f"βœ“ Rerank container started: {result.stdout.strip()}")
  153.     except subprocess.CalledProcessError as e:
  154.         print(f"❌ Failed to start Rerank container: {e}")
  155.         sys.exit(1)
  156.  
  157. def start_embed_container():
  158.     """Start LLaMA 3.2 Embedding NIM container"""
  159.     print("πŸš€ Starting LLaMA 3.2 Embedding container (port 9234)...")
  160.     cmd = [
  161.         "docker", "run", "-it", "--rm",
  162.         "--gpus", "device=0",
  163.         "--shm-size=16GB",
  164.         "-e", "NGC_API_KEY",
  165.         "-v", f"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache",
  166.         "-u", subprocess.check_output(["id", "-u"], text=True).strip(),
  167.         "-p", "8006:8000",
  168.         "-d",
  169.         "nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:latest"
  170.     ]
  171.    
  172.     try:
  173.         result = subprocess.run(cmd, check=True, capture_output=True, text=True)
  174.         print(f"βœ“ Embedding container started: {result.stdout.strip()}")
  175.     except subprocess.CalledProcessError as e:
  176.         print(f"❌ Failed to start Embedding container: {e}")
  177.         sys.exit(1)
  178.  
  179. def check_docker_compose():
  180.     """Check and install Docker Compose if needed"""
  181.     try:
  182.         # Check if docker compose is available
  183.         result = subprocess.run(["docker", "compose", "version"],
  184.                                capture_output=True, text=True)
  185.         print(f"βœ“ Docker Compose available: {result.stdout.strip()}")
  186.     except subprocess.CalledProcessError:
  187.         print("πŸ“¦ Installing Docker Compose...")
  188.         install_docker_compose()
  189.  
  190. def install_docker_compose():
  191.     """Install Docker Compose CLI plugin"""
  192.     try:
  193.         # Create plugins directory
  194.         plugins_dir = os.path.expanduser("~/.docker/cli-plugins")
  195.         os.makedirs(plugins_dir, exist_ok=True)
  196.        
  197.         # Download Docker Compose
  198.         compose_url = "https://github.com/docker/compose/releases/latest/download/docker-compose-linux-x86_64"
  199.         compose_path = os.path.join(plugins_dir, "docker-compose")
  200.        
  201.         subprocess.run(["curl", "-SL", compose_url, "-o", compose_path], check=True)
  202.         subprocess.run(["chmod", "+x", compose_path], check=True)
  203.        
  204.         print("βœ“ Docker Compose installed successfully")
  205.     except subprocess.CalledProcessError as e:
  206.         print(f"❌ Failed to install Docker Compose: {e}")
  207.         sys.exit(1)
  208.  
  209. def stop_existing_services():
  210.     """Stop any existing Docker Compose services"""
  211.     try:
  212.         print("πŸ›‘ Stopping existing services...")
  213.         subprocess.run(["docker", "compose", "down"],
  214.                       capture_output=True, check=True)
  215.         print("βœ“ Existing services stopped")
  216.     except subprocess.CalledProcessError:
  217.         print("ℹ️  No existing services to stop")
  218.  
  219. def start_vss_services():
  220.     """Start VSS services using Docker Compose with filtered logging"""
  221.     print("πŸš€ Starting VSS services...")
  222.    
  223.     # Keywords to monitor in logs for service readiness
  224.     keywords = [
  225.         "Milvus server started", "Downloading model", "Downloaded model",
  226.         "VILA Embeddings", "VILA TRT model load execution time",
  227.         "Starting quantization", "Quantization done", "Engine generation completed",
  228.         "TRT engines generated", "Uvicorn", "VIA Server loaded",
  229.         "Backend", "Frontend", "****"
  230.     ]
  231.    
  232.     try:
  233.         # Start services in detached mode
  234.         subprocess.run(['docker', 'compose', 'up', '--quiet-pull', '-d'], check=True)
  235.         print("βœ“ VSS services started in background")
  236.        
  237.         # Monitor logs for service readiness
  238.         monitor_service_logs(keywords)
  239.        
  240.     except subprocess.CalledProcessError as e:
  241.         print(f"❌ Failed to start VSS services: {e}")
  242.         sys.exit(1)
  243.  
  244. def filter_logs(logs, keywords):
  245.     """Filter logs to show only lines containing specified keywords"""
  246.     return [line for line in logs.splitlines() if any(keyword in line for keyword in keywords)]
  247.  
  248. def monitor_service_logs(keywords):
  249.     """Monitor Docker Compose logs for service readiness indicators"""
  250.     printed_lines = set()
  251.    
  252.     print("πŸ“‹ Monitoring service startup logs...")
  253.    
  254.     try:
  255.         while True:
  256.             # Get current logs
  257.             result = subprocess.run(['docker', 'compose', 'logs', '--no-color'],
  258.                                   capture_output=True, text=True, check=True)
  259.            
  260.             # Filter and display new log lines
  261.             filtered_logs = filter_logs(result.stdout, keywords)
  262.             new_logs = [line for line in filtered_logs if line not in printed_lines]
  263.            
  264.             for line in new_logs:
  265.                 print(f"  {line}")
  266.                 printed_lines.add(line)
  267.                
  268.                 # Check if frontend is ready
  269.                 if "Frontend" in line:
  270.                     print("\nπŸŽ‰ VSS Server started successfully!")
  271.                     print("🌐 Access VSS Frontend UI from Brev portal tunnels section")
  272.                     print(f"πŸ”— Frontend should be available on port {os.environ['FRONTEND_PORT']}")
  273.                     print(f"πŸ”— Backend should be available on port {os.environ['BACKEND_PORT']}")
  274.                     return
  275.            
  276.             time.sleep(1)
  277.            
  278.     except KeyboardInterrupt:
  279.         print("\n⏸️  Log monitoring stopped by user")
  280.     except subprocess.CalledProcessError as e:
  281.         print(f"❌ Error monitoring logs: {e}")
  282.  
  283. def main():
  284.     """Main function to orchestrate the VSS setup process"""
  285.     print("πŸ”§ Starting VSS Docker Setup...")
  286.     print("=" * 50)
  287.    
  288.     # Setup phase
  289.     setup_environment_variables()
  290.     change_to_working_directory()
  291.     setup_nim_cache()
  292.    
  293.     # Docker configuration phase
  294.     docker_login()
  295.     configure_docker_storage()
  296.     verify_docker_storage()
  297.    
  298.     # Container startup phase
  299.     start_llm_container()
  300.     start_rerank_container()
  301.     start_embed_container()
  302.    
  303.     # VSS services phase
  304.     check_docker_compose()
  305.     stop_existing_services()
  306.     start_vss_services()
  307.    
  308.     print("\nβœ… VSS setup completed successfully!")
  309.     print("\nPort Configuration Summary:")
  310.     print(f"  - LLM Service: 8000")
  311.     print(f"  - Rerank Service: 9235")
  312.     print(f"  - Embedding Service: 9234")
  313.     print(f"  - VSS Backend: {os.environ['BACKEND_PORT']}")
  314.     print(f"  - VSS Frontend: {os.environ['FRONTEND_PORT']}")
  315.  
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment