Guest User

ComfyUI ROCm 7.1.1 9070xt start script

a guest
Dec 11th, 2025
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.99 KB | None | 0 0
  #!/bin/bash
  #
  # Start ComfyUI on an AMD Radeon RX 9070 XT (gfx1201) with ROCm 7.1.1.
  #
  # Steps:
  #   1. activate the Python virtual environment,
  #   2. export the ROCm / MIOpen / PyTorch tuning variables below,
  #   3. replace this shell with the ComfyUI process.
  #
  # Any arguments given to this script are appended to the ComfyUI command line.

  readonly VENV_DIR="/mnt/storage/Comfy_Venv"
  readonly COMFYUI_DIR="/mnt/storage/ComfyUI"

  # Print a message to stderr and abort.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # -----------------------------
  # Python virtual environment
  # -----------------------------
  # Abort instead of silently falling back to the system interpreter when the
  # virtualenv or the ComfyUI checkout is missing.
  # shellcheck disable=SC1091 # the activate script only exists on the target host
  source "${VENV_DIR}/.venv/bin/activate" \
    || die "cannot activate virtualenv at ${VENV_DIR}/.venv"
  cd "${COMFYUI_DIR}" || die "cannot cd to ${COMFYUI_DIR}"

  # -----------------------------
  # GPU visibility / architecture
  # -----------------------------
  export HSA_FORCE_FINE_GRAIN_PCIE=1
  export HIP_VISIBLE_DEVICES=0
  export ROCR_VISIBLE_DEVICES=0
  export HIP_TARGET="gfx1201"
  export PYTORCH_ROCM_ARCH="gfx1201"
  export TORCH_HIP_ARCH_LIST="gfx1201"

  # -----------------------------
  # Mesa / RADV / debugging
  # -----------------------------
  export MESA_LOADER_DRIVER_OVERRIDE=amdgpu
  export RADV_PERFTEST=aco,nggc,sam
  export AMD_DEBUG=0
  export ROCBLAS_VERBOSE_HIPBLASLT_ERROR=1
  export AMD_SERIALIZE_KERNEL=0
  export PYTORCH_HIP_FREE_MEMORY_THRESHOLD_MB=128

  # -----------------------------
  # Memory / performance tuning
  # -----------------------------
  export PYTORCH_ALLOC_CONF="garbage_collection_threshold:0.6,max_split_size_mb:6144"
  export OMP_NUM_THREADS=12
  export MKL_NUM_THREADS=12
  export NUMEXPR_NUM_THREADS=12

  # Precision and performance
  export TORCH_BLAS_PREFER_HIPBLASLT=0

  # -----------------------------
  # ROCm backend fine-tuning
  # -----------------------------
  export HSA_ENABLE_ASYNC_COPY=1
  # A bare `export NAME` exports nothing when NAME is unset, so give it an
  # explicit default while still honouring a value inherited from the caller.
  export HSA_ENABLE_SDMA="${HSA_ENABLE_SDMA:-1}"
  export HSA_ENABLE_SDMA_KERNEL_COPY=1
  export HSA_ENABLE_SDMA_COPY=1

  # -----------------------------
  # MIOpen (AMD DNN library)
  # -----------------------------
  export MIOPEN_FIND_MODE=2
  export MIOPEN_ENABLE_CACHE=1
  export MIOPEN_CONV_WINOGRAD=1
  export MIOPEN_DEBUG_CONV_FFT=0
  export MIOPEN_ENABLE_LOGGING_CMD=0
  export MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=1
  export MIOPEN_USER_DB_PATH="${HOME}/.config/miopen"
  export MIOPEN_CUSTOM_CACHE_DIR="${HOME}/.config/miopen"

  # -----------------------------
  # Torch / Inductor / Triton settings
  # -----------------------------
  export TORCH_COMPILE=1
  export TORCHINDUCTOR_FORCE_FALLBACK=1
  # These two were previously also exported as "CK,TRITON,ROCBLAS" and "BEST"
  # further up, but the empty assignments came last and therefore always won.
  # Only the effective (empty) values are kept.
  # NOTE(review): confirm that empty is really what is wanted here.
  export TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDS=""
  export TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE=""

  # FlashAttention backends
  export TRITON_USE_ROCM=1
  export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
  export FLASH_ATTENTION_BACKEND="flash_attn_native"
  export FLASH_ATTENTION_TRITON_AMD_ENABLE="true"
  export TRANSFORMERS_USE_FLASH_ATTENTION=1
  export FLASH_ATTENTION_TRITON_AMD_SEQ_LEN=4096
  export USE_CK=OFF

  # rocBLAS tuning for gfx1201 (RDNA4)
  # Fall back to the conventional install prefix so an unset ROCM_PATH does not
  # turn this into the bogus path "/lib/rocblas".
  export ROCBLAS_TENSILE_LIBPATH="${ROCM_PATH:-/opt/rocm}/lib/rocblas"
  export ROCBLAS_INTERNAL_FP16_ALT_IMPL=1
  export ROCBLAS_LAYER=0
  export ROCBLAS_INTERNAL_USE_SUBTENSILE=1

  # -----------------------------
  # Run ComfyUI
  # -----------------------------
  # The options live in an array: backslash continuations separated by blank
  # lines end the command early, which made every option run as its own
  # (nonexistent) command and started ComfyUI with no options at all.
  # NOTE: --listen 0.0.0.0 accepts connections on every network interface.
  comfy_args=(
    --listen 0.0.0.0
    --use-pytorch-cross-attention
    --normalvram
    --reserve-vram 1
    --fast fp16_accumulation fp8_matrix_mult
    --disable-smart-memory
  )

  # exec so signals (Ctrl-C, SIGTERM) are delivered straight to Python.
  exec python3 main.py "${comfy_args[@]}" "$@"
Advertisement
Add Comment
Please, Sign In to add comment