Llama.cpp over Vulkan on AMD BC-250
by machinez, Jan 2nd, 2025
**PREREQUISITE**
Boot into BIOS
Press F2 on the keyboard

**BIOS MENU**
Advanced
IPv4 PXE Support -> Disabled
Boot Option #1 (boot into the USB Fedora 40 Server installer)
Hit "-"
Save and Exit

**After booting into USB**
Troubleshooting
Install Fedora 40 in basic graphics mode

**After install and first boot into Fedora**
sudo hostnamectl set-hostname YOURHOSTNAME
sudo lvextend -r -l +100%FREE /dev/mapper/fedora-root
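
**Optional sanity check: the root filesystem should now span all free space in the volume group**
df -h /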

sudo dnf makecache --refresh
sudo dnf -y group install "Development Tools"

sudo dnf -y install git cmake glslang rpmdevtools vulkan-headers vulkan-devel vulkan-tools glslc koji python3-pip ccache
cd $(mktemp -d) && koji download-build --arch=x86_64 --arch=noarch kernel-6.2.0-63.fc38 && rm *debug*.rpm *uki*.rpm
sudo dnf -y install *
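
**Optional: list installed kernels; the 6.2.0 fc38 build should now sit alongside the stock Fedora 40 kernel**
rpm -qa 'kernel*' | sort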
cd ~

wget https://kojipkgs.fedoraproject.org/packages/mesa/24.1.5/2.fc40/src/mesa-24.1.5-2.fc40.src.rpm
rpm2cpio mesa-24.1.5-2.fc40.src.rpm | cpio -idmv
mkdir -p rpmbuild/{SPECS,SOURCES}
mv ~/mesa.spec ~/rpmbuild/SPECS/
mv ~/gnome-shell-glthread-disable.patch ~/Mesa-MLAA-License-Clarification-Email.txt ~/rpmbuild/SOURCES/

tar xf mesa-*.tar.xz -C ~/rpmbuild/SOURCES

sed -i 's/#define AMDGPU_NAVI10_RANGE 0x01, 0x0A \/\/# 1 <= x < 10/#define AMDGPU_NAVI10_RANGE 0x01, 0x8A \/\/# 1 <= x < 10/g' ~/rpmbuild/SOURCES/mesa-24.1.5/src/amd/addrlib/src/amdgpu_asic_addr.h
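
**The sed above widens addrlib's Navi10 device-ID range so the BC-250's Oberon (PS5-derived) APU falls inside it; confirm the edit took**
grep 'AMDGPU_NAVI10_RANGE' ~/rpmbuild/SOURCES/mesa-24.1.5/src/amd/addrlib/src/amdgpu_asic_addr.h    # should show 0x01, 0x8A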

cd ~/rpmbuild/SOURCES
tar -cJf mesa-24.1.5.tar.xz ./mesa-24.1.5
cd ~

sudo dnf -y install rust-paste-devel rust-proc-macro2-devel rust-quote-devel rust-syn+clone-impls-devel \
    spirv-tools-devel expat-devel libclc-devel clang-devel flatbuffers-devel flatbuffers-compiler \
    bindgen cbindgen meson valgrind-devel libva-devel libXfixes-devel libXdamage-devel \
    wayland-protocols-devel llvm-devel lm_sensors-devel xtensor-devel python3-devel python3-mako \
    rust-packaging libunwind-devel libXrandr-devel libXxf86vm-devel libselinux-devel \
    libomxil-bellagio-devel libxshmfence-devel libvdpau-devel mesa-libEGL-devel libglvnd-devel \
    spirv-llvm-translator-devel libdrm-devel

rpmbuild -ba ./rpmbuild/SPECS/mesa.spec
cd ~/rpmbuild/RPMS/x86_64/
sudo rpm -ivh --force --nodeps mesa*
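
**Optional: confirm the patched Mesa build is what is installed now**
rpm -q mesa-vulkan-drivers    # expect 24.1.5-2.fc40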

**Swap the installer's nomodeset for the amdgpu scatter/gather display workaround, then regenerate the GRUB config**
sudo sed -i 's/nomodeset/amdgpu.sg_display=0/g' /etc/default/grub
sudo grub2-mkconfig -o /boot/grub2/grub.cfg

**Reboot; GRUB should automatically choose kernel-6.2.0-63.fc38**
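
**After the reboot, verify the kernel and that RADV sees the GPU (vulkan-tools was installed earlier)**
uname -r                                            # should report the 6.2.0-63.fc38 kernel
vulkaninfo --summary | grep -iE 'deviceName|driverName'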

**Install the oberon-governor; this drops idle power by 20-25 W per node**
git clone https://gitlab.com/TuxThePenguin0/oberon-governor.git
cd oberon-governor/
mkdir build
cd build
cmake ..
make
sudo make install
sudo systemctl enable oberon-governor
sudo systemctl start oberon-governor
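
**Optional: check the governor came up cleanly**
systemctl status oberon-governor --no-pager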

cd ~
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp/

vim ./ggml/src/ggml-vulkan/ggml-vulkan.cpp

**Make the one-line change marked with "+" below**
******************************************************************************************
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index c7ac0e8f..7f69e6eb 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1912,6 +1912,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
         device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
     }

+    device->max_memory_allocation_size = 2147483646;
     device->vendor_id = device->properties.vendorID;
     device->subgroup_size = subgroup_props.subgroupSize;
     device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
******************************************************************************************
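
**The constant 2147483646 is 2^31 - 2 bytes, just under 2 GiB; the override pins max_memory_allocation_size there, presumably because the BC-250 reports a larger single-allocation limit than it can reliably service. Double-check that only the one line changed before building:**
git diff ggml/src/ggml-vulkan/ggml-vulkan.cpp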

cmake -B build -DGGML_VULKAN=1
cmake --build build --config Release

pip install -U "huggingface_hub[cli]"
huggingface-cli download bartowski/Meta-Llama-3.1-8B-Instruct-GGUF Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
mkdir ~/models

ln -s ~/.cache/huggingface/hub/models--bartowski--Meta-Llama-3.1-8B-Instruct-GGUF/snapshots/bf5b95e96dac0462e2a09145ec66cae9a3f12067/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf ~/models/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
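
**Optional: llama-bench gives a cleaner tokens/s number than eyeballing llama-cli output**
./build/bin/llama-bench -m ~/models/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf -ngl 33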

**TEST llama.cpp: you should get 29-33 tok/s**
./build/bin/llama-cli -m ~/models/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf -p "You are an expert of food and food preparation. What is the difference between jam, jelly, preserves and marmalade?" -n -2 -e -ngl 33 -t 4 -c 512

**Run with an OpenAI-compatible API**
./build/bin/llama-server -m ~/models/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf -n -2 -e -ngl 33 -t 4 -c 4096 --host 0.0.0.0
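
**Quick smoke test from another machine (NODE_IP is your node's address; llama-server listens on port 8080 by default)**
curl http://NODE_IP:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"messages":[{"role":"user","content":"Say hello in five words."}]}'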