Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import subprocess
def quantize_model(model_path, gguf_types,
                   convert_script="../llama.cpp/convert_hf_to_gguf.py"):
    """Run the GGUF conversion script once for every requested output type.

    For each entry of ``gguf_types`` the converter is invoked as
    ``<python> <convert_script> --outfile <out> --outtype <type> <model_path>``
    where ``<type>`` is the lower-cased entry and ``<out>`` is
    ``f"{model_path}-{type}.gguf"``.  A failure for one type is reported on
    stdout and does not stop the remaining types from being processed.

    Args:
        model_path: Model location passed to the converter as its positional
            argument; also used as the prefix of every output file name.
        gguf_types: Iterable of output type names (e.g. ``"Q8_0"``).
        convert_script: Path to the conversion script.  Defaults to the
            location the original code hard-coded.

    Returns:
        A list with the output file paths of the conversions that finished
        with a zero exit status, in the order they were requested.
    """
    # Local import so the block does not depend on a file-level `import sys`.
    import sys

    # Use the interpreter running this script (same virtualenv / packages);
    # a bare "python" may be absent from PATH or point at another install.
    interpreter = sys.executable or "python"

    created_files = []
    for gguf_type in gguf_types:
        outtype = gguf_type.lower()
        output_file = f"{model_path}-{outtype}.gguf"
        command = [
            interpreter, convert_script,
            "--outfile", output_file,
            "--outtype", outtype,
            model_path,
        ]
        print(f"Запуск квантизации: {command}")
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: the converter exited with a non-zero status.
            # OSError: the process could not be started at all (for example a
            # missing executable); report it the same way and keep going.
            print(f"Ошибка при выполнении квантизации для {gguf_type}: {e}")
        else:
            print(f"Успешно создан файл: {output_file}")
            created_files.append(output_file)
    return created_files
if __name__ == "__main__":
    # Script entry point: convert one model into several GGUF variants.
    # NOTE(review): "Q6_K_L" and "Q5_K_L" are passed to the converter as
    # --outtype after lower-casing; convert_hf_to_gguf.py presumably accepts
    # only a small fixed set of out-types (f32/f16/bf16/q8_0/...), so these
    # two runs may be rejected -- confirm against the llama.cpp version used.
    quantize_model(
        model_path="distilbert/distilbert-base-uncased",
        gguf_types=["Q8_0", "Q6_K_L", "Q5_K_L"],
    )
Advertisement
Add Comment
Please, Sign In to add comment