# Untitled

import os
import subprocess
import sys

def quantize_model(
    model_path: str,
    gguf_types,
    *,
    convert_script: str = "../llama.cpp/convert_hf_to_gguf.py",
) -> list:
    """Run the GGUF conversion script once per requested output type.

    For every entry in *gguf_types* the converter is invoked as a child
    process with ``--outfile <model_path>-<type>.gguf`` and
    ``--outtype <type>`` (type lower-cased), followed by *model_path*.
    A failure for one type is reported and does not stop the remaining
    types from being processed.

    Args:
        model_path: Model location passed to the converter; also used as
            the prefix of every output file name.
        gguf_types: Iterable of output type names (e.g. ``"Q8_0"``).
        convert_script: Path to the converter script. Relative paths are
            resolved against the current working directory.

    Returns:
        The output file paths whose conversion finished with exit code 0,
        in the order they were requested.
    """
    created_files = []

    for gguf_type in gguf_types:
        outtype = gguf_type.lower()
        output_file = f"{model_path}-{outtype}.gguf"

        # Use the interpreter running this script so the converter sees the
        # same environment (virtualenv, installed packages) instead of
        # whatever "python" happens to be first on PATH.
        command = [
            sys.executable, convert_script,
            "--outfile", output_file,
            "--outtype", outtype,
            model_path,
        ]

        print(f"Запуск квантизации: {command}")

        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers the case where the process cannot be started at
            # all; treat it like a failed run so the other types still run.
            print(f"Ошибка при выполнении квантизации для {gguf_type}: {e}")
        else:
            print(f"Успешно создан файл: {output_file}")
            created_files.append(output_file)

    return created_files

def main():
    """Convert the example model into each of the listed GGUF output types."""
    # NOTE(review): confirm that the converter script accepts every one of
    # these --outtype values and that this model identifier resolves to a
    # location the converter can read; unsupported values will only surface
    # as a reported failure for that type.
    quantize_model(
        "distilbert/distilbert-base-uncased",
        ["Q8_0", "Q6_K_L", "Q5_K_L"],
    )


if __name__ == "__main__":
    main()