Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# Min P sampling example
#
# min_p=0.08 keeps only tokens whose probability is at least ~8% of the
# most likely token's probability; per the HF generation docs this pairs
# well with a high temperature (here 1.5) for creative-but-coherent output.
import torch
from transformers import pipeline


def main():
    """Run one chat completion with min-p sampling and print the reply."""
    chat = [
        {
            "role": "system",
            "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."
        },
        {
            "role": "user",
            "content": "Hey, can you tell me any fun things to do in New York?"
        }
    ]
    # bfloat16 halves memory vs fp32; device_map="auto" lets accelerate
    # place the 8B model across the available devices.
    pipe = pipeline(
        "text-generation",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    response = pipe(chat, max_new_tokens=512, do_sample=True, min_p=0.08, temperature=1.5)
    # The pipeline returns the whole conversation; the last message is the
    # assistant's newly generated reply.
    print(response[0]['generated_text'][-1]['content'])


# Guard module-level side effects so importing this file doesn't trigger
# a multi-gigabyte model download.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement