- mkdir ~/autodl-tmp/modelscope   # keep downloaded weights on the data disk
- cd /root/.cache
- ln -s ~/autodl-tmp/modelscope   # point the default ModelScope cache (~/.cache/modelscope) at the data disk
- cd /root
- pip install vllm
- pip install modelscope
- modelscope download --model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
- cd ~/autodl-tmp/modelscope/hub/models/
- vllm serve ~/autodl-tmp/modelscope/hub/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --tensor-parallel-size 1 --max-model-len 32768 --enforce-eager --dtype half   # --served-model-name lets clients use the short model id instead of the local path
- pip install openai
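
Before moving on to the client code, a quick sanity check (a minimal sketch, assuming the server started above is listening on localhost:8000) confirms the endpoint is up and shows the exact model name it serves:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
# /v1/models lists every model the vLLM server exposes;
# the id printed here is the name the chat request below must use.
for model in client.models.list():
    print(model.id)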
from openai import OpenAI
from IPython.display import display, Markdown

# Initialize the client with the vLLM server URL
client = OpenAI(
    base_url="http://localhost:8000/v1",  # vLLM's OpenAI-compatible endpoint
    api_key="EMPTY",  # vLLM doesn't require an API key
)

# Make a chat completion request
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",  # must match the served model name
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a binary search for me"},
    ],
    temperature=0.7,
    max_tokens=10000,
)

ans = response.choices[0].message.content
display(Markdown(ans))
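
DeepSeek-R1 distill checkpoints usually wrap their chain-of-thought in <think>...</think> tags inside the response content. A minimal sketch for separating the reasoning from the final answer, assuming that tag convention holds (split_reasoning is a hypothetical helper, not part of the OpenAI client):

def split_reasoning(text: str) -> tuple[str, str]:
    # Assumes R1-style output: <think>reasoning</think> followed by the answer.
    start_tag, end_tag = "<think>", "</think>"
    if start_tag in text and end_tag in text:
        head, _, rest = text.partition(start_tag)
        thinking, _, answer = rest.partition(end_tag)
        return thinking.strip(), (head + answer).strip()
    return "", text.strip()  # no tags found: treat everything as the answer

thinking, answer = split_reasoning(ans)
display(Markdown(answer))  # render only the final answer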