from llama_cpp import Llama
import llama_cpp.llama_chat_format as llama_chat_format

# 1. Verify the sha256 of the model file (the check itself is omitted in this
#    snippet), then load the model with a fixed seed so generation is reproducible.
model_path = '../qwen2.5-3b-instruct-q8_0.gguf'
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    seed=200,
)
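# Recover the eos/bos token *texts* from their ids: Jinja2ChatFormatter takes
# the literal token strings, not the ids, when rendering the chat template.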
eos_token_id = llm.token_eos()
bos_token_id = llm.token_bos()
eos_token = (
    llm._model.token_get_text(eos_token_id) if eos_token_id != -1 else ''
)
bos_token = (
    llm._model.token_get_text(bos_token_id) if bos_token_id != -1 else ''
)
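# Render the model's own chat template (stored in the GGUF metadata) so the
# prompt is byte-identical to what the challenge produces for these messages.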
formatter = llama_chat_format.Jinja2ChatFormatter(
    template=llm.metadata['tokenizer.chat_template'],
    eos_token=eos_token,
    bos_token=bos_token,
    stop_token_ids=[eos_token_id],
)
result = formatter(
    messages=[
        {'role': 'system', 'content': 'You are a professional CTF player.'},
        {
            'role': 'user',
            'content': 'Write a short article for Hackergame 2024 (中国科学技术大学 (University of Science and Technology of China) 第十一届信息安全大赛) in English. The more funny and unreal the better. About 500 words.',
        },
    ]
)
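# Build an index of every vocabulary token keyed by its text length.
# (Constructed here but not referenced by the search below.)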
words = {}
for i in range(llm.n_vocab()):
    t = llm._model.token_get_text(i)
    words.setdefault(len(t), {})[t] = i
prompt = result.prompt
prompt = llm.tokenize(
    prompt.encode('utf-8'),
    add_bos=not result.added_special,
    special=True,
)
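# `appended` accumulates the token ids chosen so far; `s` is the target output
# (after.txt), in which a byte 'x' may stand for any character of
# 'hackergame of ustc' while every other byte must be reproduced exactly.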
appended = []
with open('after.txt', 'rb') as f:
    s = f.read()

alters = []
alter_cur = []
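# Backtracking state: `alters` holds the candidate tokens that are valid at the
# current position, and `alter_cur` is a stack recording which candidate index
# was taken at each position generated so far.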
def logits_processor(
    input_ids,
    scores,
):
    global alters
    wow = scores.argsort()
    # Among the 19 highest-scoring tokens, keep those whose detokenization is
    # still a byte-wise prefix of the target under the 'x'-wildcard rule above.
    for i in wow[-1:-20:-1]:
        ss = llm.detokenize(appended + [i])
        if all(
            (a == ord('x') and b in map(ord, 'hackergame of ustc')) or a == b
            for a, b in zip(s[: len(ss)], ss)
        ):
            alters.append(i)
    if alter_cur[-1] >= len(alters):
        # Ran out of options; rebind `alters` to an empty list to notify the caller.
        alters = []
    else:
        # Force the selected candidate by giving it an overwhelming logit.
        scores[alters[alter_cur[-1]]] = 100
    return scores
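# Depth-first search over token choices: generate greedily, and whenever the
# processor reports no valid candidate (`alters` comes back empty), pop the
# last token, advance to the next candidate at the previous position, and
# resume generation from there.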
while True:
    gen = llm.generate(
        prompt,
        logits_processor=[logits_processor],
    )
    alter_cur.append(0)
    while True:
        alters.clear()
        logic = next(gen)
        if not alters:
            # Dead end: undo the last token and retry from the previous position.
            print('# Oh no!! Going back!!!')
            alter_cur.pop()
            appended.pop()
            alter_cur[-1] += 1
            gen = llm.generate(
                prompt + appended,
                logits_processor=[logits_processor],
            )
            continue
        appended.append(logic)
        output = llm.detokenize(appended)
        print('=====\n' + output.decode())
        if len(output) >= len(s):
            # The full target has been reproduced.
            quit()
        alter_cur.append(0)
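# Running this prints each successfully extended prefix between '=====' markers,
# logs '# Oh no!! Going back!!!' on every backtrack, and exits once the output
# reaches the length of after.txt.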