import sys
import os
import time
import json

from tqdm import tqdm

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from exllamav2 import (
    ExLlamaV2,
    ExLlamaV2Config,
    ExLlamaV2Cache,
    ExLlamaV2Tokenizer,
)

from exllamav2.generator import (
    ExLlamaV2BaseGenerator,
    ExLlamaV2Sampler,
)
# Initialize model and cache
model_directory = "yi-200kquanted/4.8bpw/"

config = ExLlamaV2Config()
config.model_dir = model_directory
config.prepare()

model = ExLlamaV2(config)
print("Loading model: " + model_directory)

tokenizer = ExLlamaV2Tokenizer(config)

# Split the weights across two GPUs: roughly 18.5 GB on CUDA:0 and 22 GB on CUDA:1.
# (Call `model.load()` with no arguments when using a single GPU.)
model.load([18.5, 22])

cache = ExLlamaV2Cache(model, batch_size=1)
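# A batch_size=1 cache is enough here: prompts are sent to the generator one at a time in the loop below.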
# Sampler settings
settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.9
settings.top_k = 0  # top_k = 0 disables top-k filtering
settings.top_p = 0.7
settings.token_repetition_penalty = 1.1
# Disallow the EOS token so generation keeps going until max_new_tokens is reached.
settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])

max_new_tokens = 20000

# Initialize generator and warm up CUDA kernels with a short dummy generation.
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
generator.warmup()
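
# Read a JSONL file (one JSON object per line), skipping any lines that fail to parse.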
def parse_jsonl(file_path):
    data_list = []
    with open(file_path, 'r') as file:
        for line in file:
            try:
                # Parse each line as a JSON object
                json_object = json.loads(line.strip())
                data_list.append(json_object)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}. Skipping line.")
    return data_list
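
# Each dataset record is expected to provide at least 'system', 'user', and 'rejected'
# fields; rows that already have a non-empty 'rejected' value are skipped below.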
tmpCollector = {}

# Load the dataset from /dev/shm/ppr.jsonl; if that fails, fall back to the
# backup copy and restore it over the primary file.
try:
    file_path = '/dev/shm/ppr.jsonl'
    inputCollector = parse_jsonl(file_path)
except Exception:
    file_path = '/dev/shm/pprBkup.jsonl'
    inputCollector = parse_jsonl(file_path)
    os.system('cp /dev/shm/pprBkup.jsonl /dev/shm/ppr.jsonl')
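
# tmpCollector maps a row's index to its formatted prompt; the generated
# completion is written back into that row's 'rejected' field.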
for index, row in tqdm(enumerate(inputCollector), total=len(inputCollector), desc="Processing"):
    output = ''
    # Skip rows that already have a rejected completion.
    if row['rejected'] != '':
        continue
    prompt = f"{row['system']}\nUser:{row['user']}\nAssistant:"
    tmpCollector[index] = prompt
    if len(tmpCollector) < 1:
        continue
    # generate_simple returns the prompt followed by the completion for each input.
    output = generator.generate_simple(list(tmpCollector.values()), settings, max_new_tokens)
    for singleComp in output:
        # Match each completion back to the prompt that produced it.
        for singlePrompt in tmpCollector:
            initialPrompt = tmpCollector[singlePrompt]
            if initialPrompt not in singleComp:
                continue
            # Keep only the text generated after the "Assistant:" marker.
            newGen = singleComp.split('Assistant:')[1]
            print('>>>>>>>>>>>>' + newGen)
            print()
            inputCollector[singlePrompt]['rejected'] = newGen
            break
    tmpCollector = {}
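    # Save progress after every generated row so an interrupted run can resume from the backup file.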
    # Rewrite the backup as JSONL (one JSON object per line) so parse_jsonl() can read it back.
    with open('/dev/shm/pprBkup.jsonl', 'w') as file:
        for record in inputCollector:
            file.write(json.dumps(record) + '\n')
    # Now that the backup is written, copy it over the primary file as well.
    os.system('cp /dev/shm/pprBkup.jsonl /dev/shm/ppr.jsonl')