base_model: mistralai/Mistral-7B-v0.1
base_model_config: mistralai/Mistral-7B-v0.1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: pippa_sharegpt_trimmed.jsonl
    ds_type: json
    type: sharegpt:chat
dataset_prepared_path: last_run_prepared
val_set_size: 0.10
output_dir: ./pippa-sharegpt-13b-qlora

adapter: qlora
lora_model_dir:

sequence_len: 4096
sample_packing: true

lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project: pippa-sharegpt-13b-qlora
wandb_entity:
wandb_watch:
wandb_run_id:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 4
num_epochs: 3
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 50
save_steps: 50
save_total_limit: 99999
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
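
Note: this is an Axolotl QLoRA fine-tuning config. With type "sharegpt:chat", each line of the JSONL file named in "path" is expected to be a ShareGPT-style conversation record. The Python sketch below writes one such record; the conversations/from/value field names follow the common ShareGPT convention and the sample text is invented, so treat it as an illustration rather than a dump of the actual dataset.

import json

# One ShareGPT-style record: a "conversations" list of turns, each with a
# speaker tag ("from") and the utterance text ("value"). The "human"/"gpt"
# role names follow the usual ShareGPT convention; adjust if your export differs.
record = {
    "conversations": [
        {"from": "human", "value": "Stay in character as a ship's navigator."},
        {"from": "gpt", "value": "Understood. Charting a course now, captain."},
    ]
}

# Append the record as one JSONL line, matching the "path" key in the config above.
with open("pippa_sharegpt_trimmed.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")

Assuming a standard Axolotl install, training is then typically launched with something like "accelerate launch -m axolotl.cli.train config.yml", where config.yml is the YAML above saved to disk.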