qlora-pippa-sharegpt-mistral

base_model: mistralai/Mistral-7B-v0.1
base_model_config: mistralai/Mistral-7B-v0.1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true

# QLoRA: load the base model in 4-bit and train LoRA adapters on top of it
load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: pippa_sharegpt_trimmed.jsonl
    ds_type: json
    type: sharegpt:chat
dataset_prepared_path: last_run_prepared
val_set_size: 0.10
output_dir: ./pippa-sharegpt-13b-qlora

adapter: qlora
lora_model_dir:
sequence_len: 4096
sample_packing: true

# LoRA hyperparameters (lora_target_linear: true adapts all linear layers)
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

# Weights & Biases logging
wandb_project: pippa-sharegpt-13b-qlora
wandb_entity:
wandb_watch:
wandb_run_id:
wandb_log_model:

# Effective batch size per GPU = micro_batch_size * gradient_accumulation_steps = 16
gradient_accumulation_steps: 4
micro_batch_size: 4
num_epochs: 3
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 50
save_steps: 50
save_total_limit: 99999
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
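The datasets entry points at a local JSONL file in ShareGPT chat format (type: sharegpt:chat). Below is a minimal sketch of what one record is assumed to look like, using the common ShareGPT conversations/from/value field names (the exact keys axolotl expects may vary by version), plus a quick validation pass over the file named in the config:

import json

# One ShareGPT-style record (assumed schema: a "conversations" list of
# {"from": "human" | "gpt", "value": ...} turns). Field names follow the
# common ShareGPT convention; they are not taken from this config.
example_record = {
    "conversations": [
        {"from": "human", "value": "Hello there!"},
        {"from": "gpt", "value": "Hi! How can I help you today?"},
    ]
}

def check_sharegpt_jsonl(path: str) -> None:
    """Sanity-check that every line is valid JSON with non-empty turns."""
    with open(path, encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            record = json.loads(line)
            turns = record.get("conversations", [])
            assert turns, f"line {line_no}: no conversations"
            for turn in turns:
                assert "from" in turn and "value" in turn, f"line {line_no}: malformed turn"

if __name__ == "__main__":
    check_sharegpt_jsonl("pippa_sharegpt_trimmed.jsonl")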
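A config like this is typically consumed by axolotl's trainer (for example, accelerate launch -m axolotl.cli.train config.yml), and the LoRA adapter weights are written to output_dir. Below is a minimal sketch of loading that adapter for inference on top of the 4-bit base model, assuming recent transformers/peft/bitsandbytes versions; the prompt and generation settings are placeholders:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "mistralai/Mistral-7B-v0.1"
adapter_dir = "./pippa-sharegpt-13b-qlora"  # output_dir from the config above

# Load the base model in 4-bit, mirroring load_in_4bit: true in the config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Attach the trained QLoRA adapter.
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.eval()

prompt = "Hello there!"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))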