# Axolotl config: 8-bit LoRA fine-tune of mistralai/Mistral-7B-v0.1 on an alpaca-format JSON dataset
base_model: mistralai/Mistral-7B-v0.1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_mistral_derived_model: true

load_in_8bit: true
load_in_4bit: false
strict: false

datasets:
  - path: combined_file.json
    ds_type: json
    type: alpaca
output_dir: ./out

adapter: lora
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - down_proj
  - up_proj

sequence_len: 8192
sample_packing: false
pad_to_sequence_len: true   # pad every example to the full 8192 tokens

wandb_project: axolotl
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 3
micro_batch_size: 2   # effective batch size per device: 2 x 3 = 6
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
# default DeepSpeed config; a more aggressive stage (zero2, zero3) can be used if needed
deepspeed: deepspeed_configs/zero1.json
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
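Since the dataset is declared with type: alpaca, axolotl expects each record in combined_file.json to follow the alpaca instruction schema. A minimal sketch of one record (the field values below are placeholders, not taken from the paste):

{
  "instruction": "Summarize the following text.",
  "input": "Axolotl is a tool for fine-tuning large language models.",
  "output": "Axolotl fine-tunes large language models."
}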
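To start a training run with this config (a sketch assuming the file is saved as config.yml and axolotl is installed along with accelerate and deepspeed):

accelerate launch -m axolotl.cli.train config.yml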