base_model: mistralai/Mistral-7B-v0.1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_mistral_derived_model: true

load_in_8bit: true
load_in_4bit: false
strict: false

datasets:
  - path: combined_file.json
    ds_type: json
    type: alpaca

output_dir: ./out

adapter: lora
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
  - k_proj
  - o_proj
  - gate_proj
  - down_proj
  - up_proj

sequence_len: 8192
sample_packing: false
pad_to_sequence_len: true

wandb_project: axolotl
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 3
micro_batch_size: 2
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false

bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
# default DeepSpeed config; switch to a more aggressive stage (zero2, zero3) if needed
deepspeed: deepspeed_configs/zero1.json
weight_decay: 0.0

fsdp:
fsdp_config:

special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
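
A config like this is normally saved to a file and passed to the Axolotl CLI, e.g. accelerate launch -m axolotl.cli.train config.yml (the filename config.yml is an assumption, not part of the paste). With gradient_accumulation_steps: 3 and micro_batch_size: 2, the effective batch size works out to 6 sequences per GPU step. After training, the LoRA adapter lands in ./out and can be loaded with peft for inference. Below is a minimal sketch, assuming transformers and peft are installed and that Axolotl wrote the adapter to ./out as configured; the prompt follows the Alpaca format implied by type: alpaca.

# Minimal inference sketch for the adapter produced by the config above.
# Assumes the adapter was saved to ./out (output_dir in the config).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",  # base_model from the config
    torch_dtype=torch.bfloat16,   # matches bf16: true
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

model = PeftModel.from_pretrained(base, "./out")  # load the LoRA adapter
model = model.merge_and_unload()  # optional: bake the adapter into the base weights

# Alpaca-style prompt, since the dataset was trained with type: alpaca
prompt = "### Instruction:\nSummarize LoRA in one sentence.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)  # mirrors eval_max_new_tokens
print(tokenizer.decode(out[0], skip_special_tokens=True))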