---
job: "extension"
config:
  name: "your_lora_name"
  process:
    - type: "diffusion_trainer"
      training_folder: "/home/ai-toolkit/output"
      sqlite_db_path: "./aitk_db.db"
      device: "cuda"
      trigger_word: null
      performance_log_every: 10
      network:
        type: "lora"
        linear: 64
        linear_alpha: 16
        conv: 32
        conv_alpha: 8 # start at 8 for the first 1000 steps, then bump to 10; if still stable after another 200 steps, bump to 12; at ~70% of total steps, bump to 14
        lokr_full_rank: true
        lokr_factor: -1
        network_kwargs: # targets the I2V layers only
          only_if_contains:
            - "attn1.to_q"
            - "attn1.to_v"
            - "attn1.to_out.0"
          ignore_if_contains:
            - "attn2"
            - "ffn"
      save:
        dtype: "bf16"
        save_every: 100
        max_step_saves_to_keep: 100
        save_format: "diffusers"
        push_to_hub: false
      datasets:
        - folder_path: "/home/ai-toolkit/datasets/shorter"
          mask_path: null
          mask_min_value: 0.1
          default_caption: ""
          caption_ext: "txt"
          token_dropout_rate: 0.3 # needed for motion training (see the dropout sketch after the config)
          keep_tokens: 1 # always keep the trigger word (the first token) when tokens are dropped
          cache_latents_to_disk: false
          is_reg: false
          network_weight: 1
          resolution:
            - 640
          max_pixels_per_frame: 409600 # 640 x 640; if you drop the resolution to 512, use 512 x 512 = 262144
          controls: []
          shrink_video_to_frames: true
          num_frames: 33
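          # 33 keeps Wan's expected 4n+1 frame count (4 * 8 + 1)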
          do_i2v: true
          flip_x: false
          flip_y: false
      train:
        attention_backend: "native" # if sageattention 2 is installed in your venv, it should be picked up automatically
        batch_size: 1
        bypass_guidance_embedding: false
        steps: 17000
        gradient_accumulation: 1
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: "flowmatch"
        optimizer: "automagic"
        timestep_type: "shift"
        content_or_style: "style"
        max_grad_norm: 1
        optimizer_params:
          lr_bump: 0.000005
          min_lr: 0.000005
          max_lr: 0.0003
          beta2: 0.999
          weight_decay: 0.0001
          clip_threshold: 1
          high_noise_lr_bump: 0.00001
          high_noise_min_lr: 0.00001
          high_noise_max_lr: 0.0003
          low_noise_lr_bump: 0.00001
          low_noise_min_lr: 0.000005
          low_noise_max_lr: 0.0003
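        # the high_noise_* / low_noise_* entries give each of Wan2.2's two
        # experts (the high-noise and low-noise transformers) its own automagic
        # learning-rate range; training alternates experts every
        # switch_boundary_every steps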
        unload_text_encoder: false
        cache_text_embeddings: true
        lr: 0.00001
        ema_config:
          use_ema: true
          ema_decay: 0.99
        skip_first_sample: true
        force_first_sample: false
        disable_sampling: false
        dtype: "bf16"
        diff_output_preservation: false
        diff_output_preservation_multiplier: 1
        diff_output_preservation_class: "person"
        switch_boundary_every: 100
        loss_type: "mse"
      model:
        name_or_path: "ai-toolkit/Wan2.2-I2V-A14B-Diffusers-bf16"
        quantize: true
        qtype: "uint4|ostris/accuracy_recovery_adapters/wan22_14b_i2v_torchao_uint4.safetensors"
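        # qtype is "<quant dtype>|<accuracy recovery adapter>": the transformer
        # is quantized to uint4 and Ostris' recovery adapter is loaded on top
        # to offset the precision lost to quantization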
        quantize_te: true
        qtype_te: "qfloat8"
        arch: "wan22_14b_i2v"
        low_vram: true
        model_kwargs:
          train_high_noise: true
          train_low_noise: true
        layer_offloading: true
        layer_offloading_transformer_percent: 1
        layer_offloading_text_encoder_percent: 1
      sample:
        sampler: "flowmatch"
        sample_every: 1000
        width: 320
        height: 480
        samples:
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
          - prompt: ""
            ctrl_img: ""
            network_multiplier: "1"
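          # for I2V sampling, fill each slot with a prompt plus a ctrl_img
          # (the start image the generated clip is conditioned on)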
        neg: ""
        seed: 41
        walk_seed: false
        guidance_scale: 4
        sample_steps: 25
        num_frames: 41
        fps: 16
meta:
  name: "your_lora"
  version: "1.0"
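
For reference, here is a minimal Python sketch of how substring-based layer targeting of the only_if_contains / ignore_if_contains kind typically behaves. The function and module names are illustrative, not ai-toolkit's actual code.

def should_train(module_name: str,
                 only_if_contains: list[str],
                 ignore_if_contains: list[str]) -> bool:
    """Return True if this module should get a LoRA adapter."""
    if any(s in module_name for s in ignore_if_contains):
        return False  # exclusions win: attn2 (text cross-attention) and ffn are skipped
    return any(s in module_name for s in only_if_contains)

only = ["attn1.to_q", "attn1.to_v", "attn1.to_out.0"]
ignore = ["attn2", "ffn"]
for name in [
    "blocks.0.attn1.to_q",      # self-attention query  -> trained
    "blocks.0.attn1.to_out.0",  # self-attention output -> trained
    "blocks.0.attn2.to_q",      # text cross-attention  -> skipped
    "blocks.0.ffn.net.0.proj",  # feed-forward          -> skipped
]:
    print(f"{name}: {should_train(name, only, ignore)}")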
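
And a minimal sketch of caption token dropout with keep_tokens, assuming the common behavior: each token after the first keep_tokens tokens is independently dropped with probability token_dropout_rate, so captions vary between epochs while the trigger word always survives. ai-toolkit's internals may differ.

import random

def drop_tokens(caption: str, rate: float = 0.3, keep_tokens: int = 1) -> str:
    """Drop each token after the first `keep_tokens` with probability `rate`."""
    words = caption.split()
    kept = words[:keep_tokens]  # the trigger word (first token) always survives
    kept += [w for w in words[keep_tokens:] if random.random() >= rate]
    return " ".join(kept)

random.seed(0)
print(drop_tokens("mytrigger woman turns her head and smiles at the camera"))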