Spaces:

NTUST-DDRC
/

i2vedit

Paused

App Files Files Community

i2vedit / config /customize_train.yaml

weiyuyeh

init

a45ed83 5 months ago

raw

history blame contribute delete

4.35 kB

	# Top-level run settings. Booleans are spelled `true`/`false` and unset
	# values are written as an explicit `null` so every loader reads them the
	# same way.

	# Pretrained diffusers model path.
	# Don't change
	pretrained_model_path: "stabilityai/stable-video-diffusion-img2vid"
	# The folder where your training outputs will be placed.
	# Don't change
	output_dir: "/home/user/app/outputs"
	seed: 23
	num_steps: 25
	# Xformers must be installed for best memory savings and performance (< PyTorch 2.0)
	enable_xformers_memory_efficient_attention: true
	# Use scaled dot product attention (only available with PyTorch >= 2.0)
	enable_torch_2_attn: true

	use_sarp: true

	use_motion_lora: true
	train_motion_lora_only: false
	retrain_motion_lora: true

	use_inversed_latents: true
	use_attention_matching: true
	use_consistency_attention_control: false
	# Quoted to match `mixed_precision` in train_motion_lora_params.
	dtype: "fp16"

	visualize_attention_store: false
	# Unset (null). The commented-out value was [0, 5, 10, 15, 20, 24].
	visualize_attention_store_steps: null

	save_last_frames: true
	# Unset (null).
	load_from_last_frames_latents: null

	# data_params: source video, reference keyframe(s), and sampling/output
	# settings. The keys below are children of `data_params`.
	data_params:
	  # Don't change
	  video_path: "/home/user/app/upload/source_and_edits/source.mp4"
	  # Don't change
	  keyframe_paths:
	    - "/home/user/app/upload/source_and_edits/ref.jpg"
	  start_t: 0
	  end_t: 1.6
	  sample_fps: 10
	  chunk_size: 16
	  overlay_size: 1
	  normalize: true
	  output_fps: 3
	  save_sampled_frame: true
	  output_res: [576, 576]
	  pad_to_fit: true
	  begin_clip_id: 0
	  end_clip_id: 1

	# Settings for motion-LoRA training. `train_data` and `validation_data`
	# are nested sub-mappings; each has its own width/height/pad_to_fit.
	# NOTE(review): nesting of train_data / validation_data under this section
	# is reconstructed from key names — confirm against the consuming script.
	train_motion_lora_params:
	  cache_latents: true
	  # Unset (null). Set to a directory such as /path/to/cached_latents.
	  cached_latent_dir: null
	  lora_rank: 32
	  # Use LoRA for the UNET model.
	  use_unet_lora: true
	  # LoRA dropout. Sets the probability of randomly zeroing out elements. Helps prevent overfitting.
	  # See: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
	  lora_unet_dropout: 0.1
	  # The only time you want this off is if you're doing full LoRA training.
	  save_pretrained_model: false
	  # Learning rate for AdamW.
	  # Written with an explicit decimal point so YAML 1.1 loaders (e.g. PyYAML)
	  # resolve it as a float instead of the string "5e-4".
	  learning_rate: 5.0e-4
	  # Weight decay. Higher = more regularization. Lower = closer to dataset.
	  # Explicit decimal point for the same reason as learning_rate.
	  adam_weight_decay: 1.0e-2
	  # Maximum number of train steps. Model is saved after training.
	  max_train_steps: 600
	  # Saves a model every nth step.
	  checkpointing_steps: 100
	  # How many steps to do for validation if sample_preview is enabled.
	  validation_steps: 100
	  # Whether or not we want to use mixed precision with accelerate
	  mixed_precision: "fp16"
	  # Trades VRAM usage for speed. You lose roughly 20% of training speed, but save a lot of VRAM.
	  gradient_checkpointing: true
	  # If you need to save more VRAM, it can also be enabled for the encoder, but reduces speed x2.
	  # NOTE(review): the original comment said "text encoder"; the key below
	  # names the image encoder — confirm which is meant.
	  image_encoder_gradient_checkpointing: true

	  train_data:
	    # The width and height in which you want your training data to be resized to.
	    width: 576
	    height: 576
	    # This will find the closest aspect ratio to your input width and height.
	    # For example, 576x576 width and height with a video of resolution 1280x720 will be resized to 576x256
	    # NOTE(review): the two lines above do not obviously describe
	    # `use_data_aug`; they may be left over from a removed option — confirm.
	    # Unset (null). The commented-out value was "controlnet".
	    use_data_aug: null
	    pad_to_fit: true

	  validation_data:
	    # Whether or not to sample preview during training (Requires more VRAM).
	    sample_preview: true
	    # The number of frames to sample during validation.
	    num_frames: 16
	    # Height and width of validation sample.
	    width: 576
	    height: 576
	    pad_to_fit: true
	    # Scale of spatial LoRAs, default is 0
	    spatial_scale: 0
	    # Scale of noise prior, i.e. the scale of inversion noises
	    noise_prior:
	      # - 0.0
	      - 1.0

	# Parameters for the feature toggled by `use_sarp`.
	sarp_params:
	  sarp_noise_scale: 0.005

	# Parameters for the feature toggled by `use_attention_matching`.
	# NOTE(review): the nesting below is reconstructed from key names. In
	# particular confirm that `disk_store` ... `spatial_attention_chunk_size`
	# and `params` belong to this section, and that the `*_replace_steps` keys
	# are siblings of `control_mode` rather than its children.
	attention_matching_params:
	  best_checkpoint_index: 500
	  lora_scale: 1.0
	  # LoRA path. Unset (null).
	  lora_dir: null
	  max_guidance_scale: 2.0

	  disk_store: true
	  # Unset (null).
	  load_attention_store: null
	  load_consistency_attention_store: null
	  load_consistency_train_attention_store: null
	  # Attention layers listed per transformer block type.
	  registered_modules:
	    BasicTransformerBlock:
	      - "attn1"
	      # - "attn2"
	    TemporalBasicTransformerBlock:
	      - "attn1"
	      # - "attn2"
	  control_mode:
	    spatial_self: "masked_copy"
	    temporal_self: "copy_v2"
	  cross_replace_steps: 0.0
	  temporal_self_replace_steps: 1.0
	  spatial_self_replace_steps: 1.0
	  spatial_attention_chunk_size: 1

	  # Per-edit thresholds, keyed by edit name.
	  params:
	    edit0:
	      temporal_step_thr: [0.5, 0.8]
	      mask_thr: [0.35, 0.35]
	    edit1:
	      temporal_step_thr: [0.5, 0.8]
	      mask_thr: [0.35, 0.35]

	# Settings for long-video processing.
	long_video_params:
	  mode: "skip-interval"
	  # Attention layers listed per transformer block type.
	  registered_modules:
	    # Every entry is commented out, so this key currently resolves to null.
	    BasicTransformerBlock:
	      # - "attn1"
	      # - "attn2"
	    TemporalBasicTransformerBlock:
	      - "attn1"
	      # - "attn2"