From a1f6438f7b29bedc47fd66aba7ce5559d2d24ac7 Mon Sep 17 00:00:00 2001 From: bmaltais Date: Tue, 14 Feb 2023 17:42:36 -0500 Subject: [PATCH 1/4] Upgrade upgrade.ps1 script to fix reported issue: https://github.com/bmaltais/kohya_ss/issues/165 --- README.md | 2 ++ upgrade.ps1 | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/README.md b/README.md index d5cb7b8..e18d3b8 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,8 @@ Then redo the installation instruction within the kohya_ss venv. ## Change history +* 2023/02/15 (v20.7.3): + - Update upgrade.ps1 script * 2023/02/11 (v20.7.2): - `lora_interrogator.py` is added in `networks` folder. See `python networks\lora_interrogator.py -h` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. diff --git a/upgrade.ps1 b/upgrade.ps1 index cf46b68..2cd8f79 100644 --- a/upgrade.ps1 +++ b/upgrade.ps1 @@ -1,3 +1,14 @@ +# Check if there are any changes that need to be committed +if (git status --short) { + Write-Error "There are changes that need to be committed. Please stash or undo your changes before running this script." + return +} + +# Pull the latest changes from the remote repository git pull + +# Activate the virtual environment .\venv\Scripts\activate + +# Upgrade the required packages pip install --upgrade -r requirements.txt \ No newline at end of file From 641a168e55f429c79f9114bcdb123a13bc9b2167 Mon Sep 17 00:00:00 2001 From: bmaltais Date: Tue, 14 Feb 2023 18:52:08 -0500 Subject: [PATCH 2/4] Integrate new kohya sd-script --- README.md | 1 + fine_tune.py | 3 +++ library/train_util.py | 21 ++++++++++++++++----- networks/resize_lora.py | 17 ++++++++++++++--- train_db.py | 5 ++++- train_network.py | 28 +++++++++++++++++++++++----- train_textual_inversion.py | 3 +++ 7 files changed, 64 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e18d3b8..6165627 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,7 @@ Then redo the installation instruction within the kohya_ss venv. * 2023/02/15 (v20.7.3): - Update upgrade.ps1 script + - Integrate new kohya sd-script * 2023/02/11 (v20.7.2): - `lora_interrogator.py` is added in `networks` folder. See `python networks\lora_interrogator.py -h` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. 
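Before the hunks below: `fine_tune.py`, `train_db.py`, `train_network.py`, and `train_textual_inversion.py` all gain the same noise-offset step from the linked crosslabs post. As a minimal standalone sketch of what the added lines compute (the `apply_noise_offset` helper name is illustrative, not part of the patch):

```python
import torch

def apply_noise_offset(latents: torch.Tensor, noise_offset: float) -> torch.Tensor:
    # Base noise with the same shape as the latents (the usual DDPM target).
    noise = torch.randn_like(latents, device=latents.device)
    # Offset noise: one extra random value per (sample, channel), broadcast
    # across the spatial dimensions, so the model also learns to undo a shift
    # in the per-channel mean. This is what helps with darker/lighter images.
    noise += noise_offset * torch.randn(
        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
    )
    return noise
```

Per the new `--noise_offset` help text, a value around 0.1 is recommended; with the default of `None`, the extra term is skipped entirely.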
diff --git a/fine_tune.py b/fine_tune.py index 5292153..3ba6306 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -255,6 +255,9 @@ def train(args): # Sample noise that we'll add to the latents noise = torch.randn_like(latents, device=latents.device) + if args.noise_offset: + # https://www.crosslabs.org//blog/diffusion-with-offset-noise + noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) diff --git a/library/train_util.py b/library/train_util.py index 24e15d1..415f9b7 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -12,6 +12,7 @@ import math import os import random import hashlib +import subprocess from io import BytesIO from tqdm import tqdm @@ -299,7 +300,7 @@ class BaseDataset(torch.utils.data.Dataset): if self.shuffle_keep_tokens is None: if self.shuffle_caption: random.shuffle(tokens) - + tokens = dropout_tags(tokens) else: if len(tokens) > self.shuffle_keep_tokens: @@ -308,7 +309,7 @@ class BaseDataset(torch.utils.data.Dataset): if self.shuffle_caption: random.shuffle(tokens) - + tokens = dropout_tags(tokens) tokens = keep_tokens + tokens @@ -1100,6 +1101,13 @@ def addnet_hash_safetensors(b): return hash_sha256.hexdigest() +def get_git_revision_hash() -> str: + try: + return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip() + except: + return "(unknown)" + + # flash attention forwards and backwards # https://arxiv.org/abs/2205.14135 @@ -1413,6 +1421,8 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: help="scheduler to use for learning rate / 学習率のスケジューラ: linear, cosine, cosine_with_restarts, polynomial, constant (default), constant_with_warmup") parser.add_argument("--lr_warmup_steps", type=int, default=0, help="Number of steps for the warmup in the lr scheduler (default is 0) / 学習率のスケジューラをウォームアップするステップ数(デフォルト0)") + parser.add_argument("--noise_offset", type=float, default=None, + help="enable noise offset with this value (if enabled, around 0.1 is recommended) / Noise offsetを有効にしてこの値を設定する(有効にする場合は0.1程度を推奨)") if support_dreambooth: # DreamBooth training @@ -1620,9 +1630,6 @@ def get_hidden_states(args: argparse.Namespace, input_ids, tokenizer, text_encod else: enc_out = text_encoder(input_ids, output_hidden_states=True, return_dict=True) encoder_hidden_states = enc_out['hidden_states'][-args.clip_skip] - if weight_dtype is not None: - # this is required for additional network training - encoder_hidden_states = encoder_hidden_states.to(weight_dtype) encoder_hidden_states = text_encoder.text_model.final_layer_norm(encoder_hidden_states) # bs*3, 77, 768 or 1024 @@ -1649,6 +1656,10 @@ def get_hidden_states(args: argparse.Namespace, input_ids, tokenizer, text_encod states_list.append(encoder_hidden_states[:, -1].unsqueeze(1)) # encoder_hidden_states = torch.cat(states_list, dim=1) + if weight_dtype is not None: + # this is required for additional network training + encoder_hidden_states = encoder_hidden_states.to(weight_dtype) + return encoder_hidden_states diff --git a/networks/resize_lora.py b/networks/resize_lora.py index 7beeb25..271de8e 100644 --- a/networks/resize_lora.py +++ b/networks/resize_lora.py @@ -38,9 +38,10 @@ def save_to_file(file_name, model, state_dict, dtype, metadata): torch.save(model, file_name) -def resize_lora_model(lora_sd, new_rank, save_dtype, device): +def resize_lora_model(lora_sd, 
new_rank, save_dtype, device, verbose): network_alpha = None network_dim = None + verbose_str = "\n" CLAMP_QUANTILE = 0.99 @@ -96,6 +97,12 @@ def resize_lora_model(lora_sd, new_rank, save_dtype, device): U, S, Vh = torch.linalg.svd(full_weight_matrix) + if verbose: + s_sum = torch.sum(torch.abs(S)) + s_rank = torch.sum(torch.abs(S[:new_rank])) + verbose_str+=f"{block_down_name:76} | " + verbose_str+=f"sum(S) retained: {(s_rank)/s_sum:.1%}, max(S) ratio: {S[0]/S[new_rank]:0.1f}\n" + U = U[:, :new_rank] S = S[:new_rank] U = U @ torch.diag(S) @@ -113,7 +120,7 @@ def resize_lora_model(lora_sd, new_rank, save_dtype, device): U = U.unsqueeze(2).unsqueeze(3) Vh = Vh.unsqueeze(2).unsqueeze(3) - if args.device: + if device: U = U.to(org_device) Vh = Vh.to(org_device) @@ -127,6 +134,8 @@ def resize_lora_model(lora_sd, new_rank, save_dtype, device): lora_up_weight = None weights_loaded = False + if verbose: + print(verbose_str) print("resizing complete") return o_lora_sd, network_dim, new_alpha @@ -151,7 +160,7 @@ def resize(args): lora_sd, metadata = load_state_dict(args.model, merge_dtype) print("resizing rank...") - state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device) + state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.verbose) # update metadata if metadata is None: @@ -182,6 +191,8 @@ if __name__ == '__main__': parser.add_argument("--model", type=str, default=None, help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") + parser.add_argument("--verbose", action="store_true", + help="Display verbose resizing information / rank変更時の詳細情報を出力する") args = parser.parse_args() resize(args) diff --git a/train_db.py b/train_db.py index c210767..4a50dc9 100644 --- a/train_db.py +++ b/train_db.py @@ -233,10 +233,13 @@ def train(args): else: latents = vae.encode(batch["images"].to(dtype=weight_dtype)).latent_dist.sample() latents = latents * 0.18215 + b_size = latents.shape[0] # Sample noise that we'll add to the latents noise = torch.randn_like(latents, device=latents.device) - b_size = latents.shape[0] + if args.noise_offset: + # https://www.crosslabs.org//blog/diffusion-with-offset-noise + noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) # Get the text embedding for conditioning with torch.set_grad_enabled(global_step < args.stop_text_encoder_training): diff --git a/train_network.py b/train_network.py index bb3159f..1b8046d 100644 --- a/train_network.py +++ b/train_network.py @@ -1,5 +1,7 @@ from diffusers.optimization import SchedulerType, TYPE_TO_SCHEDULER_FUNCTION from torch.optim import Optimizer +from torch.cuda.amp import autocast +from torch.nn.parallel import DistributedDataParallel as DDP from typing import Optional, Union import importlib import argparse @@ -154,7 +156,9 @@ def train(args): # モデルを読み込む text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype) - + # unnecessary, but work on low-ram device + text_encoder.to("cuda") + unet.to("cuda") # モデルに xformers とか memory efficient attention を組み込む train_util.replace_unet_modules(unet, args.mem_eff_attn, args.xformers) @@ -258,17 +262,26 @@ def train(args): unet.requires_grad_(False) unet.to(accelerator.device, dtype=weight_dtype) text_encoder.requires_grad_(False) - text_encoder.to(accelerator.device, 
dtype=weight_dtype) + text_encoder.to(accelerator.device) if args.gradient_checkpointing: # according to TI example in Diffusers, train is required unet.train() text_encoder.train() # set top parameter requires_grad = True for gradient checkpointing works - text_encoder.text_model.embeddings.requires_grad_(True) + if type(text_encoder) == DDP: + text_encoder.module.text_model.embeddings.requires_grad_(True) + else: + text_encoder.text_model.embeddings.requires_grad_(True) else: unet.eval() text_encoder.eval() + # support DistributedDataParallel + if type(text_encoder) == DDP: + text_encoder = text_encoder.module + unet = unet.module + network = network.module + network.prepare_grad_etc(text_encoder, unet) if not cache_latents: @@ -344,7 +357,8 @@ def train(args): "ss_reg_dataset_dirs": json.dumps(train_dataset.reg_dataset_dirs_info), "ss_tag_frequency": json.dumps(train_dataset.tag_frequency), "ss_bucket_info": json.dumps(train_dataset.bucket_info), - "ss_training_comment": args.training_comment # will not be updated after training + "ss_training_comment": args.training_comment, # will not be updated after training + "ss_sd_scripts_commit_hash": train_util.get_git_revision_hash() } # uncomment if another network is added @@ -405,6 +419,9 @@ def train(args): # Sample noise that we'll add to the latents noise = torch.randn_like(latents, device=latents.device) + if args.noise_offset: + # https://www.crosslabs.org//blog/diffusion-with-offset-noise + noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) @@ -415,7 +432,8 @@ def train(args): noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) # Predict the noise residual - noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample + with autocast(): + noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample if args.v_parameterization: # v-parameterization training diff --git a/train_textual_inversion.py b/train_textual_inversion.py index 4aa91ee..010bd04 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -320,6 +320,9 @@ def train(args): # Sample noise that we'll add to the latents noise = torch.randn_like(latents, device=latents.device) + if args.noise_offset: + # https://www.crosslabs.org//blog/diffusion-with-offset-noise + noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) From bb57c1a36e6fefe110ba4f98e12e6a4649a01ea5 Mon Sep 17 00:00:00 2001 From: bmaltais Date: Sun, 19 Feb 2023 06:50:33 -0500 Subject: [PATCH 3/4] Update code to latest sd-script version --- README-ja.md | 6 ++++++ README.md | 14 ++++++++++++++ fine_tune.py | 7 +++++++ library/train_util.py | 6 +++++- train_db.py | 19 ++++++++++++++++--- train_network.py | 34 +++++++++++++++++++++++++++------- train_textual_inversion.py | 7 +++++++ 7 files changed, 82 insertions(+), 11 deletions(-) diff --git a/README-ja.md b/README-ja.md index adf44d2..064464c 100644 --- a/README-ja.md +++ b/README-ja.md @@ -64,6 +64,12 @@ cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_set accelerate config ``` + + コマンドプロンプトでは以下になります。 diff --git a/README.md b/README.md index 6165627..9f98ec5 100644 --- 
a/README.md +++ b/README.md @@ -143,9 +143,23 @@ Then redo the installation instruction within the kohya_ss venv. ## Change history +* 2023/02/19 (v20.7.4): + - Add `--use_lion_optimizer` to each training script to use the [Lion optimizer](https://github.com/lucidrains/lion-pytorch). + - Please install the Lion optimizer with `pip install lion-pytorch` (it is not in `requirements.txt` currently). + - Add `--lowram` option to `train_network.py`. Load models to VRAM instead of RAM (for machines which have bigger VRAM than RAM, such as Colab and Kaggle). Thanks to Isotr0py! + - Default behavior (without `--lowram`) has reverted to the same as before 14 Feb. + - Fixed the git commit hash to be set correctly regardless of the working directory. Thanks to vladmandic! * 2023/02/15 (v20.7.3): - Update upgrade.ps1 script - Integrate new kohya sd-script + - Noise offset is recorded in the metadata. Thanks to space-nuko! + - Show the moving average loss to prevent the displayed loss from jumping in `train_network.py` and `train_db.py`. Thanks to shirayu! + - Add support for multi-GPU training in `train_network.py`. Thanks to Isotr0py! + - Add `--verbose` option for `resize_lora.py`. For details, see [this PR](https://github.com/kohya-ss/sd-scripts/pull/179). Thanks to mgz-dev! + - Git commit hash is added to the metadata for LoRA. Thanks to space-nuko! + - Add `--noise_offset` option to each training script. + - Implementation of https://www.crosslabs.org//blog/diffusion-with-offset-noise + - This option may improve the ability to generate darker/lighter images. It may also work with LoRA. * 2023/02/11 (v20.7.2): - `lora_interrogator.py` is added in `networks` folder. See `python networks\lora_interrogator.py -h` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. 
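The moving-average loss change described in this entry appears in the `train_db.py` and `train_network.py` hunks further down. Restated as a hedged, self-contained sketch (the `update_moving_average` wrapper is illustrative only): during epoch 0 the list grows by one slot per step; from epoch 1 on, each step evicts the value that the same step produced one epoch earlier, so the reported average is always taken over a sliding window of exactly one epoch instead of being reset to zero at each epoch boundary.

```python
loss_list: list[float] = []   # one slot per dataloader step
loss_total = 0.0              # running sum of the values currently in loss_list

def update_moving_average(epoch: int, step: int, current_loss: float) -> float:
    """Mean loss over the most recent len(loss_list) steps (a one-epoch window)."""
    global loss_total
    if epoch == 0:
        loss_list.append(current_loss)   # first epoch: grow the window
    else:
        loss_total -= loss_list[step]    # evict last epoch's value for this step
        loss_list[step] = current_loss
    loss_total += current_loss
    return loss_total / len(loss_list)
```

This is also why the per-epoch log key changes from `epoch_loss` (a plain average over the dataloader) to `loss/epoch`, computed from the same window.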
diff --git a/fine_tune.py b/fine_tune.py index 3ba6306..13241bc 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -158,6 +158,13 @@ def train(args): raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") print("use 8-bit Adam optimizer") optimizer_class = bnb.optim.AdamW8bit + elif args.use_lion_optimizer: + try: + import lion_pytorch + except ImportError: + raise ImportError("No lion_pytorch / lion_pytorch がインストールされていないようです") + print("use Lion optimizer") + optimizer_class = lion_pytorch.Lion else: optimizer_class = torch.optim.AdamW diff --git a/library/train_util.py b/library/train_util.py index 415f9b7..63868f9 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1103,7 +1103,7 @@ def addnet_hash_safetensors(b): def get_git_revision_hash() -> str: try: - return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip() + return subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=os.path.dirname(__file__)).decode('ascii').strip() except: return "(unknown)" @@ -1389,6 +1389,8 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: help="max token length of text encoder (default for 75, 150 or 225) / text encoderのトークンの最大長(未指定で75、150または225が指定可)") parser.add_argument("--use_8bit_adam", action="store_true", help="use 8bit Adam optimizer (requires bitsandbytes) / 8bit Adamオプティマイザを使う(bitsandbytesのインストールが必要)") + parser.add_argument("--use_lion_optimizer", action="store_true", + help="use Lion optimizer (requires lion-pytorch) / Lionオプティマイザを使う( lion-pytorch のインストールが必要)") parser.add_argument("--mem_eff_attn", action="store_true", help="use memory efficient attention for CrossAttention / CrossAttentionに省メモリ版attentionを使う") parser.add_argument("--xformers", action="store_true", @@ -1423,6 +1425,8 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: help="Number of steps for the warmup in the lr scheduler (default is 0) / 学習率のスケジューラをウォームアップするステップ数(デフォルト0)") parser.add_argument("--noise_offset", type=float, default=None, help="enable noise offset with this value (if enabled, around 0.1 is recommended) / Noise offsetを有効にしてこの値を設定する(有効にする場合は0.1程度を推奨)") + parser.add_argument("--lowram", action="store_true", + help="enable low RAM optimization. e.g. 
load models to VRAM instead of RAM (for machines which have bigger VRAM than RAM such as Colab and Kaggle) / メインメモリが少ない環境向け最適化を有効にする。たとえばVRAMにモデルを読み込むなど(ColabやKaggleなどRAMに比べてVRAMが多い環境向け)") if support_dreambooth: # DreamBooth training diff --git a/train_db.py b/train_db.py index 4a50dc9..1903c4c 100644 --- a/train_db.py +++ b/train_db.py @@ -124,6 +124,13 @@ def train(args): raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") print("use 8-bit Adam optimizer") optimizer_class = bnb.optim.AdamW8bit + elif args.use_lion_optimizer: + try: + import lion_pytorch + except ImportError: + raise ImportError("No lion_pytorch / lion_pytorch がインストールされていないようです") + print("use Lion optimizer") + optimizer_class = lion_pytorch.Lion else: optimizer_class = torch.optim.AdamW @@ -206,6 +213,8 @@ def train(args): if accelerator.is_main_process: accelerator.init_trackers("dreambooth") + loss_list = [] + loss_total = 0.0 for epoch in range(num_train_epochs): print(f"epoch {epoch+1}/{num_train_epochs}") train_dataset.set_current_epoch(epoch + 1) @@ -216,7 +225,6 @@ def train(args): if args.gradient_checkpointing or global_step < args.stop_text_encoder_training: text_encoder.train() - loss_total = 0 for step, batch in enumerate(train_dataloader): # 指定したステップ数でText Encoderの学習を止める if global_step == args.stop_text_encoder_training: @@ -294,8 +302,13 @@ def train(args): logs = {"loss": current_loss, "lr": lr_scheduler.get_last_lr()[0]} accelerator.log(logs, step=global_step) + if epoch == 0: + loss_list.append(current_loss) + else: + loss_total -= loss_list[step] + loss_list[step] = current_loss loss_total += current_loss - avr_loss = loss_total / (step+1) + avr_loss = loss_total / len(loss_list) logs = {"loss": avr_loss} # , "lr": lr_scheduler.get_last_lr()[0]} progress_bar.set_postfix(**logs) @@ -303,7 +316,7 @@ def train(args): break if args.logging_dir is not None: - logs = {"epoch_loss": loss_total / len(train_dataloader)} + logs = {"loss/epoch": loss_total / len(loss_list)} accelerator.log(logs, step=epoch+1) accelerator.wait_for_everyone() diff --git a/train_network.py b/train_network.py index 1b8046d..1489691 100644 --- a/train_network.py +++ b/train_network.py @@ -156,9 +156,12 @@ def train(args): # モデルを読み込む text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype) - # unnecessary, but work on low-ram device - text_encoder.to("cuda") - unet.to("cuda") + + # work on low-ram device + if args.lowram: + text_encoder.to("cuda") + unet.to("cuda") + # モデルに xformers とか memory efficient attention を組み込む train_util.replace_unet_modules(unet, args.mem_eff_attn, args.xformers) @@ -213,9 +216,18 @@ def train(args): raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") print("use 8-bit Adam optimizer") optimizer_class = bnb.optim.AdamW8bit + elif args.use_lion_optimizer: + try: + import lion_pytorch + except ImportError: + raise ImportError("No lion_pytorch / lion_pytorch がインストールされていないようです") + print("use Lion optimizer") + optimizer_class = lion_pytorch.Lion else: optimizer_class = torch.optim.AdamW + optimizer_name = optimizer_class.__module__ + "." 
+ optimizer_class.__name__ + trainable_params = network.prepare_optimizer_params(args.text_encoder_lr, args.unet_lr) # betaやweight decayはdiffusers DreamBoothもDreamBooth SDもデフォルト値のようなのでオプションはとりあえず省略 @@ -353,12 +365,14 @@ def train(args): "ss_max_bucket_reso": train_dataset.max_bucket_reso, "ss_seed": args.seed, "ss_keep_tokens": args.keep_tokens, + "ss_noise_offset": args.noise_offset, "ss_dataset_dirs": json.dumps(train_dataset.dataset_dirs_info), "ss_reg_dataset_dirs": json.dumps(train_dataset.reg_dataset_dirs_info), "ss_tag_frequency": json.dumps(train_dataset.tag_frequency), "ss_bucket_info": json.dumps(train_dataset.bucket_info), "ss_training_comment": args.training_comment, # will not be updated after training - "ss_sd_scripts_commit_hash": train_util.get_git_revision_hash() + "ss_sd_scripts_commit_hash": train_util.get_git_revision_hash(), + "ss_optimizer": optimizer_name } # uncomment if another network is added @@ -392,6 +406,8 @@ def train(args): if accelerator.is_main_process: accelerator.init_trackers("network_train") + loss_list = [] + loss_total = 0.0 for epoch in range(num_train_epochs): print(f"epoch {epoch+1}/{num_train_epochs}") train_dataset.set_current_epoch(epoch + 1) @@ -400,7 +416,6 @@ def train(args): network.on_epoch_start(text_encoder, unet) - loss_total = 0 for step, batch in enumerate(train_dataloader): with accelerator.accumulate(network): with torch.no_grad(): @@ -464,8 +479,13 @@ def train(args): global_step += 1 current_loss = loss.detach().item() + if epoch == 0: + loss_list.append(current_loss) + else: + loss_total -= loss_list[step] + loss_list[step] = current_loss loss_total += current_loss - avr_loss = loss_total / (step+1) + avr_loss = loss_total / len(loss_list) logs = {"loss": avr_loss} # , "lr": lr_scheduler.get_last_lr()[0]} progress_bar.set_postfix(**logs) @@ -477,7 +497,7 @@ def train(args): break if args.logging_dir is not None: - logs = {"loss/epoch": loss_total / len(train_dataloader)} + logs = {"loss/epoch": loss_total / len(loss_list)} accelerator.log(logs, step=epoch+1) accelerator.wait_for_everyone() diff --git a/train_textual_inversion.py b/train_textual_inversion.py index 010bd04..ffec051 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -207,6 +207,13 @@ def train(args): raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") print("use 8-bit Adam optimizer") optimizer_class = bnb.optim.AdamW8bit + elif args.use_lion_optimizer: + try: + import lion_pytorch + except ImportError: + raise ImportError("No lion_pytorch / lion_pytorch がインストールされていないようです") + print("use Lion optimizer") + optimizer_class = lion_pytorch.Lion else: optimizer_class = torch.optim.AdamW From 758bfe85dc98aed3dfab55e4cdeb96f742aab3de Mon Sep 17 00:00:00 2001 From: bmaltais Date: Sun, 19 Feb 2023 20:13:03 -0500 Subject: [PATCH 4/4] Adding support for Lion optimizer in gui --- dreambooth_gui.py | 6 ++++ finetune_gui.py | 6 ++++ library/common_gui.py | 67 ++++++++++++++++++++++++++++++++++++- lora_gui.py | 6 ++++ presets/lion_optimizer.json | 59 ++++++++++++++++++++++++++++++++ requirements.txt | 3 +- textual_inversion_gui.py | 6 ++++ 7 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 presets/lion_optimizer.json diff --git a/dreambooth_gui.py b/dreambooth_gui.py index b843df3..a58c333 100644 --- a/dreambooth_gui.py +++ b/dreambooth_gui.py @@ -89,6 +89,7 @@ def save_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters 
and values parameters = list(locals().items()) @@ -179,6 +180,7 @@ def open_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -253,6 +255,7 @@ def train_model( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): if pretrained_model_name_or_path == '': msgbox('Source model information is missing') @@ -397,6 +400,7 @@ def train_model( seed=seed, caption_extension=caption_extension, cache_latents=cache_latents, + optimizer=optimizer ) run_cmd += run_cmd_advanced_training( @@ -541,6 +545,7 @@ def dreambooth_tab( seed, caption_extension, cache_latents, + optimizer, ) = gradio_training( learning_rate_value='1e-5', lr_scheduler_value='cosine', @@ -668,6 +673,7 @@ def dreambooth_tab( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ] button_open_config.click( diff --git a/finetune_gui.py b/finetune_gui.py index 4c2ccec..01f04d1 100644 --- a/finetune_gui.py +++ b/finetune_gui.py @@ -85,6 +85,7 @@ def save_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -181,6 +182,7 @@ def open_config_file( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -262,6 +264,7 @@ def train_model( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # create caption json file if generate_caption_database: @@ -386,6 +389,7 @@ def train_model( seed=seed, caption_extension=caption_extension, cache_latents=cache_latents, + optimizer=optimizer, ) run_cmd += run_cmd_advanced_training( @@ -564,6 +568,7 @@ def finetune_tab(): seed, caption_extension, cache_latents, + optimizer, ) = gradio_training(learning_rate_value='1e-5') with gr.Row(): dataset_repeats = gr.Textbox(label='Dataset repeats', value=40) @@ -661,6 +666,7 @@ def finetune_tab(): random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ] button_run.click(train_model, inputs=settings_list) diff --git a/library/common_gui.py b/library/common_gui.py index d52e07e..027224e 100644 --- a/library/common_gui.py +++ b/library/common_gui.py @@ -445,6 +445,7 @@ def gradio_training( value=2, ) seed = gr.Textbox(label='Seed', value=1234) + cache_latents = gr.Checkbox(label='Cache latent', value=True) with gr.Row(): learning_rate = gr.Textbox( label='Learning rate', value=learning_rate_value @@ -464,7 +465,15 @@ def gradio_training( lr_warmup = gr.Textbox( label='LR warmup (% of steps)', value=lr_warmup_value ) - cache_latents = gr.Checkbox(label='Cache latent', value=True) + optimizer = gr.Dropdown( + label='Optimizer', + choices=[ + 'AdamW', + 'Lion', + ], + value="AdamW", + interactive=True, + ) return ( learning_rate, lr_scheduler, @@ -478,6 +487,7 @@ def gradio_training( seed, caption_extension, cache_latents, + optimizer, ) @@ -512,10 +522,34 @@ def run_cmd_training(**kwargs): if kwargs.get('caption_extension') else '', ' --cache_latents' if kwargs.get('cache_latents') else '', + ' --use_lion_optimizer' if kwargs.get('optimizer') == 'Lion' else '', ] run_cmd = ''.join(options) return run_cmd +# # This function takes a dictionary of keyword arguments and 
returns a string that can be used to run a command-line training script +# def run_cmd_training(**kwargs): +# arg_map = { +# 'learning_rate': ' --learning_rate="{}"', +# 'lr_scheduler': ' --lr_scheduler="{}"', +# 'lr_warmup_steps': ' --lr_warmup_steps="{}"', +# 'train_batch_size': ' --train_batch_size="{}"', +# 'max_train_steps': ' --max_train_steps="{}"', +# 'save_every_n_epochs': ' --save_every_n_epochs="{}"', +# 'mixed_precision': ' --mixed_precision="{}"', +# 'save_precision': ' --save_precision="{}"', +# 'seed': ' --seed="{}"', +# 'caption_extension': ' --caption_extension="{}"', +# 'cache_latents': ' --cache_latents', +# 'optimizer': ' --use_lion_optimizer' if kwargs.get('optimizer') == 'Lion' else '', +# } + +# options = [arg_map[key].format(value) for key, value in kwargs.items() if key in arg_map and value] + +# cmd = ''.join(options) + +# return cmd + def gradio_advanced_training(): with gr.Row(): @@ -664,3 +698,34 @@ def run_cmd_advanced_training(**kwargs): ] run_cmd = ''.join(options) return run_cmd + +# def run_cmd_advanced_training(**kwargs): +# arg_map = { +# 'max_train_epochs': ' --max_train_epochs="{}"', +# 'max_data_loader_n_workers': ' --max_data_loader_n_workers="{}"', +# 'max_token_length': ' --max_token_length={}' if int(kwargs.get('max_token_length', 75)) > 75 else '', +# 'clip_skip': ' --clip_skip={}' if int(kwargs.get('clip_skip', 1)) > 1 else '', +# 'resume': ' --resume="{}"', +# 'keep_tokens': ' --keep_tokens="{}"' if int(kwargs.get('keep_tokens', 0)) > 0 else '', +# 'caption_dropout_every_n_epochs': ' --caption_dropout_every_n_epochs="{}"' if int(kwargs.get('caption_dropout_every_n_epochs', 0)) > 0 else '', +# 'caption_dropout_rate': ' --caption_dropout_rate="{}"' if float(kwargs.get('caption_dropout_rate', 0)) > 0 else '', +# 'bucket_reso_steps': ' --bucket_reso_steps={:d}' if int(kwargs.get('bucket_reso_steps', 64)) >= 1 else '', +# 'save_state': ' --save_state', +# 'mem_eff_attn': ' --mem_eff_attn', +# 'color_aug': ' --color_aug', +# 'flip_aug': ' --flip_aug', +# 'shuffle_caption': ' --shuffle_caption', +# 'gradient_checkpointing': ' --gradient_checkpointing', +# 'full_fp16': ' --full_fp16', +# 'xformers': ' --xformers', +# 'use_8bit_adam': ' --use_8bit_adam', +# 'persistent_data_loader_workers': ' --persistent_data_loader_workers', +# 'bucket_no_upscale': ' --bucket_no_upscale', +# 'random_crop': ' --random_crop', +# } + +# options = [arg_map[key].format(value) for key, value in kwargs.items() if key in arg_map and value] + +# cmd = ''.join(options) + +# return cmd \ No newline at end of file diff --git a/lora_gui.py b/lora_gui.py index a2ec7b7..9d3df33 100644 --- a/lora_gui.py +++ b/lora_gui.py @@ -100,6 +100,7 @@ def save_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -197,6 +198,7 @@ def open_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -278,6 +280,7 @@ def train_model( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): if pretrained_model_name_or_path == '': msgbox('Source model information is missing') @@ -457,6 +460,7 @@ def train_model( seed=seed, caption_extension=caption_extension, cache_latents=cache_latents, + optimizer=optimizer, ) run_cmd += run_cmd_advanced_training( @@ -609,6 +613,7 @@ def 
lora_tab( seed, caption_extension, cache_latents, + optimizer, ) = gradio_training( learning_rate_value='0.0001', lr_scheduler_value='cosine', @@ -778,6 +783,7 @@ def lora_tab( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ] button_open_config.click( diff --git a/presets/lion_optimizer.json b/presets/lion_optimizer.json new file mode 100644 index 0000000..77ffa4d --- /dev/null +++ b/presets/lion_optimizer.json @@ -0,0 +1,59 @@ +{ + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "v2": false, + "v_parameterization": false, + "logging_dir": "D:\\dataset\\marty_mcfly\\1985\\lora/log", + "train_data_dir": "D:\\dataset\\marty_mcfly\\1985\\lora\\img_gan", + "reg_data_dir": "", + "output_dir": "D:/lora/sd1.5/marty_mcfly", + "max_resolution": "512,512", + "learning_rate": "0.00003333", + "lr_scheduler": "cosine", + "lr_warmup": "0", + "train_batch_size": 8, + "epoch": "1", + "save_every_n_epochs": "1", + "mixed_precision": "bf16", + "save_precision": "fp16", + "seed": "1234", + "num_cpu_threads_per_process": 2, + "cache_latents": false, + "caption_extension": "", + "enable_bucket": true, + "gradient_checkpointing": false, + "full_fp16": false, + "no_token_padding": false, + "stop_text_encoder_training": 0, + "use_8bit_adam": false, + "xformers": true, + "save_model_as": "safetensors", + "shuffle_caption": false, + "save_state": false, + "resume": "", + "prior_loss_weight": 1.0, + "text_encoder_lr": "0.000016666", + "unet_lr": "0.00003333", + "network_dim": 128, + "lora_network_weights": "", + "color_aug": false, + "flip_aug": false, + "clip_skip": "1", + "gradient_accumulation_steps": 1.0, + "mem_eff_attn": false, + "output_name": "mrtmcfl_v2.0", + "model_list": "runwayml/stable-diffusion-v1-5", + "max_token_length": "75", + "max_train_epochs": "", + "max_data_loader_n_workers": "0", + "network_alpha": 128, + "training_comment": "", + "keep_tokens": "0", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "persistent_data_loader_workers": false, + "bucket_no_upscale": true, + "random_crop": true, + "bucket_reso_steps": 64.0, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1 +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a8bcefb..bfbe8d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ gradio==3.16.2 altair==4.2.2 easygui==0.98.3 tk==0.1.0 +lion-pytorch==0.0.6 # for BLIP captioning requests==2.28.2 timm==0.6.12 @@ -21,6 +22,6 @@ fairscale==0.4.13 # tensorflow<2.11 tensorflow==2.10.1 huggingface-hub==0.12.0 -xformers @ https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl +# xformers @ https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl # for kohya_ss library . 
\ No newline at end of file diff --git a/textual_inversion_gui.py b/textual_inversion_gui.py index 336be06..7ceffb1 100644 --- a/textual_inversion_gui.py +++ b/textual_inversion_gui.py @@ -95,6 +95,7 @@ def save_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -195,6 +196,7 @@ def open_configuration( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): # Get list of function parameters and values parameters = list(locals().items()) @@ -275,6 +277,7 @@ def train_model( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ): if pretrained_model_name_or_path == '': msgbox('Source model information is missing') @@ -434,6 +437,7 @@ def train_model( seed=seed, caption_extension=caption_extension, cache_latents=cache_latents, + optimizer=optimizer, ) run_cmd += run_cmd_advanced_training( @@ -623,6 +627,7 @@ def ti_tab( seed, caption_extension, cache_latents, + optimizer, ) = gradio_training( learning_rate_value='1e-5', lr_scheduler_value='cosine', @@ -756,6 +761,7 @@ def ti_tab( random_crop, bucket_reso_steps, caption_dropout_every_n_epochs, caption_dropout_rate, + optimizer, ] button_open_config.click(
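Across all four GUI files in this last patch, the pattern is the same: the new `optimizer` dropdown value is threaded through `save_configuration`, `open_configuration`/`open_config_file`, and `train_model` into `run_cmd_training`, which converts it into the CLI flag the training scripts now accept. A hedged sketch of that mapping (the `build_cmd` helper and the `accelerate launch` prefix are assumptions for illustration; the real flag handling lives in `library/common_gui.py`):

```python
def optimizer_flag(optimizer: str) -> str:
    # Mirrors the new run_cmd_training option: only the 'Lion' choice emits a
    # flag; 'AdamW' adds nothing, so the scripts fall back to torch.optim.AdamW.
    return ' --use_lion_optimizer' if optimizer == 'Lion' else ''

def build_cmd(script: str, optimizer: str) -> str:
    # Illustrative only: the GUI assembles a longer command line than this.
    return f'accelerate launch "{script}"' + optimizer_flag(optimizer)

print(build_cmd('train_network.py', 'Lion'))
# accelerate launch "train_network.py" --use_lion_optimizer
```

Note that this patch also pins `lion-pytorch==0.0.6` in `requirements.txt`, so for this repository the earlier caveat that Lion is not in `requirements.txt` no longer applies.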