commit
29ec8658a5
46
README.md
46
README.md
@ -6,7 +6,7 @@ This repository repository is providing a Gradio GUI for kohya's Stable Diffusio
|
||||
|
||||
Python 3.10.6+ and Git:
|
||||
|
||||
- Python 3.10.6+: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe
|
||||
- Python 3.10.6+: https://www.python.org/ftp/python/3.10.9/python-3.10.9-amd64.exe
|
||||
- git: https://git-scm.com/download/win
|
||||
|
||||
## Installation
|
||||
@ -27,7 +27,7 @@ python -m venv --system-site-packages venv
|
||||
.\venv\Scripts\activate
|
||||
|
||||
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
|
||||
pip install --upgrade -r requirements.txt
|
||||
pip install --use-pep517 --upgrade -r requirements.txt
|
||||
pip install -U -I --no-deps https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl
|
||||
|
||||
cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\
|
||||
@ -61,7 +61,7 @@ When a new release comes out you can upgrade your repo with the following comman
|
||||
cd kohya_ss
|
||||
git pull
|
||||
.\venv\Scripts\activate
|
||||
pip install --upgrade -r requirements.txt
|
||||
pip install --use-pep517 --upgrade -r requirements.txt
|
||||
```
|
||||
|
||||
Once the commands have completed successfully you should be ready to use the new version.
|
||||
@ -104,8 +104,48 @@ python lora_gui.py
|
||||
|
||||
Once you have created the LoRA network you can generate images via auto1111 by installing the extension found here: https://github.com/kohya-ss/sd-webui-additional-networks
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Page file limit
|
||||
|
||||
- if get X error relating to `page file`, increase page file size limit in Windows
|
||||
|
||||
### No module called tkinter
|
||||
|
||||
- Re-install python 3.10.x on your system: https://www.python.org/ftp/python/3.10.9/python-3.10.9-amd64.exe
|
||||
|
||||
## Change history
|
||||
|
||||
* 2023/01/16 (v20.3.0)
|
||||
- Fix a part of LoRA modules are not trained when ``gradient_checkpointing`` is enabled.
|
||||
- Add ``--save_last_n_epochs_state`` option. You can specify how many state folders to keep, apart from how many models to keep. Thanks to shirayu!
|
||||
- Fix Text Encoder training stops at ``max_train_steps`` even if ``max_train_epochs`` is set in `train_db.py``.
|
||||
- Added script to check LoRA weights. You can check weights by ``python networks\check_lora_weights.py <model file>``. If some modules are not trained, the value is ``0.0`` like following.
|
||||
- ``lora_te_text_model_encoder_layers_11_*`` is not trained with ``clip_skip=2``, so ``0.0`` is okay for these modules.
|
||||
|
||||
- example result of ``check_lora_weights.py``, Text Encoder and a part of U-Net are not trained:
|
||||
```
|
||||
number of LoRA-up modules: 264
|
||||
lora_te_text_model_encoder_layers_0_mlp_fc1.lora_up.weight,0.0
|
||||
lora_te_text_model_encoder_layers_0_mlp_fc2.lora_up.weight,0.0
|
||||
lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_up.weight,0.0
|
||||
:
|
||||
lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj.lora_up.weight,0.0
|
||||
lora_unet_down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2.lora_up.weight,0.0
|
||||
lora_unet_mid_block_attentions_0_proj_in.lora_up.weight,0.003503334941342473
|
||||
lora_unet_mid_block_attentions_0_proj_out.lora_up.weight,0.004308608360588551
|
||||
:
|
||||
```
|
||||
|
||||
- all modules are trained:
|
||||
```
|
||||
number of LoRA-up modules: 264
|
||||
lora_te_text_model_encoder_layers_0_mlp_fc1.lora_up.weight,0.0028684409335255623
|
||||
lora_te_text_model_encoder_layers_0_mlp_fc2.lora_up.weight,0.0029794853180646896
|
||||
lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_up.weight,0.002507600700482726
|
||||
lora_te_text_model_encoder_layers_0_self_attn_out_proj.lora_up.weight,0.002639499492943287
|
||||
:
|
||||
```
|
||||
* 2023/01/16 (v20.2.1):
|
||||
- Merging latest code update from kohya
|
||||
- Added `--max_train_epochs` and `--max_data_loader_n_workers` option for each training script.
|
||||
|
10
gui.bat
Normal file
10
gui.bat
Normal file
@ -0,0 +1,10 @@
|
||||
@echo off
|
||||
|
||||
set VENV_DIR=.\venv
|
||||
set PYTHON=python
|
||||
|
||||
call %VENV_DIR%\Scripts\activate.bat
|
||||
|
||||
%PYTHON% kohya_gui.py
|
||||
|
||||
pause
|
@ -1029,6 +1029,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth:
|
||||
parser.add_argument("--save_every_n_epochs", type=int, default=None,
|
||||
help="save checkpoint every N epochs / 学習中のモデルを指定エポックごとに保存する")
|
||||
parser.add_argument("--save_last_n_epochs", type=int, default=None, help="save last N checkpoints / 最大Nエポック保存する")
|
||||
parser.add_argument("--save_last_n_epochs_state", type=int, default=None, help="save last N checkpoints of state (overrides the value of --save_last_n_epochs)/ 最大Nエポックstateを保存する(--save_last_n_epochsの指定を上書きします)")
|
||||
parser.add_argument("--save_state", action="store_true",
|
||||
help="save training state additionally (including optimizer states etc.) / optimizerなど学習状態も含めたstateを追加で保存する")
|
||||
parser.add_argument("--resume", type=str, default=None, help="saved state to resume training / 学習再開するモデルのstate")
|
||||
@ -1298,7 +1299,6 @@ def get_epoch_ckpt_name(args: argparse.Namespace, use_safetensors, epoch):
|
||||
|
||||
def save_on_epoch_end(args: argparse.Namespace, save_func, remove_old_func, epoch_no: int, num_train_epochs: int):
|
||||
saving = epoch_no % args.save_every_n_epochs == 0 and epoch_no < num_train_epochs
|
||||
remove_epoch_no = None
|
||||
if saving:
|
||||
os.makedirs(args.output_dir, exist_ok=True)
|
||||
save_func()
|
||||
@ -1306,7 +1306,7 @@ def save_on_epoch_end(args: argparse.Namespace, save_func, remove_old_func, epoc
|
||||
if args.save_last_n_epochs is not None:
|
||||
remove_epoch_no = epoch_no - args.save_every_n_epochs * args.save_last_n_epochs
|
||||
remove_old_func(remove_epoch_no)
|
||||
return saving, remove_epoch_no
|
||||
return saving
|
||||
|
||||
|
||||
def save_sd_model_on_epoch_end(args: argparse.Namespace, accelerator, src_path: str, save_stable_diffusion_format: bool, use_safetensors: bool, save_dtype: torch.dtype, epoch: int, num_train_epochs: int, global_step: int, text_encoder, unet, vae):
|
||||
@ -1346,15 +1346,18 @@ def save_sd_model_on_epoch_end(args: argparse.Namespace, accelerator, src_path:
|
||||
save_func = save_du
|
||||
remove_old_func = remove_du
|
||||
|
||||
saving, remove_epoch_no = save_on_epoch_end(args, save_func, remove_old_func, epoch_no, num_train_epochs)
|
||||
saving = save_on_epoch_end(args, save_func, remove_old_func, epoch_no, num_train_epochs)
|
||||
if saving and args.save_state:
|
||||
save_state_on_epoch_end(args, accelerator, model_name, epoch_no, remove_epoch_no)
|
||||
save_state_on_epoch_end(args, accelerator, model_name, epoch_no)
|
||||
|
||||
|
||||
def save_state_on_epoch_end(args: argparse.Namespace, accelerator, model_name, epoch_no, remove_epoch_no):
|
||||
def save_state_on_epoch_end(args: argparse.Namespace, accelerator, model_name, epoch_no):
|
||||
print("saving state.")
|
||||
accelerator.save_state(os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, epoch_no)))
|
||||
if remove_epoch_no is not None:
|
||||
|
||||
last_n_epochs = args.save_last_n_epochs_state if args.save_last_n_epochs_state else args.save_last_n_epochs
|
||||
if last_n_epochs is not None:
|
||||
remove_epoch_no = epoch_no - args.save_every_n_epochs * last_n_epochs
|
||||
state_dir_old = os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, remove_epoch_no))
|
||||
if os.path.exists(state_dir_old):
|
||||
print(f"removing old state: {state_dir_old}")
|
||||
|
31
networks/check_lora_weights.py
Normal file
31
networks/check_lora_weights.py
Normal file
@ -0,0 +1,31 @@
|
||||
import argparse
|
||||
import os
|
||||
import torch
|
||||
from safetensors.torch import load_file
|
||||
|
||||
|
||||
def main(file):
|
||||
print(f"loading: {file}")
|
||||
if os.path.splitext(file)[1] == '.safetensors':
|
||||
sd = load_file(file)
|
||||
else:
|
||||
sd = torch.load(file, map_location='cpu')
|
||||
|
||||
values = []
|
||||
|
||||
keys = list(sd.keys())
|
||||
for key in keys:
|
||||
if 'lora_up' in key:
|
||||
values.append((key, sd[key]))
|
||||
print(f"number of LoRA-up modules: {len(values)}")
|
||||
|
||||
for key, value in values:
|
||||
print(f"{key},{torch.mean(torch.abs(value))}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("file", type=str, help="model file to check / 重みを確認するモデルファイル")
|
||||
args = parser.parse_args()
|
||||
|
||||
main(args.file)
|
@ -12,6 +12,8 @@ safetensors==0.2.6
|
||||
gradio==3.15.0
|
||||
altair
|
||||
easygui
|
||||
tk
|
||||
tkinter
|
||||
# for BLIP captioning
|
||||
requests
|
||||
timm
|
||||
|
@ -92,10 +92,7 @@ def train(args):
|
||||
gc.collect()
|
||||
|
||||
# 学習を準備する:モデルを適切な状態にする
|
||||
if args.stop_text_encoder_training is None:
|
||||
args.stop_text_encoder_training = args.max_train_steps + 1 # do not stop until end
|
||||
|
||||
train_text_encoder = args.stop_text_encoder_training >= 0
|
||||
train_text_encoder = args.stop_text_encoder_training is None or args.stop_text_encoder_training >= 0
|
||||
unet.requires_grad_(True) # 念のため追加
|
||||
text_encoder.requires_grad_(train_text_encoder)
|
||||
if not train_text_encoder:
|
||||
@ -143,6 +140,9 @@ def train(args):
|
||||
args.max_train_steps = args.max_train_epochs * len(train_dataloader)
|
||||
print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
|
||||
|
||||
if args.stop_text_encoder_training is None:
|
||||
args.stop_text_encoder_training = args.max_train_steps + 1 # do not stop until end
|
||||
|
||||
# lr schedulerを用意する
|
||||
lr_scheduler = diffusers.optimization.get_scheduler(
|
||||
args.lr_scheduler, optimizer, num_warmup_steps=args.lr_warmup_steps, num_training_steps=args.max_train_steps)
|
||||
|
@ -166,6 +166,9 @@ def train(args):
|
||||
if args.gradient_checkpointing: # according to TI example in Diffusers, train is required
|
||||
unet.train()
|
||||
text_encoder.train()
|
||||
|
||||
# set top parameter requires_grad = True for gradient checkpointing works
|
||||
text_encoder.text_model.embeddings.requires_grad_(True)
|
||||
else:
|
||||
unet.eval()
|
||||
text_encoder.eval()
|
||||
@ -364,9 +367,9 @@ def train(args):
|
||||
print(f"removing old checkpoint: {old_ckpt_file}")
|
||||
os.remove(old_ckpt_file)
|
||||
|
||||
saving, remove_epoch_no = train_util.save_on_epoch_end(args, save_func, remove_old_func, epoch + 1, num_train_epochs)
|
||||
saving = train_util.save_on_epoch_end(args, save_func, remove_old_func, epoch + 1, num_train_epochs)
|
||||
if saving and args.save_state:
|
||||
train_util.save_state_on_epoch_end(args, accelerator, model_name, epoch + 1, remove_epoch_no)
|
||||
train_util.save_state_on_epoch_end(args, accelerator, model_name, epoch + 1)
|
||||
|
||||
# end of epoch
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user