Merge dreambooth and finetuning in one repo to align with kohya_ss new repo (#10)
* Merge both dreambooth and finetune back in one repo
This commit is contained in:
parent
b78df38979
commit
706dfe157f
@ -1,21 +0,0 @@
|
|||||||
{
|
|
||||||
"architectures": [
|
|
||||||
"BertModel"
|
|
||||||
],
|
|
||||||
"attention_probs_dropout_prob": 0.1,
|
|
||||||
"hidden_act": "gelu",
|
|
||||||
"hidden_dropout_prob": 0.1,
|
|
||||||
"hidden_size": 768,
|
|
||||||
"initializer_range": 0.02,
|
|
||||||
"intermediate_size": 3072,
|
|
||||||
"layer_norm_eps": 1e-12,
|
|
||||||
"max_position_embeddings": 512,
|
|
||||||
"model_type": "bert",
|
|
||||||
"num_attention_heads": 12,
|
|
||||||
"num_hidden_layers": 12,
|
|
||||||
"pad_token_id": 0,
|
|
||||||
"type_vocab_size": 2,
|
|
||||||
"vocab_size": 30524,
|
|
||||||
"encoder_width": 768,
|
|
||||||
"add_cross_attention": true
|
|
||||||
}
|
|
195
README.md
195
README.md
@ -1,194 +1,13 @@
|
|||||||
# HOWTO
|
# Kohya's dreambooth and finetuning
|
||||||
|
|
||||||
This repo provide all the required config to run the Dreambooth version found in this note: https://note.com/kohya_ss/n/nee3ed1649fb6
|
This repo now combines both Kohya_ss solutions under one roof. I am merging both into a single repo to align with the new official kohya repo, where he will maintain his code from now on: https://github.com/kohya-ss/sd-scripts
|
||||||
The setup of bitsandbytes with Adam8bit support for windows: https://note.com/kohya_ss/n/n47f654dc161e
|
|
||||||
|
|
||||||
## Required Dependencies
|
A new note accompanying the release of his new repo can be found here: https://note.com/kohya_ss/n/nba4eceaa4594
|
||||||
|
|
||||||
Python 3.10.6 and Git:
|
## Dreambooth
|
||||||
|
|
||||||
- Python 3.10.6: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe
|
You can find the Dreambooth-specific documentation in the [Dreambooth README](README_dreambooth.md)
|
||||||
- git: https://git-scm.com/download/win
|
|
||||||
|
|
||||||
Give unrestricted script access to powershell so venv can work:
|
## Finetune
|
||||||
|
|
||||||
- Open an administrator powershell window
|
You can find the finetune-specific documentation in the [Finetune README](README_finetune.md)
|
||||||
- Type `Set-ExecutionPolicy Unrestricted` and answer A
|
|
||||||
- Close admin powershell window
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
Open a regular Powershell terminal and type the following inside:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
git clone https://github.com/bmaltais/kohya_ss.git
|
|
||||||
cd kohya_ss
|
|
||||||
|
|
||||||
python -m venv --system-site-packages venv
|
|
||||||
.\venv\Scripts\activate
|
|
||||||
|
|
||||||
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
|
|
||||||
pip install --upgrade -r requirements.txt
|
|
||||||
pip install -U -I --no-deps https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl
|
|
||||||
|
|
||||||
cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\
|
|
||||||
cp .\bitsandbytes_windows\cextension.py .\venv\Lib\site-packages\bitsandbytes\cextension.py
|
|
||||||
cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py
|
|
||||||
|
|
||||||
accelerate config
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Answers to accelerate config:
|
|
||||||
|
|
||||||
```txt
|
|
||||||
- 0
|
|
||||||
- 0
|
|
||||||
- NO
|
|
||||||
- NO
|
|
||||||
- All
|
|
||||||
- fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
### Optional: CUDNN 8.6
|
|
||||||
|
|
||||||
This step is optional but can improve the learning speed for NVidia 4090 owners...
|
|
||||||
|
|
||||||
Due to the filesize I can't host the DLLs needed for CUDNN 8.6 on Github, I strongly advise you download them for a speed boost in sample generation (almost 50% on 4090) you can download them from here: https://b1.thefileditch.ch/mwxKTEtelILoIbMbruuM.zip
|
|
||||||
|
|
||||||
To install simply unzip the directory and place the cudnn_windows folder in the root of the kohya_diffusers_fine_tuning repo.
|
|
||||||
|
|
||||||
Run the following command to install:
|
|
||||||
|
|
||||||
```
|
|
||||||
python cudann_1.8_install.py
|
|
||||||
```
|
|
||||||
|
|
||||||
## Upgrade
|
|
||||||
|
|
||||||
When a new release comes out you can upgrade your repo with the following command:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
cd kohya_ss
|
|
||||||
git pull
|
|
||||||
.\venv\Scripts\activate
|
|
||||||
pip install --upgrade -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
Once the commands have completed successfully you should be ready to use the new version.
|
|
||||||
|
|
||||||
## GUI
|
|
||||||
|
|
||||||
There is now support for GUI based training using gradio. You can start the GUI interface by running:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
python .\dreambooth_gui.py
|
|
||||||
```
|
|
||||||
|
|
||||||
## Quickstart screencast
|
|
||||||
|
|
||||||
You can find a screen cast on how to use the GUI at the following location:
|
|
||||||
|
|
||||||
[![Video](https://img.youtube.com/vi/RlvqEKj03WI/maxresdefault.jpg)](https://www.youtube.com/watch?v=RlvqEKj03WI)
|
|
||||||
|
|
||||||
## Folders configuration
|
|
||||||
|
|
||||||
Refer to the note to understand how to create the folde structure. In short it should look like:
|
|
||||||
|
|
||||||
```
|
|
||||||
<arbitrary folder name>
|
|
||||||
|- <arbitrary class folder name>
|
|
||||||
|- <repeat count>_<class>
|
|
||||||
|- <arbitrary training folder name>
|
|
||||||
|- <repeat count>_<token> <class>
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for `asd dog` where `asd` is the token word and `dog` is the class. In this example the regularization `dog` class images contained in the folder will be repeated only 1 time and the `asd dog` images will be repeated 20 times:
|
|
||||||
|
|
||||||
```
|
|
||||||
my_asd_dog_dreambooth
|
|
||||||
|- reg_dog
|
|
||||||
|- 1_dog
|
|
||||||
`- reg_image_1.png
|
|
||||||
`- reg_image_2.png
|
|
||||||
...
|
|
||||||
`- reg_image_256.png
|
|
||||||
|- train_dog
|
|
||||||
|- 20_asd dog
|
|
||||||
`- dog1.png
|
|
||||||
...
|
|
||||||
`- dog8.png
|
|
||||||
```
|
|
||||||
|
|
||||||
## Support
|
|
||||||
|
|
||||||
Drop by the discord server for support: https://discord.com/channels/1041518562487058594/1041518563242020906
|
|
||||||
|
|
||||||
## Contributors
|
|
||||||
|
|
||||||
- Lord of the universe - cacoe (twitter: @cac0e)
|
|
||||||
|
|
||||||
## Change history
|
|
||||||
|
|
||||||
* 12/19 (v18.4) update:
|
|
||||||
- Add support for shuffle_caption, save_state, resume, prior_loss_weight under "Advanced Configuration" section
|
|
||||||
- Fix issue with open/save config not working properly
|
|
||||||
* 12/19 (v18.3) update:
|
|
||||||
- fix stop encoder training issue
|
|
||||||
* 12/19 (v18.2) update:
|
|
||||||
- Fix file/folder opening behind the browser window
|
|
||||||
- Add WD14 and BLIP captioning to utilities
|
|
||||||
- Improve overall GUI layout
|
|
||||||
* 12/18 (v18.1) update:
|
|
||||||
- Add Stable Diffusion model conversion utility. Make sure to run `pip upgrade -U -r requirements.txt` after updating to this release as this introduce new pip requirements.
|
|
||||||
* 12/17 (v18) update:
|
|
||||||
- Save model as option added to train_db_fixed.py
|
|
||||||
- Save model as option added to GUI
|
|
||||||
- Retire "Model conversion" parameters that was essentially performing the same function as the new `--save_model_as` parameter
|
|
||||||
* 12/17 (v17.2) update:
|
|
||||||
- Adding new dataset balancing utility.
|
|
||||||
* 12/17 (v17.1) update:
|
|
||||||
- Adding GUI for kohya_ss called dreambooth_gui.py
|
|
||||||
- removing support for `--finetuning` as there is now a dedicated python repo for that. `--fine-tuning` is still there behind the scene until kohya_ss remove it in a future code release.
|
|
||||||
- removing cli examples as I will now focus on the GUI for training. People who prefer cli based training can still do that.
|
|
||||||
* 12/13 (v17) update:
|
|
||||||
- Added support for learning to fp16 gradient (experimental function). SD1.x can be trained with 8GB of VRAM. Specify full_fp16 options.
|
|
||||||
* 12/06 (v16) update:
|
|
||||||
- Added support for Diffusers 0.10.2 (use code in Diffusers to learn v-parameterization).
|
|
||||||
- Diffusers also supports safetensors.
|
|
||||||
- Added support for accelerate 0.15.0.
|
|
||||||
* 12/05 (v15) update:
|
|
||||||
- The script has been divided into two parts
|
|
||||||
- Support for SafeTensors format has been added. Install SafeTensors with `pip install safetensors`. The script will automatically detect the format based on the file extension when loading. Use the `--use_safetensors` option if you want to save the model as safetensor.
|
|
||||||
- The vae option has been added to load a VAE model separately.
|
|
||||||
- The log_prefix option has been added to allow adding a custom string to the log directory name before the date and time.
|
|
||||||
* 11/30 (v13) update:
|
|
||||||
- fix training text encoder at specified step (`--stop_text_encoder_training=<step #>`) that was causing both Unet and text encoder training to stop completely at the specified step rather than continue without text encoding training.
|
|
||||||
* 11/29 (v12) update:
|
|
||||||
- stop training text encoder at specified step (`--stop_text_encoder_training=<step #>`)
|
|
||||||
- tqdm smoothing
|
|
||||||
- updated fine tuning script to support SD2.0 768/v
|
|
||||||
* 11/27 (v11) update:
|
|
||||||
- DiffUsers 0.9.0 is required. Update with `pip install --upgrade -r requirements.txt` in the virtual environment.
|
|
||||||
- The way captions are handled in DreamBooth has changed. When a caption file existed, the file's caption was added to the folder caption until v10, but from v11 it is only the file's caption. Please be careful.
|
|
||||||
- Fixed a bug where prior_loss_weight was applied to learning images. Sorry for the inconvenience.
|
|
||||||
- Compatible with Stable Diffusion v2.0. Add the `--v2` option. If you are using `768-v-ema.ckpt` or `stable-diffusion-2` instead of `stable-diffusion-v2-base`, add `--v_parameterization` as well. Learn more about other options.
|
|
||||||
- Added options related to the learning rate scheduler.
|
|
||||||
- You can download and use DiffUsers models directly from Hugging Face. In addition, DiffUsers models can be saved during training.
|
|
||||||
* 11/21 (v10):
|
|
||||||
- Added minimum/maximum resolution specification when using Aspect Ratio Bucketing (min_bucket_reso/max_bucket_reso option).
|
|
||||||
- Added extension specification for caption files (caption_extention).
|
|
||||||
- Added support for images with .webp extension.
|
|
||||||
- Added a function that allows captions to learning images and regularized images.
|
|
||||||
* 11/18 (v9):
|
|
||||||
- Added support for Aspect Ratio Bucketing (enable_bucket option). (--enable_bucket)
|
|
||||||
- Added support for selecting data format (fp16/bf16/float) when saving checkpoint (--save_precision)
|
|
||||||
- Added support for saving learning state (--save_state, --resume)
|
|
||||||
- Added support for logging (--logging_dir)
|
|
||||||
* 11/14 (diffusers_fine_tuning v2):
|
|
||||||
- script name is now fine_tune.py.
|
|
||||||
- Added option to learn Text Encoder --train_text_encoder.
|
|
||||||
- The data format of checkpoint at the time of saving can be specified with the --save_precision option. You can choose float, fp16, and bf16.
|
|
||||||
- Added a --save_state option to save the learning state (optimizer, etc.) in the middle. It can be resumed with the --resume option.
|
|
||||||
* 11/9 (v8): supports Diffusers 0.7.2. To upgrade diffusers run `pip install --upgrade diffusers[torch]`
|
|
||||||
* 11/7 (v7): Text Encoder supports checkpoint files in different storage formats (it is converted at the time of import, so export will be in normal format). Changed the average value of EPOCH loss to output to the screen. Added a function to save epoch and global step in checkpoint in SD format (add values if there is existing data). The reg_data_dir option is enabled during fine tuning (fine tuning while mixing regularized images). Added dataset_repeats option that is valid for fine tuning (specified when the number of teacher images is small and the epoch is extremely short).
|
|
203
README_dreambooth.md
Normal file
203
README_dreambooth.md
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
# Kohya_ss Dreambooth
|
||||||
|
|
||||||
|
This repo provides all the required code to run the Dreambooth version found in this note: https://note.com/kohya_ss/n/nee3ed1649fb6
|
||||||
|
|
||||||
|
## Required Dependencies
|
||||||
|
|
||||||
|
Python 3.10.6 and Git:
|
||||||
|
|
||||||
|
- Python 3.10.6: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe
|
||||||
|
- git: https://git-scm.com/download/win
|
||||||
|
|
||||||
|
Give unrestricted script access to powershell so venv can work:
|
||||||
|
|
||||||
|
- Open an administrator powershell window
|
||||||
|
- Type `Set-ExecutionPolicy Unrestricted` and answer A
|
||||||
|
- Close admin powershell window
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Open a regular Powershell terminal and type the following inside:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
git clone https://github.com/bmaltais/kohya_ss.git
|
||||||
|
cd kohya_ss
|
||||||
|
|
||||||
|
python -m venv --system-site-packages venv
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
|
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
|
||||||
|
pip install --upgrade -r requirements.txt
|
||||||
|
pip install -U -I --no-deps https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl
|
||||||
|
|
||||||
|
cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\
|
||||||
|
cp .\bitsandbytes_windows\cextension.py .\venv\Lib\site-packages\bitsandbytes\cextension.py
|
||||||
|
cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py
|
||||||
|
|
||||||
|
accelerate config
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Answers to accelerate config:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
- 0
|
||||||
|
- 0
|
||||||
|
- NO
|
||||||
|
- NO
|
||||||
|
- All
|
||||||
|
- fp16
|
||||||
|
```
|
||||||
|
|
||||||
|
### Optional: CUDNN 8.6
|
||||||
|
|
||||||
|
This step is optional but can improve the learning speed for NVidia 4090 owners...
|
||||||
|
|
||||||
|
Due to the file size, I can't host the DLLs needed for CUDNN 8.6 on GitHub. I strongly advise you to download them for a speed boost in sample generation (almost 50% on a 4090). You can download them from here: https://b1.thefileditch.ch/mwxKTEtelILoIbMbruuM.zip
|
||||||
|
|
||||||
|
To install, simply unzip the archive and place the cudnn_windows folder in the root of the kohya_ss repo.
|
||||||
|
|
||||||
|
Run the following command to install:
|
||||||
|
|
||||||
|
```
|
||||||
|
python .\tools\cudann_1.8_install.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Upgrade
|
||||||
|
|
||||||
|
When a new release comes out you can upgrade your repo with the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
.\upgrade.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
or you can do it manually with
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd kohya_ss
|
||||||
|
git pull
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
pip install --upgrade -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Once the commands have completed successfully you should be ready to use the new version.
|
||||||
|
|
||||||
|
## GUI
|
||||||
|
|
||||||
|
There is now support for GUI based training using gradio. You can start the GUI interface by running:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
.\dreambooth.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI
|
||||||
|
|
||||||
|
You can find various examples of how to leverage the fine_tune.py in this folder: https://github.com/bmaltais/kohya_ss/tree/master/examples
|
||||||
|
|
||||||
|
## Quickstart screencast
|
||||||
|
|
||||||
|
You can find a screen cast on how to use the GUI at the following location:
|
||||||
|
|
||||||
|
[![Video](https://img.youtube.com/vi/RlvqEKj03WI/maxresdefault.jpg)](https://www.youtube.com/watch?v=RlvqEKj03WI)
|
||||||
|
|
||||||
|
## Folders configuration
|
||||||
|
|
||||||
|
Refer to the note to understand how to create the folder structure. In short it should look like:
|
||||||
|
|
||||||
|
```
|
||||||
|
<arbitrary folder name>
|
||||||
|
|- <arbitrary class folder name>
|
||||||
|
|- <repeat count>_<class>
|
||||||
|
|- <arbitrary training folder name>
|
||||||
|
|- <repeat count>_<token> <class>
|
||||||
|
```
|
||||||
|
|
||||||
|
Example for `asd dog` where `asd` is the token word and `dog` is the class. In this example the regularization `dog` class images contained in the folder will be repeated only 1 time and the `asd dog` images will be repeated 20 times:
|
||||||
|
|
||||||
|
```
|
||||||
|
my_asd_dog_dreambooth
|
||||||
|
|- reg_dog
|
||||||
|
|- 1_dog
|
||||||
|
`- reg_image_1.png
|
||||||
|
`- reg_image_2.png
|
||||||
|
...
|
||||||
|
`- reg_image_256.png
|
||||||
|
|- train_dog
|
||||||
|
|- 20_asd dog
|
||||||
|
`- dog1.png
|
||||||
|
...
|
||||||
|
`- dog8.png
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
Drop by the discord server for support: https://discord.com/channels/1041518562487058594/1041518563242020906
|
||||||
|
|
||||||
|
## Contributors
|
||||||
|
|
||||||
|
- Lord of the universe - cacoe (twitter: @cac0e)
|
||||||
|
|
||||||
|
## Change history
|
||||||
|
|
||||||
|
* 12/19 (v18.4) update:
|
||||||
|
- Add support for shuffle_caption, save_state, resume, prior_loss_weight under "Advanced Configuration" section
|
||||||
|
- Fix issue with open/save config not working properly
|
||||||
|
* 12/19 (v18.3) update:
|
||||||
|
- fix stop encoder training issue
|
||||||
|
* 12/19 (v18.2) update:
|
||||||
|
- Fix file/folder opening behind the browser window
|
||||||
|
- Add WD14 and BLIP captioning to utilities
|
||||||
|
- Improve overall GUI layout
|
||||||
|
* 12/18 (v18.1) update:
|
||||||
|
- Add Stable Diffusion model conversion utility. Make sure to run `pip install -U -r requirements.txt` after updating to this release as this introduces new pip requirements.
|
||||||
|
* 12/17 (v18) update:
|
||||||
|
- Save model as option added to train_db_fixed.py
|
||||||
|
- Save model as option added to GUI
|
||||||
|
- Retire "Model conversion" parameters that were essentially performing the same function as the new `--save_model_as` parameter
|
||||||
|
* 12/17 (v17.2) update:
|
||||||
|
- Adding new dataset balancing utility.
|
||||||
|
* 12/17 (v17.1) update:
|
||||||
|
- Adding GUI for kohya_ss called dreambooth_gui.py
|
||||||
|
- removing support for `--finetuning` as there is now a dedicated python repo for that. `--fine-tuning` is still there behind the scenes until kohya_ss removes it in a future code release.
|
||||||
|
- removing cli examples as I will now focus on the GUI for training. People who prefer cli based training can still do that.
|
||||||
|
* 12/13 (v17) update:
|
||||||
|
- Added support for learning to fp16 gradient (experimental function). SD1.x can be trained with 8GB of VRAM. Specify full_fp16 options.
|
||||||
|
* 12/06 (v16) update:
|
||||||
|
- Added support for Diffusers 0.10.2 (use code in Diffusers to learn v-parameterization).
|
||||||
|
- Diffusers also supports safetensors.
|
||||||
|
- Added support for accelerate 0.15.0.
|
||||||
|
* 12/05 (v15) update:
|
||||||
|
- The script has been divided into two parts
|
||||||
|
- Support for SafeTensors format has been added. Install SafeTensors with `pip install safetensors`. The script will automatically detect the format based on the file extension when loading. Use the `--use_safetensors` option if you want to save the model as safetensor.
|
||||||
|
- The vae option has been added to load a VAE model separately.
|
||||||
|
- The log_prefix option has been added to allow adding a custom string to the log directory name before the date and time.
|
||||||
|
* 11/30 (v13) update:
|
||||||
|
- fix training text encoder at specified step (`--stop_text_encoder_training=<step #>`) that was causing both Unet and text encoder training to stop completely at the specified step rather than continue without text encoding training.
|
||||||
|
* 11/29 (v12) update:
|
||||||
|
- stop training text encoder at specified step (`--stop_text_encoder_training=<step #>`)
|
||||||
|
- tqdm smoothing
|
||||||
|
- updated fine tuning script to support SD2.0 768/v
|
||||||
|
* 11/27 (v11) update:
|
||||||
|
- DiffUsers 0.9.0 is required. Update with `pip install --upgrade -r requirements.txt` in the virtual environment.
|
||||||
|
- The way captions are handled in DreamBooth has changed. When a caption file existed, the file's caption was added to the folder caption until v10, but from v11 it is only the file's caption. Please be careful.
|
||||||
|
- Fixed a bug where prior_loss_weight was applied to learning images. Sorry for the inconvenience.
|
||||||
|
- Compatible with Stable Diffusion v2.0. Add the `--v2` option. If you are using `768-v-ema.ckpt` or `stable-diffusion-2` instead of `stable-diffusion-v2-base`, add `--v_parameterization` as well. Learn more about other options.
|
||||||
|
- Added options related to the learning rate scheduler.
|
||||||
|
- You can download and use DiffUsers models directly from Hugging Face. In addition, DiffUsers models can be saved during training.
|
||||||
|
* 11/21 (v10):
|
||||||
|
- Added minimum/maximum resolution specification when using Aspect Ratio Bucketing (min_bucket_reso/max_bucket_reso option).
|
||||||
|
- Added extension specification for caption files (caption_extention).
|
||||||
|
- Added support for images with .webp extension.
|
||||||
|
- Added a function that allows captions to learning images and regularized images.
|
||||||
|
* 11/18 (v9):
|
||||||
|
- Added support for Aspect Ratio Bucketing (enable_bucket option). (--enable_bucket)
|
||||||
|
- Added support for selecting data format (fp16/bf16/float) when saving checkpoint (--save_precision)
|
||||||
|
- Added support for saving learning state (--save_state, --resume)
|
||||||
|
- Added support for logging (--logging_dir)
|
||||||
|
* 11/14 (diffusers_fine_tuning v2):
|
||||||
|
- script name is now fine_tune.py.
|
||||||
|
- Added option to learn Text Encoder --train_text_encoder.
|
||||||
|
- The data format of checkpoint at the time of saving can be specified with the --save_precision option. You can choose float, fp16, and bf16.
|
||||||
|
- Added a --save_state option to save the learning state (optimizer, etc.) in the middle. It can be resumed with the --resume option.
|
||||||
|
* 11/9 (v8): supports Diffusers 0.7.2. To upgrade diffusers run `pip install --upgrade diffusers[torch]`
|
||||||
|
* 11/7 (v7): Text Encoder supports checkpoint files in different storage formats (it is converted at the time of import, so export will be in normal format). Changed the average value of EPOCH loss to output to the screen. Added a function to save epoch and global step in checkpoint in SD format (add values if there is existing data). The reg_data_dir option is enabled during fine tuning (fine tuning while mixing regularized images). Added dataset_repeats option that is valid for fine tuning (specified when the number of teacher images is small and the epoch is extremely short).
|
167
README_finetune.md
Normal file
167
README_finetune.md
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
# Kohya_ss Finetune
|
||||||
|
|
||||||
|
This python utility provides code to run the diffusers fine tuning version found in this note: https://note.com/kohya_ss/n/nbf7ce8d80f29
|
||||||
|
|
||||||
|
## Required Dependencies
|
||||||
|
|
||||||
|
Python 3.10.6 and Git:
|
||||||
|
|
||||||
|
- Python 3.10.6: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe
|
||||||
|
- git: https://git-scm.com/download/win
|
||||||
|
|
||||||
|
Give unrestricted script access to powershell so venv can work:
|
||||||
|
|
||||||
|
- Open an administrator powershell window
|
||||||
|
- Type `Set-ExecutionPolicy Unrestricted` and answer A
|
||||||
|
- Close admin powershell window
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Open a regular Powershell terminal and type the following inside:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
git clone https://github.com/bmaltais/kohya_ss.git
|
||||||
|
cd kohya_ss
|
||||||
|
|
||||||
|
python -m venv --system-site-packages venv
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
|
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
|
||||||
|
pip install --upgrade -r requirements.txt
|
||||||
|
pip install -U -I --no-deps https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl
|
||||||
|
|
||||||
|
cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\
|
||||||
|
cp .\bitsandbytes_windows\cextension.py .\venv\Lib\site-packages\bitsandbytes\cextension.py
|
||||||
|
cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py
|
||||||
|
|
||||||
|
accelerate config
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Answers to accelerate config:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
- 0
|
||||||
|
- 0
|
||||||
|
- NO
|
||||||
|
- NO
|
||||||
|
- All
|
||||||
|
- fp16
|
||||||
|
```
|
||||||
|
|
||||||
|
### Optional: CUDNN 8.6
|
||||||
|
|
||||||
|
This step is optional but can improve the learning speed for NVidia 4090 owners...
|
||||||
|
|
||||||
|
Due to the file size, I can't host the DLLs needed for CUDNN 8.6 on GitHub. I strongly advise you to download them for a speed boost in sample generation (almost 50% on a 4090). You can download them from here: https://b1.thefileditch.ch/mwxKTEtelILoIbMbruuM.zip
|
||||||
|
|
||||||
|
To install, simply unzip the archive and place the cudnn_windows folder in the root of the kohya_ss repo.
|
||||||
|
|
||||||
|
Run the following command to install:
|
||||||
|
|
||||||
|
```
|
||||||
|
python .\tools\cudann_1.8_install.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Upgrade
|
||||||
|
|
||||||
|
When a new release comes out you can upgrade your repo with the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
.\upgrade.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
or you can do it manually with
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd kohya_ss
|
||||||
|
git pull
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
pip install --upgrade -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Once the commands have completed successfully you should be ready to use the new version.
|
||||||
|
|
||||||
|
## Folders configuration
|
||||||
|
|
||||||
|
Simply put all the images you will want to train on in a single directory. It does not matter what size or aspect ratio they have. It is your choice.
|
||||||
|
|
||||||
|
## Captions
|
||||||
|
|
||||||
|
Each image file needs to be accompanied by a caption file describing what the image is about. For example, if you want to train on cute dog pictures you can put `cute dog` as the caption in every file. You can use the `tools\caption.ps1` sample code to help out with that:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$folder = "sample"
|
||||||
|
$file_pattern="*.*"
|
||||||
|
$caption_text="cute dog"
|
||||||
|
|
||||||
|
$files = Get-ChildItem "$folder\$file_pattern" -Include *.png, *.jpg, *.webp -File
|
||||||
|
foreach ($file in $files) {
|
||||||
|
if (-not(Test-Path -Path $folder\"$($file.BaseName).txt" -PathType Leaf)) {
|
||||||
|
New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $caption_text
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
You can also use the `Captioning` tool found under the `Utilities` tab in the GUI.
|
||||||
|
|
||||||
|
## GUI
|
||||||
|
|
||||||
|
Support for GUI based training using gradio. You can start the GUI interface by running:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
.\finetune.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI
|
||||||
|
|
||||||
|
You can find various examples of how to leverage the fine_tune.py in this folder: https://github.com/bmaltais/kohya_ss/tree/master/examples
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
Drop by the discord server for support: https://discord.com/channels/1041518562487058594/1041518563242020906
|
||||||
|
|
||||||
|
## Change history
|
||||||
|
|
||||||
|
* 12/20 (v9.6) update:
|
||||||
|
- fix issue with config file save and opening
|
||||||
|
* 12/19 (v9.5) update:
|
||||||
|
- Fix file/folder dialog opening behind the browser window
|
||||||
|
- Update GUI layout to be more logical
|
||||||
|
* 12/18 (v9.4) update:
|
||||||
|
- Add WD14 tagging to utilities
|
||||||
|
* 12/18 (v9.3) update:
|
||||||
|
- Add logging option
|
||||||
|
* 12/18 (v9.2) update:
|
||||||
|
- Add BLIP Captioning utility
|
||||||
|
* 12/18 (v9.1) update:
|
||||||
|
- Add Stable Diffusion model conversion utility. Make sure to run `pip install -U -r requirements.txt` after updating to this release as this introduces new pip requirements.
|
||||||
|
* 12/17 (v9) update:
|
||||||
|
- Save model as option added to fine_tune.py
|
||||||
|
- Save model as option added to GUI
|
||||||
|
- Retirement of cli based documentation. Will focus attention to GUI based training
|
||||||
|
* 12/13 (v8):
|
||||||
|
- WD14Tagger now works on its own.
|
||||||
|
- Added support for learning to fp16 up to the gradient. Go to "Building the environment and preparing scripts for Diffusers" for more info.
|
||||||
|
* 12/10 (v7):
|
||||||
|
- We have added support for Diffusers 0.10.2.
|
||||||
|
- In addition, we have made other fixes.
|
||||||
|
- For more information, please see the section on "Building the environment and preparing scripts for Diffusers" in our documentation.
|
||||||
|
* 12/6 (v6): We have responded to reports that some models experience an error when saving in SafeTensors format.
|
||||||
|
* 12/5 (v5):
|
||||||
|
- .safetensors format is now supported. Install SafeTensors as "pip install safetensors". When loading, it is automatically determined by extension. Specify use_safetensors options when saving.
|
||||||
|
- Added an option to add any string before the date and time log directory name log_prefix.
|
||||||
|
- Cleaning scripts now work without either captions or tags.
|
||||||
|
* 11/29 (v4):
|
||||||
|
- DiffUsers 0.9.0 is required. Update as "pip install -U diffusers[torch]==0.9.0" in the virtual environment, and update the dependent libraries as "pip install --upgrade -r requirements.txt" if other errors occur.
|
||||||
|
- Compatible with Stable Diffusion v2.0. Add the --v2 option when training (and pre-fetching latents). If you are using 768-v-ema.ckpt or stable-diffusion-2 instead of stable-diffusion-v2-base, add --v_parameterization as well when learning. Learn more about other options.
|
||||||
|
- The minimum resolution and maximum resolution of the bucket can be specified when pre-fetching latents.
|
||||||
|
- Corrected the calculation formula for loss (fixed that it was increasing according to the batch size).
|
||||||
|
- Added options related to the learning rate scheduler.
|
||||||
|
- You can now download and train DiffUsers models directly from Hugging Face. In addition, DiffUsers models can be saved during training.
|
||||||
|
- clean_captions_and_tags.py can now be used even when only captions or only tags are available.
|
||||||
|
- Other minor fixes such as changing the arguments of the noise scheduler during training.
|
||||||
|
* 11/23 (v3):
|
||||||
|
- Added WD14Tagger tagging script.
|
||||||
|
- A log output function has been added to the fine_tune.py. Also, fixed the double shuffling of data.
|
||||||
|
- Fixed misspelling of options for each script (caption_extention→caption_extension will work for the time being, even if it remains outdated).
|
@ -1,3 +0,0 @@
|
|||||||
# Diffusers Fine Tuning
|
|
||||||
|
|
||||||
Code has been moved to dedicated repo at: https://github.com/bmaltais/kohya_diffusers_fine_tuning
|
|
1
dreambooth.bat
Normal file
1
dreambooth.bat
Normal file
@ -0,0 +1 @@
|
|||||||
|
.\venv\Scripts\python.exe .\dreambooth_gui.py
|
@ -355,7 +355,7 @@ def train_model(
|
|||||||
lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
|
lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
|
||||||
print(f'lr_warmup_steps = {lr_warmup_steps}')
|
print(f'lr_warmup_steps = {lr_warmup_steps}')
|
||||||
|
|
||||||
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db_fixed.py"'
|
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_db.py"'
|
||||||
if v2:
|
if v2:
|
||||||
run_cmd += ' --v2'
|
run_cmd += ' --v2'
|
||||||
if v_parameterization:
|
if v_parameterization:
|
||||||
@ -734,10 +734,10 @@ with interface:
|
|||||||
shuffle_caption = gr.Checkbox(
|
shuffle_caption = gr.Checkbox(
|
||||||
label='Shuffle caption', value=False
|
label='Shuffle caption', value=False
|
||||||
)
|
)
|
||||||
save_state = gr.Checkbox(label='Save state', value=False)
|
save_state = gr.Checkbox(label='Save training state', value=False)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
resume = gr.Textbox(
|
resume = gr.Textbox(
|
||||||
label='Resume',
|
label='Resume from saved training state',
|
||||||
placeholder='path to "last-state" state folder to resume from',
|
placeholder='path to "last-state" state folder to resume from',
|
||||||
)
|
)
|
||||||
resume_button = gr.Button('📂', elem_id='open_folder_small')
|
resume_button = gr.Button('📂', elem_id='open_folder_small')
|
||||||
|
@ -32,7 +32,7 @@ Write-Output "Repeats: $repeats"
|
|||||||
|
|
||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed-ber.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--train_data_dir=$data_dir `
|
--train_data_dir=$data_dir `
|
||||||
--output_dir=$output_dir `
|
--output_dir=$output_dir `
|
||||||
@ -51,7 +51,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
|
|
||||||
# 2nd pass at half the dataset repeat value
|
# 2nd pass at half the dataset repeat value
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$data_dir `
|
--train_data_dir=$data_dir `
|
||||||
--output_dir=$output_dir"2" `
|
--output_dir=$output_dir"2" `
|
||||||
@ -68,7 +68,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed-ber.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$data_dir `
|
--train_data_dir=$data_dir `
|
||||||
--output_dir=$output_dir"2" `
|
--output_dir=$output_dir"2" `
|
||||||
|
@ -48,7 +48,7 @@ $square_mts = [Math]::Ceiling($square_repeats / $train_batch_size * $epoch)
|
|||||||
|
|
||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--train_data_dir=$landscape_data_dir `
|
--train_data_dir=$landscape_data_dir `
|
||||||
--output_dir=$landscape_output_dir `
|
--output_dir=$landscape_output_dir `
|
||||||
@ -65,7 +65,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$dataset_repeats `
|
--dataset_repeats=$dataset_repeats `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$portrait_data_dir `
|
--train_data_dir=$portrait_data_dir `
|
||||||
--output_dir=$portrait_output_dir `
|
--output_dir=$portrait_output_dir `
|
||||||
@ -82,7 +82,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$dataset_repeats `
|
--dataset_repeats=$dataset_repeats `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$square_data_dir `
|
--train_data_dir=$square_data_dir `
|
||||||
--output_dir=$square_output_dir `
|
--output_dir=$square_output_dir `
|
||||||
@ -101,7 +101,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
|
|
||||||
# 2nd pass at half the dataset repeat value
|
# 2nd pass at half the dataset repeat value
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$square_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$square_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$landscape_data_dir `
|
--train_data_dir=$landscape_data_dir `
|
||||||
--output_dir=$landscape_output_dir"2" `
|
--output_dir=$landscape_output_dir"2" `
|
||||||
@ -118,7 +118,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" `
|
--pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" `
|
||||||
--train_data_dir=$portrait_data_dir `
|
--train_data_dir=$portrait_data_dir `
|
||||||
--output_dir=$portrait_output_dir"2" `
|
--output_dir=$portrait_output_dir"2" `
|
||||||
@ -135,7 +135,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" `
|
--pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" `
|
||||||
--train_data_dir=$square_data_dir `
|
--train_data_dir=$square_data_dir `
|
||||||
--output_dir=$square_output_dir"2" `
|
--output_dir=$square_output_dir"2" `
|
||||||
|
@ -48,7 +48,7 @@ $square_mts = [Math]::Ceiling($square_repeats / $train_batch_size * $epoch)
|
|||||||
|
|
||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--train_data_dir=$landscape_data_dir `
|
--train_data_dir=$landscape_data_dir `
|
||||||
--output_dir=$landscape_output_dir `
|
--output_dir=$landscape_output_dir `
|
||||||
@ -65,7 +65,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$dataset_repeats `
|
--dataset_repeats=$dataset_repeats `
|
||||||
--save_half
|
--save_half
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$portrait_data_dir `
|
--train_data_dir=$portrait_data_dir `
|
||||||
--output_dir=$portrait_output_dir `
|
--output_dir=$portrait_output_dir `
|
||||||
@ -82,7 +82,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$dataset_repeats `
|
--dataset_repeats=$dataset_repeats `
|
||||||
--save_half
|
--save_half
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$square_data_dir `
|
--train_data_dir=$square_data_dir `
|
||||||
--output_dir=$square_output_dir `
|
--output_dir=$square_output_dir `
|
||||||
@ -101,7 +101,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
|
|
||||||
# 2nd pass at half the dataset repeat value
|
# 2nd pass at half the dataset repeat value
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$square_output_dir"\last.ckpt" `
|
--pretrained_model_name_or_path=$square_output_dir"\last.ckpt" `
|
||||||
--train_data_dir=$landscape_data_dir `
|
--train_data_dir=$landscape_data_dir `
|
||||||
--output_dir=$landscape_output_dir"2" `
|
--output_dir=$landscape_output_dir"2" `
|
||||||
@ -118,7 +118,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
||||||
--save_half
|
--save_half
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" `
|
--pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" `
|
||||||
--train_data_dir=$portrait_data_dir `
|
--train_data_dir=$portrait_data_dir `
|
||||||
--output_dir=$portrait_output_dir"2" `
|
--output_dir=$portrait_output_dir"2" `
|
||||||
@ -135,7 +135,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process tra
|
|||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) `
|
||||||
--save_half
|
--save_half
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" `
|
--pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" `
|
||||||
--train_data_dir=$square_data_dir `
|
--train_data_dir=$square_data_dir `
|
||||||
--output_dir=$square_output_dir"2" `
|
--output_dir=$square_output_dir"2" `
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
# This powershell script will create a model using the fine tuning dreambooth method. It will require landscape,
|
|
||||||
# portrait and square images.
|
|
||||||
#
|
|
||||||
# Adjust the script to your own needs
|
|
||||||
|
|
||||||
# Sylvia Ritter
|
|
||||||
# variable values
|
|
||||||
$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt"
|
|
||||||
$train_dir = "D:\dreambooth\train_bernard\v3"
|
|
||||||
$folder_name = "dataset"
|
|
||||||
|
|
||||||
$learning_rate = 1e-6
|
|
||||||
$dataset_repeats = 80
|
|
||||||
$train_batch_size = 6
|
|
||||||
$epoch = 1
|
|
||||||
$save_every_n_epochs=1
|
|
||||||
$mixed_precision="fp16"
|
|
||||||
$num_cpu_threads_per_process=6
|
|
||||||
|
|
||||||
|
|
||||||
# You should not have to change values past this point
|
|
||||||
|
|
||||||
$data_dir = $train_dir + "\" + $folder_name
|
|
||||||
$output_dir = $train_dir + "\model"
|
|
||||||
|
|
||||||
# stop script on error
|
|
||||||
$ErrorActionPreference = "Stop"
|
|
||||||
|
|
||||||
.\venv\Scripts\activate
|
|
||||||
|
|
||||||
$data_dir_buckets = $data_dir + "-buckets"
|
|
||||||
|
|
||||||
python .\diffusers_fine_tuning\create_buckets.py $data_dir $data_dir_buckets --max_resolution "768,512"
|
|
||||||
|
|
||||||
foreach($directory in Get-ChildItem -path $data_dir_buckets -Directory)
|
|
||||||
|
|
||||||
{
|
|
||||||
if (Test-Path -Path $output_dir-$directory)
|
|
||||||
{
|
|
||||||
Write-Host "The folder $output_dir-$directory already exists, skipping bucket."
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Write-Host $directory
|
|
||||||
$dir_img_num = Get-ChildItem "$data_dir_buckets\$directory" -Recurse -File -Include *.jpg | Measure-Object | %{$_.Count}
|
|
||||||
$repeats = $dir_img_num * $dataset_repeats
|
|
||||||
$mts = [Math]::Ceiling($repeats / $train_batch_size * $epoch)
|
|
||||||
|
|
||||||
Write-Host
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed-ber.py `
|
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
|
||||||
--train_data_dir=$data_dir_buckets\$directory `
|
|
||||||
--output_dir=$output_dir-$directory `
|
|
||||||
--resolution=$directory `
|
|
||||||
--train_batch_size=$train_batch_size `
|
|
||||||
--learning_rate=$learning_rate `
|
|
||||||
--max_train_steps=$mts `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision=$mixed_precision `
|
|
||||||
--save_every_n_epochs=$save_every_n_epochs `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=$dataset_repeats `
|
|
||||||
--save_precision="fp16"
|
|
||||||
}
|
|
||||||
|
|
||||||
$pretrained_model_name_or_path = "$output_dir-$directory\last.ckpt"
|
|
||||||
}
|
|
@ -1,72 +0,0 @@
|
|||||||
# Sylvia Ritter. AKA: by silvery trait
|
|
||||||
|
|
||||||
# variable values
|
|
||||||
$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt"
|
|
||||||
$train_dir = "D:\dreambooth\train_sylvia_ritter\raw_data"
|
|
||||||
$training_folder = "all-images-v3"
|
|
||||||
|
|
||||||
$learning_rate = 5e-6
|
|
||||||
$dataset_repeats = 40
|
|
||||||
$train_batch_size = 6
|
|
||||||
$epoch = 4
|
|
||||||
$save_every_n_epochs=1
|
|
||||||
$mixed_precision="bf16"
|
|
||||||
$num_cpu_threads_per_process=6
|
|
||||||
|
|
||||||
$max_resolution = "768,576"
|
|
||||||
|
|
||||||
# You should not have to change values past this point
|
|
||||||
|
|
||||||
# stop script on error
|
|
||||||
$ErrorActionPreference = "Stop"
|
|
||||||
|
|
||||||
# activate venv
|
|
||||||
.\venv\Scripts\activate
|
|
||||||
|
|
||||||
# create caption json file
|
|
||||||
python D:\kohya_ss\diffusers_fine_tuning\merge_captions_to_metadata.py `
|
|
||||||
--caption_extention ".txt" $train_dir"\"$training_folder $train_dir"\meta_cap.json"
|
|
||||||
|
|
||||||
# create images buckets
|
|
||||||
python D:\kohya_ss\diffusers_fine_tuning\prepare_buckets_latents.py `
|
|
||||||
$train_dir"\"$training_folder `
|
|
||||||
$train_dir"\meta_cap.json" `
|
|
||||||
$train_dir"\meta_lat.json" `
|
|
||||||
$pretrained_model_name_or_path `
|
|
||||||
--batch_size 4 --max_resolution $max_resolution --mixed_precision fp16
|
|
||||||
|
|
||||||
# Get number of valid images
|
|
||||||
$image_num = Get-ChildItem "$train_dir\$training_folder" -Recurse -File -Include *.npz | Measure-Object | %{$_.Count}
|
|
||||||
$repeats = $image_num * $dataset_repeats
|
|
||||||
|
|
||||||
# calculate max_train_set
|
|
||||||
$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch)
|
|
||||||
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune.py `
|
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
|
||||||
--in_json $train_dir"\meta_lat.json" `
|
|
||||||
--train_data_dir=$train_dir"\"$training_folder `
|
|
||||||
--output_dir=$train_dir"\fine_tuned" `
|
|
||||||
--train_batch_size=$train_batch_size `
|
|
||||||
--dataset_repeats=$dataset_repeats `
|
|
||||||
--learning_rate=$learning_rate `
|
|
||||||
--max_train_steps=$max_train_set `
|
|
||||||
--use_8bit_adam --xformers `
|
|
||||||
--mixed_precision=$mixed_precision `
|
|
||||||
--save_every_n_epochs=$save_every_n_epochs `
|
|
||||||
--save_precision="fp16"
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune.py `
|
|
||||||
--pretrained_model_name_or_path=$train_dir"\fine_tuned\last.ckpt" `
|
|
||||||
--in_json $train_dir"\meta_lat.json" `
|
|
||||||
--train_data_dir=$train_dir"\"$training_folder `
|
|
||||||
--output_dir=$train_dir"\fine_tuned2" `
|
|
||||||
--train_batch_size=$train_batch_size `
|
|
||||||
--dataset_repeats=$([Math]::Ceiling($dataset_repeats / 2)) `
|
|
||||||
--learning_rate=$learning_rate `
|
|
||||||
--max_train_steps=$([Math]::Ceiling($max_train_set / 2)) `
|
|
||||||
--use_8bit_adam --xformers `
|
|
||||||
--mixed_precision=$mixed_precision `
|
|
||||||
--save_every_n_epochs=$save_every_n_epochs `
|
|
||||||
--save_precision="fp16"
|
|
153
examples/kohya_finetune.ps1
Normal file
153
examples/kohya_finetune.ps1
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# variables related to the pretrained model
|
||||||
|
$pretrained_model_name_or_path = "D:\models\test\samdoesart2\model\last"
|
||||||
|
$v2 = 1 # set to 1 for true or 0 for false
|
||||||
|
$v_model = 0 # set to 1 for true or 0 for false
|
||||||
|
|
||||||
|
# variables related to the training dataset and output directory
|
||||||
|
$train_dir = "D:\models\test\samdoesart2"
|
||||||
|
$image_folder = "D:\dataset\samdoesart2\raw"
|
||||||
|
$output_dir = "D:\models\test\samdoesart2\model_e2\"
|
||||||
|
$max_resolution = "512,512"
|
||||||
|
|
||||||
|
# variables related to the training process
|
||||||
|
$learning_rate = 1e-6
|
||||||
|
$lr_scheduler = "constant" # Default is constant
|
||||||
|
$lr_warmup = 0 # % of steps to warmup for 0 - 100. Default is 0.
|
||||||
|
$dataset_repeats = 40
|
||||||
|
$train_batch_size = 8
|
||||||
|
$epoch = 1
|
||||||
|
$save_every_n_epochs = 1
|
||||||
|
$mixed_precision = "bf16"
|
||||||
|
$save_precision = "fp16" # use fp16 for better compatibility with auto1111 and other repo
|
||||||
|
$seed = "494481440"
|
||||||
|
$num_cpu_threads_per_process = 6
|
||||||
|
$train_text_encoder = 0 # set to 1 to train text encoder otherwise set to 0
|
||||||
|
|
||||||
|
# variables related to the resulting diffuser model. If input is ckpt or tensors then it is not applicable
|
||||||
|
$convert_to_safetensors = 1 # set to 1 to convert resulting diffuser to safetensors
|
||||||
|
$convert_to_ckpt = 1 # set to 1 to convert resulting diffuser to ckpt
|
||||||
|
|
||||||
|
# other variables
|
||||||
|
$kohya_finetune_repo_path = "D:\kohya_ss"
|
||||||
|
|
||||||
|
### You should not need to change things below
|
||||||
|
|
||||||
|
# Set variables to useful values using ternary operator
|
||||||
|
$v_model = ($v_model -eq 0) ? $null : "--v_parameterization"
|
||||||
|
$v2 = ($v2 -eq 0) ? $null : "--v2"
|
||||||
|
$train_text_encoder = ($train_text_encoder -eq 0) ? $null : "--train_text_encoder"
|
||||||
|
|
||||||
|
# stop script on error
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
|
# define a list of substrings to search for
|
||||||
|
$substrings_v2 = "stable-diffusion-2-1-base", "stable-diffusion-2-base"
|
||||||
|
|
||||||
|
# check if $v2 and $v_model are empty and if $pretrained_model_name_or_path contains any of the substrings in the v2 list
|
||||||
|
if ($v2 -eq $null -and $v_model -eq $null -and ($substrings_v2 | Where-Object { $pretrained_model_name_or_path -match $_ }).Count -gt 0) {
|
||||||
|
Write-Host("SD v2 model detected. Setting --v2 parameter")
|
||||||
|
$v2 = "--v2"
|
||||||
|
$v_model = $null
|
||||||
|
}
|
||||||
|
|
||||||
|
# define a list of substrings to search for v-objective
|
||||||
|
$substrings_v_model = "stable-diffusion-2-1", "stable-diffusion-2"
|
||||||
|
|
||||||
|
# check if $v2 and $v_model are empty and if $pretrained_model_name_or_path contains any of the substrings in the v_model list
|
||||||
|
elseif ($v2 -eq $null -and $v_model -eq $null -and ($substrings_v_model | Where-Object { $pretrained_model_name_or_path -match $_ }).Count -gt 0) {
|
||||||
|
Write-Host("SD v2 v_model detected. Setting --v2 parameter and --v_parameterization")
|
||||||
|
$v2 = "--v2"
|
||||||
|
$v_model = "--v_parameterization"
|
||||||
|
}
|
||||||
|
|
||||||
|
# activate venv
|
||||||
|
cd $kohya_finetune_repo_path
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
|
# create caption json file
|
||||||
|
if (!(Test-Path -Path $train_dir)) {
|
||||||
|
New-Item -Path $train_dir -ItemType "directory"
|
||||||
|
}
|
||||||
|
|
||||||
|
python $kohya_finetune_repo_path\script\merge_captions_to_metadata.py `
|
||||||
|
--caption_extention ".txt" $image_folder $train_dir"\meta_cap.json"
|
||||||
|
|
||||||
|
# create images buckets
|
||||||
|
python $kohya_finetune_repo_path\script\prepare_buckets_latents.py `
|
||||||
|
$image_folder `
|
||||||
|
$train_dir"\meta_cap.json" `
|
||||||
|
$train_dir"\meta_lat.json" `
|
||||||
|
$pretrained_model_name_or_path `
|
||||||
|
--batch_size 4 --max_resolution $max_resolution --mixed_precision $mixed_precision
|
||||||
|
|
||||||
|
# Get number of valid images
|
||||||
|
$image_num = Get-ChildItem "$image_folder" -Recurse -File -Include *.npz | Measure-Object | % { $_.Count }
|
||||||
|
|
||||||
|
$repeats = $image_num * $dataset_repeats
|
||||||
|
Write-Host("Repeats = $repeats")
|
||||||
|
|
||||||
|
# calculate max_train_set
|
||||||
|
$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch)
|
||||||
|
Write-Host("max_train_set = $max_train_set")
|
||||||
|
|
||||||
|
$lr_warmup_steps = [Math]::Round($lr_warmup * $max_train_set / 100)
|
||||||
|
Write-Host("lr_warmup_steps = $lr_warmup_steps")
|
||||||
|
|
||||||
|
Write-Host("$v2 $v_model")
|
||||||
|
|
||||||
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process $kohya_finetune_repo_path\script\fine_tune.py `
|
||||||
|
$v2 `
|
||||||
|
$v_model `
|
||||||
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
|
--in_json $train_dir\meta_lat.json `
|
||||||
|
--train_data_dir="$image_folder" `
|
||||||
|
--output_dir=$output_dir `
|
||||||
|
--train_batch_size=$train_batch_size `
|
||||||
|
--dataset_repeats=$dataset_repeats `
|
||||||
|
--learning_rate=$learning_rate `
|
||||||
|
--lr_scheduler=$lr_scheduler `
|
||||||
|
--lr_warmup_steps=$lr_warmup_steps `
|
||||||
|
--max_train_steps=$max_train_set `
|
||||||
|
--use_8bit_adam `
|
||||||
|
--xformers `
|
||||||
|
--mixed_precision=$mixed_precision `
|
||||||
|
--save_every_n_epochs=$save_every_n_epochs `
|
||||||
|
--seed=$seed `
|
||||||
|
$train_text_encoder `
|
||||||
|
--save_precision=$save_precision
|
||||||
|
|
||||||
|
# check if $output_dir\last is a directory... therefore it is a diffuser model
|
||||||
|
if (Test-Path "$output_dir\last" -PathType Container) {
|
||||||
|
if ($convert_to_ckpt) {
|
||||||
|
Write-Host("Converting diffuser model $output_dir\last to $output_dir\last.ckpt")
|
||||||
|
python "$kohya_finetune_repo_path\tools\convert_diffusers20_original_sd.py" `
|
||||||
|
$output_dir\last `
|
||||||
|
$output_dir\last.ckpt `
|
||||||
|
--$save_precision
|
||||||
|
}
|
||||||
|
if ($convert_to_safetensors) {
|
||||||
|
Write-Host("Converting diffuser model $output_dir\last to $output_dir\last.safetensors")
|
||||||
|
python "$kohya_finetune_repo_path\tools\convert_diffusers20_original_sd.py" `
|
||||||
|
$output_dir\last `
|
||||||
|
$output_dir\last.safetensors `
|
||||||
|
--$save_precision
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# define a list of substrings to search for inference file
|
||||||
|
$substrings_sd_model = ".ckpt", ".safetensors"
|
||||||
|
$matching_extension = foreach ($ext in $substrings_sd_model) {
|
||||||
|
Get-ChildItem $output_dir -File | Where-Object { $_.Extension -contains $ext }
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($matching_extension.Count -gt 0) {
|
||||||
|
# copy the file named "v2-inference.yaml" from the "v2_inference" folder to $output_dir as last.yaml
|
||||||
|
if ( $v2 -ne $null -and $v_model -ne $null) {
|
||||||
|
Write-Host("Saving v2-inference-v.yaml as $output_dir\last.yaml")
|
||||||
|
Copy-Item -Path "$kohya_finetune_repo_path\v2_inference\v2-inference-v.yaml" -Destination "$output_dir\last.yaml"
|
||||||
|
}
|
||||||
|
elseif ( $v2 -ne $null ) {
|
||||||
|
Write-Host("Saving v2-inference.yaml as $output_dir\last.yaml")
|
||||||
|
Copy-Item -Path "$kohya_finetune_repo_path\v2_inference\v2-inference.yaml" -Destination "$output_dir\last.yaml"
|
||||||
|
}
|
||||||
|
}
|
@ -24,11 +24,11 @@ $ErrorActionPreference = "Stop"
|
|||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
# create caption json file
|
# create caption json file
|
||||||
python D:\kohya_ss\diffusers_fine_tuning\merge_captions_to_metadata.py `
|
python D:\kohya_ss\finetune\merge_captions_to_metadata.py `
|
||||||
--caption_extention ".txt" $train_dir"\"$training_folder $train_dir"\meta_cap.json"
|
--caption_extention ".txt" $train_dir"\"$training_folder $train_dir"\meta_cap.json"
|
||||||
|
|
||||||
# create images buckets
|
# create images buckets
|
||||||
python D:\kohya_ss\diffusers_fine_tuning\prepare_buckets_latents.py `
|
python D:\kohya_ss\finetune\prepare_buckets_latents.py `
|
||||||
$train_dir"\"$training_folder `
|
$train_dir"\"$training_folder `
|
||||||
$train_dir"\meta_cap.json" `
|
$train_dir"\meta_cap.json" `
|
||||||
$train_dir"\meta_lat.json" `
|
$train_dir"\meta_lat.json" `
|
||||||
@ -43,7 +43,7 @@ $repeats = $image_num * $dataset_repeats
|
|||||||
$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch)
|
$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch)
|
||||||
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--in_json $train_dir"\meta_lat.json" `
|
--in_json $train_dir"\meta_lat.json" `
|
||||||
--train_data_dir=$train_dir"\"$training_folder `
|
--train_data_dir=$train_dir"\"$training_folder `
|
||||||
@ -58,7 +58,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\
|
|||||||
--train_text_encoder `
|
--train_text_encoder `
|
||||||
--save_precision="fp16"
|
--save_precision="fp16"
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune_v1-ber.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py `
|
||||||
--pretrained_model_name_or_path=$train_dir"\fine_tuned\last.ckpt" `
|
--pretrained_model_name_or_path=$train_dir"\fine_tuned\last.ckpt" `
|
||||||
--in_json $train_dir"\meta_lat.json" `
|
--in_json $train_dir"\meta_lat.json" `
|
||||||
--train_data_dir=$train_dir"\"$training_folder `
|
--train_data_dir=$train_dir"\"$training_folder `
|
||||||
@ -74,7 +74,7 @@ accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\
|
|||||||
|
|
||||||
# Hypernetwork
|
# Hypernetwork
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\diffusers_fine_tuning\fine_tune_v1-ber.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--in_json $train_dir"\meta_lat.json" `
|
--in_json $train_dir"\meta_lat.json" `
|
||||||
--train_data_dir=$train_dir"\"$training_folder `
|
--train_data_dir=$train_dir"\"$training_folder `
|
||||||
|
@ -8,7 +8,7 @@ $pretrained_model_name_or_path = "D:\models\512-base-ema.ckpt"
|
|||||||
$data_dir = "D:\models\dariusz_zawadzki\kohya_reg\data"
|
$data_dir = "D:\models\dariusz_zawadzki\kohya_reg\data"
|
||||||
$reg_data_dir = "D:\models\dariusz_zawadzki\kohya_reg\reg"
|
$reg_data_dir = "D:\models\dariusz_zawadzki\kohya_reg\reg"
|
||||||
$logging_dir = "D:\models\dariusz_zawadzki\logs"
|
$logging_dir = "D:\models\dariusz_zawadzki\logs"
|
||||||
$output_dir = "D:\models\dariusz_zawadzki\train_db_fixed_model_reg_v2"
|
$output_dir = "D:\models\dariusz_zawadzki\train_db_model_reg_v2"
|
||||||
$resolution = "512,512"
|
$resolution = "512,512"
|
||||||
$lr_scheduler="polynomial"
|
$lr_scheduler="polynomial"
|
||||||
$cache_latents = 1 # 1 = true, 0 = false
|
$cache_latents = 1 # 1 = true, 0 = false
|
||||||
@ -41,7 +41,7 @@ Write-Output "Repeats: $repeats"
|
|||||||
cd D:\kohya_ss
|
cd D:\kohya_ss
|
||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db_fixed.py `
|
accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py `
|
||||||
--v2 `
|
--v2 `
|
||||||
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
--pretrained_model_name_or_path=$pretrained_model_name_or_path `
|
||||||
--train_data_dir=$data_dir `
|
--train_data_dir=$data_dir `
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
$date = Read-Host "Enter the date (yyyy-mm-dd):" -Prompt "Invalid date format. Please try again (yyyy-mm-dd):" -ValidateScript {
|
|
||||||
# Parse the date input and return $true if it is in the correct format,
|
|
||||||
# or $false if it is not
|
|
||||||
$date = [DateTime]::Parse($_)
|
|
||||||
return $date -ne $null
|
|
||||||
}
|
|
1059
fine_tune.py
Normal file
1059
fine_tune.py
Normal file
File diff suppressed because it is too large
Load Diff
1
finetune.bat
Normal file
1
finetune.bat
Normal file
@ -0,0 +1 @@
|
|||||||
|
.\venv\Scripts\python.exe .\finetune_gui.py
|
@ -8,8 +8,10 @@
|
|||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
from models.vit import VisionTransformer, interpolate_pos_embed
|
# from models.vit import VisionTransformer, interpolate_pos_embed
|
||||||
from models.med import BertConfig, BertModel, BertLMHeadModel
|
# from models.med import BertConfig, BertModel, BertLMHeadModel
|
||||||
|
from blip.vit import VisionTransformer, interpolate_pos_embed
|
||||||
|
from blip.med import BertConfig, BertModel, BertLMHeadModel
|
||||||
from transformers import BertTokenizer
|
from transformers import BertTokenizer
|
||||||
|
|
||||||
import torch
|
import torch
|
@ -929,7 +929,7 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
cross_attentions=outputs.cross_attentions,
|
cross_attentions=outputs.cross_attentions,
|
||||||
)
|
)
|
||||||
|
|
||||||
def prepare_inputs_for_generation(self, input_ids, past=None, attention_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, **model_kwargs):
|
def prepare_inputs_for_generation(self, input_ids, past=None, attention_mask=None, **model_kwargs):
|
||||||
input_shape = input_ids.shape
|
input_shape = input_ids.shape
|
||||||
# if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly
|
# if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly
|
||||||
if attention_mask is None:
|
if attention_mask is None:
|
||||||
@ -943,8 +943,8 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
"input_ids": input_ids,
|
"input_ids": input_ids,
|
||||||
"attention_mask": attention_mask,
|
"attention_mask": attention_mask,
|
||||||
"past_key_values": past,
|
"past_key_values": past,
|
||||||
"encoder_hidden_states": encoder_hidden_states,
|
"encoder_hidden_states": model_kwargs.get("encoder_hidden_states", None),
|
||||||
"encoder_attention_mask": encoder_attention_mask,
|
"encoder_attention_mask": model_kwargs.get("encoder_attention_mask", None),
|
||||||
"is_decoder": True,
|
"is_decoder": True,
|
||||||
}
|
}
|
||||||
|
|
22
finetune/blip/med_config.json
Normal file
22
finetune/blip/med_config.json
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"BertModel"
|
||||||
|
],
|
||||||
|
"attention_probs_dropout_prob": 0.1,
|
||||||
|
"hidden_act": "gelu",
|
||||||
|
"hidden_dropout_prob": 0.1,
|
||||||
|
"hidden_size": 768,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 3072,
|
||||||
|
"layer_norm_eps": 1e-12,
|
||||||
|
"max_position_embeddings": 512,
|
||||||
|
"model_type": "bert",
|
||||||
|
"num_attention_heads": 12,
|
||||||
|
"num_hidden_layers": 12,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"type_vocab_size": 2,
|
||||||
|
"vocab_size": 30524,
|
||||||
|
"encoder_width": 768,
|
||||||
|
"add_cross_attention": true
|
||||||
|
}
|
||||||
|
|
123
finetune/clean_captions_and_tags.py
Normal file
123
finetune/clean_captions_and_tags.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
# このスクリプトのライセンスは、Apache License 2.0とします
|
||||||
|
# (c) 2022 Kohya S. @kohya_ss
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def clean_tags(image_key, tags):
    """Normalise a tag string and drop its rating section.

    Underscores are turned into spaces, then everything from the first
    ", rating" marker onwards is discarded (the marker is only emitted by
    DeepDanbooru; WD14 tagger output has none and passes through silently).
    When the marker occurs more than once, the image key and the tag string
    are printed as a diagnostic.

    Args:
        image_key: Identifier of the image, used only in the diagnostic.
        tags: Comma separated tag string.

    Returns:
        The cleaned tag string.
    """
    spaced = tags.replace('_', ' ')
    pieces = spaced.split(", rating")
    if len(pieces) > 2:
        print("multiple ratings:")
        print(f"{image_key} {spaced}")
    # Without a marker, split() returns the whole string as its only element,
    # so the first piece is the right answer in every case.
    return pieces[0]
|
||||||
|
|
||||||
|
|
||||||
|
# Replacements are searched for and applied in order, top to bottom.
# Each entry is ('text to find', 'replacement text').
CAPTION_REPLACEMENTS = [
    ('anime anime', 'anime'),
    ('young ', ''),
    ('anime girl', 'girl'),
    ('cartoon female', 'girl'),
    ('cartoon lady', 'girl'),
    ('cartoon character', 'girl'),  # a or ~s
    ('cartoon woman', 'girl'),
    ('cartoon women', 'girls'),
    ('cartoon girl', 'girl'),
    ('anime female', 'girl'),
    ('anime lady', 'girl'),
    ('anime character', 'girl'),  # a or ~s
    ('anime woman', 'girl'),
    ('anime women', 'girls'),
    ('lady', 'girl'),
    ('female', 'girl'),
    ('woman', 'girl'),
    ('women', 'girls'),
    ('people', 'girls'),
    ('person', 'girl'),
    ('a cartoon figure', 'a figure'),
    ('a cartoon image', 'an image'),
    ('a cartoon picture', 'a picture'),
    ('an anime cartoon image', 'an image'),
    ('a cartoon anime drawing', 'a drawing'),
    ('a cartoon drawing', 'a drawing'),
    ('girl girl', 'girl'),
]


def clean_caption(caption):
    """Apply every entry of CAPTION_REPLACEMENTS to a caption.

    Each replacement is repeated until the caption stops changing before the
    next entry is tried, so runs such as "girl girl girl" collapse fully.

    Args:
        caption: Caption text to clean.

    Returns:
        The caption after all replacements have been applied.
    """
    for old, new in CAPTION_REPLACEMENTS:
        while True:
            updated = caption.replace(old, new)
            if updated == caption:
                break
            caption = updated
    return caption
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Clean every caption and tag string stored in a metadata JSON file.

    Loads ``args.in_json``, passes each entry's 'tags' through ``clean_tags``
    and its 'caption' through ``clean_caption``, then writes the result to
    ``args.out_json``. Entries lacking either field are reported and left
    untouched. Returns without writing anything when the input is missing.
    """
    if not os.path.exists(args.in_json):
        print("no metadata / メタデータファイルがありません")
        return

    print(f"loading existing metadata: {args.in_json}")
    with open(args.in_json, "rt", encoding='utf-8') as f:
        metadata = json.load(f)

    print("cleaning captions and tags.")
    # Iterate over a snapshot of the keys, as the entries are updated in place.
    for image_key in tqdm(list(metadata.keys())):
        entry = metadata[image_key]

        tags = entry.get('tags')
        if tags is not None:
            entry['tags'] = clean_tags(image_key, tags)
        else:
            print(f"image does not have tags / メタデータにタグがありません: {image_key}")

        caption = entry.get('caption')
        if caption is not None:
            entry['caption'] = clean_caption(caption)
        else:
            print(f"image does not have caption / メタデータにキャプションがありません: {image_key}")

    # Write the metadata back out and finish.
    print(f"writing metadata: {args.out_json}")
    with open(args.out_json, "wt", encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print("done!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: in_json out_json
    # A leading train_data_dir positional used to be accepted; it is no longer
    # declared, so a third argument ends up in ``unknown`` and is handled below.
    parser = argparse.ArgumentParser()
    parser.add_argument("in_json", type=str,
                        help="metadata file to input / 読み込むメタデータファイル")
    parser.add_argument("out_json", type=str,
                        help="metadata file to output / メタデータファイル書き出し先")

    args, unknown = parser.parse_known_args()

    if len(unknown) > 1:
        raise ValueError(f"error: unrecognized arguments: {unknown}")

    if unknown:
        # Exactly one extra argument: the old three-argument form was used.
        print("WARNING: train_data_dir argument is removed. This script will not work with three arguments in future. Please specify two arguments: in_json and out_json.")
        print("All captions and tags in the metadata are processed.")
        print("警告: train_data_dir引数は不要になりました。将来的には三つの引数を指定すると動かなくなる予定です。読み込み元のメタデータと書き出し先の二つの引数だけ指定してください。")
        print("メタデータ内のすべてのキャプションとタグが処理されます。")
        # argparse bound the first two values to in_json/out_json, so shift
        # everything one slot to the left to drop the obsolete directory.
        args.in_json = args.out_json
        args.out_json = unknown[0]

    main(args)
|
96
finetune/hypernetwork_nai.py
Normal file
96
finetune/hypernetwork_nai.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# NAI compatible
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class HypernetworkModule(torch.nn.Module):
    """Residual two-layer linear block: ``x + linear(x) * multiplier``.

    The block expands ``dim`` to ``dim * 2`` and projects back to ``dim``.
    Weights start as N(0, 0.01) and biases as zero.
    """

    def __init__(self, dim, multiplier=1.0):
        super().__init__()

        linear1 = torch.nn.Linear(dim, dim * 2)
        linear2 = torch.nn.Linear(dim * 2, dim)
        for layer in (linear1, linear2):
            layer.weight.data.normal_(mean=0.0, std=0.01)
            layer.bias.data.zero_()

        # Stored as a Sequential, so parameter names are "linear.0.*" and
        # "linear.1.*" (see Hypernetwork.load_from_state_dict for the older
        # "linear1.*" / "linear2.*" naming).
        self.linear = torch.nn.Sequential(linear1, linear2)
        self.multiplier = multiplier

    def forward(self, x):
        """Return ``x`` plus the scaled output of the linear stack."""
        return x + self.linear(x) * self.multiplier


class Hypernetwork(torch.nn.Module):
    """NAI compatible hypernetwork: one pair of modules per supported context size.

    For every size in ``enable_sizes`` two ``HypernetworkModule`` instances
    are created; ``forward`` returns the outputs of both for a given context.
    """

    # Context feature sizes for which a module pair exists.
    enable_sizes = [320, 640, 768, 1280]

    def __init__(self, multiplier=1.0) -> None:
        super().__init__()
        # NOTE(review): this list attribute shadows torch.nn.Module.modules()
        # on instances. The name is kept because it is part of the class's
        # existing interface; the submodules are still registered below.
        self.modules = []
        for size in Hypernetwork.enable_sizes:
            pair = (HypernetworkModule(size, multiplier), HypernetworkModule(size, multiplier))
            self.modules.append(pair)
            self.register_module(f"{size}_0", pair[0])
            self.register_module(f"{size}_1", pair[1])

    def apply_to_stable_diffusion(self, text_encoder, vae, unet):
        """Attach this hypernetwork to the attention layers of an original-format U-Net.

        Walks ``input_blocks``, ``middle_block`` and ``output_blocks`` and sets
        ``attn.hypernetwork`` on ``attn1``/``attn2`` of every transformer block:
        to ``self`` when ``attn.context_dim`` is a supported size, else ``None``.
        ``text_encoder`` and ``vae`` are accepted but not used.
        """
        blocks = unet.input_blocks + [unet.middle_block] + unet.output_blocks
        for block in blocks:
            for subblk in block:
                if 'SpatialTransformer' in str(type(subblk)):
                    for tf_block in subblk.transformer_blocks:
                        for attn in [tf_block.attn1, tf_block.attn2]:
                            size = attn.context_dim
                            if size in Hypernetwork.enable_sizes:
                                attn.hypernetwork = self
                            else:
                                attn.hypernetwork = None

    def apply_to_diffusers(self, text_encoder, vae, unet):
        """Attach this hypernetwork to the attention layers of a Diffusers U-Net.

        Same idea as ``apply_to_stable_diffusion`` but walks ``down_blocks``,
        ``mid_block`` and ``up_blocks`` and reads the size from
        ``attn.to_k.in_features``. ``text_encoder`` and ``vae`` are not used.

        Returns:
            Always ``True`` (no error checking is performed yet).
        """
        blocks = unet.down_blocks + [unet.mid_block] + unet.up_blocks
        for block in blocks:
            if hasattr(block, 'attentions'):
                for subblk in block.attentions:
                    # The class is named differently in Diffusers 0.6.0 and 0.7+.
                    if 'SpatialTransformer' in str(type(subblk)) or 'Transformer2DModel' in str(type(subblk)):
                        for tf_block in subblk.transformer_blocks:
                            for attn in [tf_block.attn1, tf_block.attn2]:
                                size = attn.to_k.in_features
                                if size in Hypernetwork.enable_sizes:
                                    attn.hypernetwork = self
                                else:
                                    attn.hypernetwork = None
        return True  # TODO error checking

    def forward(self, x, context):
        """Run both modules of the pair matching ``context.shape[-1]``.

        ``x`` is accepted but not used. Returns a 2-tuple with the output of
        the first and of the second module applied to ``context``.
        """
        size = context.shape[-1]
        assert size in Hypernetwork.enable_sizes
        module = self.modules[Hypernetwork.enable_sizes.index(size)]
        return module[0].forward(context), module[1].forward(context)

    def load_from_state_dict(self, state_dict):
        """Load weights from a ``{size: [state_dict_0, state_dict_1]}`` mapping.

        Both the current parameter names ("linear.0.*", "linear.1.*") and the
        older ones ("linear1.*", "linear2.*") are accepted. Entries whose key
        is not an int are ignored.

        Returns:
            Always ``True``.

        Raises:
            RuntimeError: from ``load_state_dict(strict=True)`` on key mismatch.
            ValueError: if an int key is not one of ``enable_sizes``.
        """
        # old ver to new ver
        changes = {
            'linear1.bias': 'linear.0.bias',
            'linear1.weight': 'linear.0.weight',
            'linear2.bias': 'linear.1.bias',
            'linear2.weight': 'linear.1.weight',
        }

        # Top-level rename, kept exactly as before (mutates the argument).
        for key_from, key_to in changes.items():
            if key_from in state_dict:
                state_dict[key_to] = state_dict[key_from]
                del state_dict[key_from]

        for size, sd in state_dict.items():
            if isinstance(size, int):
                pair = self.modules[Hypernetwork.enable_sizes.index(size)]
                for module, module_sd in zip(pair, (sd[0], sd[1])):
                    # The parameters that are actually loaded live in these
                    # nested dicts, so the legacy names must be upgraded here;
                    # the top-level rename above never reaches them. Dicts that
                    # already use the current names are passed through as-is.
                    if any(key in module_sd for key in changes):
                        module_sd = {changes.get(key, key): value for key, value in module_sd.items()}
                    module.load_state_dict(module_sd, strict=True)
        return True

    def get_state_dict(self):
        """Return ``{size: [state_dict_0, state_dict_1]}`` for every supported size."""
        state_dict = {}
        for i, size in enumerate(Hypernetwork.enable_sizes):
            sd0 = self.modules[i][0].state_dict()
            sd1 = self.modules[i][1].state_dict()
            state_dict[size] = [sd0, sd1]
        return state_dict
|
@ -1,10 +1,8 @@
|
|||||||
# このスクリプトのライセンスは、Apache License 2.0とします
|
|
||||||
# (c) 2022 Kohya S. @kohya_ss
|
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import random
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
@ -12,51 +10,45 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
from torchvision import transforms
|
from torchvision import transforms
|
||||||
from torchvision.transforms.functional import InterpolationMode
|
from torchvision.transforms.functional import InterpolationMode
|
||||||
from models.blip import blip_decoder
|
from blip.blip import blip_decoder
|
||||||
# from Salesforce_BLIP.models.blip import blip_decoder
|
# from Salesforce_BLIP.models.blip import blip_decoder
|
||||||
|
|
||||||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
cwd = os.getcwd()
|
# fix the seed for reproducibility
|
||||||
print('Current Working Directory is: ', cwd)
|
seed = args.seed # + utils.get_rank()
|
||||||
|
torch.manual_seed(seed)
|
||||||
|
np.random.seed(seed)
|
||||||
|
random.seed(seed)
|
||||||
|
|
||||||
|
if not os.path.exists("blip"):
|
||||||
|
cwd = os.getcwd()
|
||||||
|
print('Current Working Directory is: ', cwd)
|
||||||
|
os.chdir('finetune')
|
||||||
|
|
||||||
os.chdir('.\BLIP_caption')
|
|
||||||
|
|
||||||
image_paths = glob.glob(os.path.join(args.train_data_dir, "*.jpg")) + \
|
image_paths = glob.glob(os.path.join(args.train_data_dir, "*.jpg")) + \
|
||||||
glob.glob(os.path.join(args.train_data_dir, "*.png")) + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
|
glob.glob(os.path.join(args.train_data_dir, "*.png")) + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
|
||||||
print(f"found {len(image_paths)} images.")
|
print(f"found {len(image_paths)} images.")
|
||||||
|
|
||||||
print(f"loading BLIP caption: {args.caption_weights}")
|
print(f"loading BLIP caption: {args.caption_weights}")
|
||||||
# image_size = 384
|
|
||||||
# model = blip_decoder(pretrained=args.caption_weights, image_size=image_size, vit='large', med_config='configs/med_config.json')
|
|
||||||
# model.eval()
|
|
||||||
# model = model.to(device)
|
|
||||||
|
|
||||||
image_size = 384
|
image_size = 384
|
||||||
|
model = blip_decoder(pretrained=args.caption_weights, image_size=image_size, vit='large', med_config="./blip/med_config.json")
|
||||||
|
model.eval()
|
||||||
|
model = model.to(DEVICE)
|
||||||
|
print("BLIP loaded")
|
||||||
|
|
||||||
|
# 正方形でいいのか? という気がするがソースがそうなので
|
||||||
transform = transforms.Compose([
|
transform = transforms.Compose([
|
||||||
transforms.Resize((image_size,image_size),interpolation=InterpolationMode.BICUBIC),
|
transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
|
||||||
transforms.ToTensor(),
|
transforms.ToTensor(),
|
||||||
transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
|
transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
|
||||||
])
|
])
|
||||||
|
|
||||||
model_url = args.caption_weights # 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth'
|
|
||||||
|
|
||||||
model = blip_decoder(pretrained=model_url, image_size=384, vit='large')
|
|
||||||
model.eval()
|
|
||||||
model = model.to(device)
|
|
||||||
print("BLIP loaded")
|
|
||||||
# 正方形でいいのか? という気がするがソースがそうなので
|
|
||||||
# transform = transforms.Compose([
|
|
||||||
# transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
|
|
||||||
# transforms.ToTensor(),
|
|
||||||
# transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
|
|
||||||
# ])
|
|
||||||
|
|
||||||
# captioningする
|
# captioningする
|
||||||
def run_batch(path_imgs):
|
def run_batch(path_imgs):
|
||||||
imgs = torch.stack([im for _, im in path_imgs]).to(device)
|
imgs = torch.stack([im for _, im in path_imgs]).to(DEVICE)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
if args.beam_search:
|
if args.beam_search:
|
||||||
@ -92,7 +84,7 @@ def main(args):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ")
|
parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ")
|
||||||
parser.add_argument("caption_weights", type=str,
|
parser.add_argument("--caption_weights", type=str, default="https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth",
|
||||||
help="BLIP caption weights (model_large_caption.pth) / BLIP captionの重みファイル(model_large_caption.pth)")
|
help="BLIP caption weights (model_large_caption.pth) / BLIP captionの重みファイル(model_large_caption.pth)")
|
||||||
parser.add_argument("--caption_extention", type=str, default=None,
|
parser.add_argument("--caption_extention", type=str, default=None,
|
||||||
help="extension of caption file (for backward compatibility) / 出力されるキャプションファイルの拡張子(スペルミスしていたのを残してあります)")
|
help="extension of caption file (for backward compatibility) / 出力されるキャプションファイルの拡張子(スペルミスしていたのを残してあります)")
|
||||||
@ -104,6 +96,7 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument("--top_p", type=float, default=0.9, help="top_p in Nucleus sampling / Nucleus sampling時のtop_p")
|
parser.add_argument("--top_p", type=float, default=0.9, help="top_p in Nucleus sampling / Nucleus sampling時のtop_p")
|
||||||
parser.add_argument("--max_length", type=int, default=75, help="max length of caption / captionの最大長")
|
parser.add_argument("--max_length", type=int, default=75, help="max length of caption / captionの最大長")
|
||||||
parser.add_argument("--min_length", type=int, default=5, help="min length of caption / captionの最小長")
|
parser.add_argument("--min_length", type=int, default=5, help="min length of caption / captionの最小長")
|
||||||
|
parser.add_argument('--seed', default=42, type=int, help='seed for reproducibility / 再現性を確保するための乱数seed')
|
||||||
parser.add_argument("--debug", action="store_true", help="debug mode")
|
parser.add_argument("--debug", action="store_true", help="debug mode")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
68
finetune/merge_captions_to_metadata.py
Normal file
68
finetune/merge_captions_to_metadata.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
# このスクリプトのライセンスは、Apache License 2.0とします
|
||||||
|
# (c) 2022 Kohya S. @kohya_ss
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Merge per-image caption files into a metadata JSON file.

    Collects the *.jpg, *.png and *.webp files in ``args.train_data_dir`` and,
    for each one, reads the first line of the sibling file whose name is the
    image name with its extension replaced by ``args.caption_extension``.
    That line is stored under the 'caption' key of the image's metadata entry.

    Existing metadata is loaded from ``args.in_json``; when that is omitted
    and ``args.out_json`` already exists, the latter is read instead. The
    merged result is written to ``args.out_json``.

    Raises:
        FileNotFoundError: if an image has no caption file.
    """
    image_paths = glob.glob(os.path.join(args.train_data_dir, "*.jpg")) + \
        glob.glob(os.path.join(args.train_data_dir, "*.png")) + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
    print(f"found {len(image_paths)} images.")

    if args.in_json is None and os.path.isfile(args.out_json):
        args.in_json = args.out_json

    if args.in_json is not None:
        print(f"loading existing metadata: {args.in_json}")
        with open(args.in_json, "rt", encoding='utf-8') as f:
            metadata = json.load(f)
        print("captions for existing images will be overwritten / 既存の画像のキャプションは上書きされます")
    else:
        print("new metadata will be created / 新しいメタデータファイルが作成されます")
        metadata = {}

    print("merge caption texts to metadata json.")
    for image_path in tqdm(image_paths):
        caption_path = os.path.splitext(image_path)[0] + args.caption_extension
        with open(caption_path, "rt", encoding='utf-8') as f:
            # readline() yields "" for an empty file, whereas readlines()[0]
            # raised IndexError; for non-empty files the result is identical.
            caption = f.readline().strip()

        image_key = image_path if args.full_path else os.path.splitext(os.path.basename(image_path))[0]
        if image_key not in metadata:
            metadata[image_key] = {}

        metadata[image_key]['caption'] = caption
        if args.debug:
            print(image_key, caption)

    # Write the metadata out and finish.
    print(f"writing metadata: {args.out_json}")
    with open(args.out_json, "wt", encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print("done!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: train_data_dir out_json [options]
    parser = argparse.ArgumentParser()
    parser.add_argument("train_data_dir", type=str,
                        help="directory for train images / 学習画像データのディレクトリ")
    parser.add_argument("out_json", type=str,
                        help="metadata file to output / メタデータファイル書き出し先")
    parser.add_argument("--in_json", type=str,
                        help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)")
    # Misspelled option name, still accepted for backward compatibility.
    parser.add_argument("--caption_extention", type=str, default=None,
                        help="extension of caption file (for backward compatibility) / 読み込むキャプションファイルの拡張子(スペルミスしていたのを残してあります)")
    parser.add_argument("--caption_extension", type=str, default=".caption",
                        help="extension of caption file / 読み込むキャプションファイルの拡張子")
    parser.add_argument("--full_path", action="store_true",
                        help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)")
    parser.add_argument("--debug", action="store_true", help="debug mode")

    args = parser.parse_args()

    # When given, the misspelled option takes precedence over the correct one.
    legacy_extension = args.caption_extention
    if legacy_extension is not None:
        args.caption_extension = legacy_extension

    main(args)
|
60
finetune/merge_dd_tags_to_metadata.py
Normal file
60
finetune/merge_dd_tags_to_metadata.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# このスクリプトのライセンスは、Apache License 2.0とします
|
||||||
|
# (c) 2022 Kohya S. @kohya_ss
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Merge per-image tag files (*.txt) into a metadata JSON file.

    Collects the *.jpg, *.png and *.webp files in ``args.train_data_dir`` and,
    for each one, reads the first line of the sibling ``.txt`` file and stores
    it under the 'tags' key of the image's metadata entry.

    Existing metadata is loaded from ``args.in_json``; when that is omitted
    and ``args.out_json`` already exists, the latter is read instead. The
    merged result is written to ``args.out_json``.

    Raises:
        FileNotFoundError: if an image has no tag file.
    """
    image_paths = glob.glob(os.path.join(args.train_data_dir, "*.jpg")) + \
        glob.glob(os.path.join(args.train_data_dir, "*.png")) + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
    print(f"found {len(image_paths)} images.")

    if args.in_json is None and os.path.isfile(args.out_json):
        args.in_json = args.out_json

    if args.in_json is not None:
        print(f"loading existing metadata: {args.in_json}")
        with open(args.in_json, "rt", encoding='utf-8') as f:
            metadata = json.load(f)
        print("tags data for existing images will be overwritten / 既存の画像のタグは上書きされます")
    else:
        print("new metadata will be created / 新しいメタデータファイルが作成されます")
        metadata = {}

    print("merge tags to metadata json.")
    for image_path in tqdm(image_paths):
        tags_path = os.path.splitext(image_path)[0] + '.txt'
        with open(tags_path, "rt", encoding='utf-8') as f:
            # readline() yields "" for an empty file, whereas readlines()[0]
            # raised IndexError; for non-empty files the result is identical.
            tags = f.readline().strip()

        image_key = image_path if args.full_path else os.path.splitext(os.path.basename(image_path))[0]
        if image_key not in metadata:
            metadata[image_key] = {}

        metadata[image_key]['tags'] = tags
        if args.debug:
            print(image_key, tags)

    # Write the metadata out and finish.
    print(f"writing metadata: {args.out_json}")
    with open(args.out_json, "wt", encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print("done!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: train_data_dir out_json [options]
    parser = argparse.ArgumentParser()
    parser.add_argument("train_data_dir", type=str,
                        help="directory for train images / 学習画像データのディレクトリ")
    parser.add_argument("out_json", type=str,
                        help="metadata file to output / メタデータファイル書き出し先")
    parser.add_argument("--in_json", type=str,
                        help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)")
    parser.add_argument("--full_path", action="store_true",
                        help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)")
    parser.add_argument("--debug", action="store_true", help="debug mode, print tags")

    main(parser.parse_args())
|
177
finetune/prepare_buckets_latents.py
Normal file
177
finetune/prepare_buckets_latents.py
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
# このスクリプトのライセンスは、Apache License 2.0とします
|
||||||
|
# (c) 2022 Kohya S. @kohya_ss
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
import numpy as np
|
||||||
|
from diffusers import AutoencoderKL
|
||||||
|
from PIL import Image
|
||||||
|
import cv2
|
||||||
|
import torch
|
||||||
|
from torchvision import transforms
|
||||||
|
|
||||||
|
import library.model_util as model_util
|
||||||
|
|
||||||
|
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Per-image preprocessing applied before VAE encoding.
IMAGE_TRANSFORMS = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)


def get_latents(vae, images, weight_dtype):
    """Encode a batch of images into VAE latents.

    Args:
        vae: Model exposing ``encode(...)`` whose result has a ``latent_dist``.
        images: Images accepted by ``IMAGE_TRANSFORMS``; they are stacked, so
            all of them must end up with the same tensor shape.
        weight_dtype: dtype the batch is cast to before encoding.

    Returns:
        A float32 NumPy array on the CPU, sampled from the latent
        distribution, with one entry per input image.
    """
    batch = torch.stack([IMAGE_TRANSFORMS(img) for img in images])
    batch = batch.to(DEVICE, weight_dtype)
    with torch.no_grad():
        encoded = vae.encode(batch)
        sampled = encoded.latent_dist.sample()
        return sampled.float().to("cpu").numpy()
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Assign training images to aspect-ratio buckets and cache their VAE latents.

    For every *.jpg, *.png and *.webp file in ``args.train_data_dir`` the
    image is matched to the bucket with the closest aspect ratio, resized so
    that it covers the bucket resolution, centre-cropped to that resolution
    and queued. A bucket is encoded with ``get_latents`` once it holds
    ``args.batch_size`` images, and all non-empty buckets are encoded after
    the last image. Each latent is saved with ``np.savez`` in
    ``args.train_data_dir`` under the image key's base name (with a "_flip"
    suffix for the horizontally mirrored copy when ``args.flip_aug`` is set).

    The chosen resolution is recorded as 'train_resolution' in the metadata
    loaded from ``args.in_json``, which is then written to ``args.out_json``.
    Returns without doing anything else when ``args.in_json`` does not exist.
    """
    image_paths = (
        glob.glob(os.path.join(args.train_data_dir, "*.jpg"))
        + glob.glob(os.path.join(args.train_data_dir, "*.png"))
        + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
    )
    print(f"found {len(image_paths)} images.")

    if not os.path.exists(args.in_json):
        print(f"no metadata / メタデータファイルがありません: {args.in_json}")
        return

    print(f"loading existing metadata: {args.in_json}")
    with open(args.in_json, "rt", encoding='utf-8') as f:
        metadata = json.load(f)

    # Anything other than fp16/bf16 runs in full precision.
    weight_dtype = {"fp16": torch.float16, "bf16": torch.bfloat16}.get(args.mixed_precision, torch.float32)

    vae = model_util.load_vae(args.model_name_or_path, weight_dtype)
    vae.eval()
    vae.to(DEVICE, dtype=weight_dtype)

    # Work out the bucket resolutions.
    max_reso = tuple(int(t) for t in args.max_resolution.split(','))
    assert len(max_reso) == 2, f"illegal resolution (not 'width,height') / 画像サイズに誤りがあります。'幅,高さ'で指定してください: {args.max_resolution}"

    bucket_resos, bucket_aspect_ratios = model_util.make_bucket_resolutions(
        max_reso, args.min_bucket_reso, args.max_bucket_reso)

    # Assign the images one by one to the best bucket, encoding as buckets fill.
    bucket_aspect_ratios = np.array(bucket_aspect_ratios)
    buckets_imgs = [[] for _ in range(len(bucket_resos))]
    bucket_counts = [0] * len(bucket_resos)
    img_ar_errors = []
    last_index = len(image_paths) - 1

    for i, image_path in enumerate(tqdm(image_paths, smoothing=0.0)):
        image_key = image_path if args.full_path else os.path.splitext(os.path.basename(image_path))[0]
        if image_key not in metadata:
            metadata[image_key] = {}

        image = Image.open(image_path)
        if image.mode != 'RGB':
            image = image.convert("RGB")

        # Pick the bucket whose aspect ratio is closest to the image's.
        aspect_ratio = image.width / image.height
        ar_errors = bucket_aspect_ratios - aspect_ratio
        bucket_id = np.abs(ar_errors).argmin()
        reso = bucket_resos[bucket_id]
        ar_error = ar_errors[bucket_id]
        img_ar_errors.append(abs(ar_error))

        # Choose the scale so that the surplus is cropped rather than padded:
        # when the image is at least as wide as the bucket, fit the height,
        # otherwise fit the width.
        scale = reso[1] / image.height if ar_error <= 0 else reso[0] / image.width
        resized_size = (int(image.width * scale + .5), int(image.height * scale + .5))

        assert resized_size[0] == reso[0] or resized_size[1] == reso[
            1], f"internal error, resized size not match: {reso}, {resized_size}, {image.width}, {image.height}"
        assert resized_size[0] >= reso[0] and resized_size[1] >= reso[
            1], f"internal error, resized size too small: {reso}, {resized_size}, {image.width}, {image.height}"

        # Resize and centre-crop. PIL has no INTER_AREA, hence cv2.
        image = np.array(image)
        image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA)
        if resized_size[0] > reso[0]:
            left = (resized_size[0] - reso[0]) // 2
            image = image[:, left:left + reso[0]]
        elif resized_size[1] > reso[1]:
            top = (resized_size[1] - reso[1]) // 2
            image = image[top:top + reso[1]]
        assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f"internal error, illegal trimmed size: {image.shape}, {reso}"

        # Queue the image in its bucket.
        buckets_imgs[bucket_id].append((image_key, reso, image))
        bucket_counts[bucket_id] += 1
        metadata[image_key]['train_resolution'] = reso

        # Encode every bucket that is full, or every non-empty one at the end.
        is_last = i == last_index
        for bucket in buckets_imgs:
            if not ((is_last and len(bucket) > 0) or len(bucket) >= args.batch_size):
                continue

            latents = get_latents(vae, [img for _, _, img in bucket], weight_dtype)
            for (key, _, _), latent in zip(bucket, latents):
                stem = os.path.splitext(os.path.basename(key))[0]
                np.savez(os.path.join(args.train_data_dir, stem), latent)

            if args.flip_aug:
                # copy() is needed: the reversed view cannot be converted to a Tensor.
                flipped = [img[:, ::-1].copy() for _, _, img in bucket]
                latents = get_latents(vae, flipped, weight_dtype)
                for (key, _, _), latent in zip(bucket, latents):
                    stem = os.path.splitext(os.path.basename(key))[0]
                    np.savez(os.path.join(args.train_data_dir, stem + '_flip'), latent)

            bucket.clear()

    for i, (reso, count) in enumerate(zip(bucket_resos, bucket_counts)):
        print(f"bucket {i} {reso}: {count}")
    img_ar_errors = np.array(img_ar_errors)
    print(f"mean ar error: {np.mean(img_ar_errors)}")

    # Write the metadata out and finish.
    print(f"writing metadata: {args.out_json}")
    with open(args.out_json, "wt", encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print("done!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: train_data_dir in_json out_json model_name_or_path [options]
    parser = argparse.ArgumentParser()
    parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ")
    parser.add_argument("in_json", type=str, help="metadata file to input / 読み込むメタデータファイル")
    parser.add_argument("out_json", type=str, help="metadata file to output / メタデータファイル書き出し先")
    parser.add_argument("model_name_or_path", type=str, help="model name or path to encode latents / latentを取得するためのモデル")
    parser.add_argument("--v2", action='store_true',
                        help='load Stable Diffusion v2.0 model / Stable Diffusion 2.0のモデルを読み込む')
    parser.add_argument("--batch_size", type=int, default=1, help="batch size in inference / 推論時のバッチサイズ")
    parser.add_argument("--max_resolution", type=str, default="512,512",
                        help="max resolution in fine tuning (width,height) / fine tuning時の最大画像サイズ 「幅,高さ」(使用メモリ量に関係します)")
    parser.add_argument("--min_bucket_reso", type=int, default=256, help="minimum resolution for buckets / bucketの最小解像度")
    # The Japanese half of this help text used to say "minimum" (最小),
    # copied from the option above; it now says "maximum" (最大).
    parser.add_argument("--max_bucket_reso", type=int, default=1024, help="maximum resolution for buckets / bucketの最大解像度")
    parser.add_argument("--mixed_precision", type=str, default="no",
                        choices=["no", "fp16", "bf16"], help="use mixed precision / 混合精度を使う場合、その精度")
    parser.add_argument("--full_path", action="store_true",
                        help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)")
    parser.add_argument("--flip_aug", action="store_true",
                        help="flip augmentation, save latents for flipped images / 左右反転した画像もlatentを取得、保存する")

    args = parser.parse_args()
    main(args)
|
143
finetune/tag_images_by_wd14_tagger.py
Normal file
143
finetune/tag_images_by_wd14_tagger.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
# このスクリプトのライセンスは、Apache License 2.0とします
|
||||||
|
# (c) 2022 Kohya S. @kohya_ss
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
import cv2
|
||||||
|
from tqdm import tqdm
|
||||||
|
import numpy as np
|
||||||
|
from tensorflow.keras.models import load_model
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
|
||||||
|
# Settings taken from the wd14 tagger.

# Edge length (in pixels) of the square image handed to the model.
IMAGE_SIZE = 448

# Default Hugging Face repository for the tagger model (default of --repo_id).
WD14_TAGGER_REPO = 'SmilingWolf/wd-v1-4-vit-tagger'

# Label table shipped with the model; it is the last entry of FILES.
CSV_FILE = "selected_tags.csv"

# Files downloaded into the model directory itself ...
FILES = ["keras_metadata.pb", "saved_model.pb", CSV_FILE]

# ... and files downloaded into its SUB_DIR sub-directory.
SUB_DIR = "variables"
SUB_DIR_FILES = ["variables.data-00000-of-00001", "variables.index"]
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Tag every image in ``args.train_data_dir`` with the WD14 tagger.

    Downloads the model into ``args.model_dir`` when that directory is missing
    (or when ``args.force_download`` is set), runs the model over all
    ``*.jpg`` / ``*.png`` / ``*.webp`` files in batches of ``args.batch_size``
    and writes, next to each image, a caption file (same base name, extension
    ``args.caption_extension``) containing the comma-separated tags whose
    confidence is at least ``args.thresh``.

    Args:
        args: parsed command-line namespace; reads ``model_dir``,
            ``force_download``, ``repo_id``, ``train_data_dir``, ``thresh``,
            ``batch_size``, ``caption_extension`` and ``debug``.
    """
    # Using hf_hub_download as-is reportedly causes symlink-related problems, so
    # the cache directory and force_filename are given explicitly as a workaround.
    # This triggers a deprecation warning; revisit if the option is ever removed.
    # https://github.com/toriato/stable-diffusion-webui-wd14-tagger/issues/22
    if not os.path.exists(args.model_dir) or args.force_download:
        print("downloading wd14 tagger model from hf_hub")
        for file in FILES:
            hf_hub_download(args.repo_id, file, cache_dir=args.model_dir, force_download=True, force_filename=file)
        for file in SUB_DIR_FILES:
            hf_hub_download(args.repo_id, file, subfolder=SUB_DIR, cache_dir=os.path.join(
                args.model_dir, SUB_DIR), force_download=True, force_filename=file)

    # Collect the images to tag
    image_paths = glob.glob(os.path.join(args.train_data_dir, "*.jpg")) + \
        glob.glob(os.path.join(args.train_data_dir, "*.png")) + glob.glob(os.path.join(args.train_data_dir, "*.webp"))
    print(f"found {len(image_paths)} images.")

    print("loading model and labels")
    model = load_model(args.model_dir)

    # The label table is read by hand to avoid adding another dependency.
    with open(os.path.join(args.model_dir, CSV_FILE), "r", encoding="utf-8") as f:
        reader = csv.reader(f)
        l = [row for row in reader]
        header = l[0]  # tag_id,name,category,count
        rows = l[1:]
    assert header[0] == 'tag_id' and header[1] == 'name' and header[2] == 'category', f"unexpected csv format: {header}"

    # The first four model outputs are ratings and are ignored, so drop the
    # matching four rows: label_rows[i] then describes prob[4 + i].  Keeping the
    # rows aligned with the outputs (instead of pre-filtering by category and
    # indexing by position) stays correct when rows of another category follow.
    label_rows = rows[4:]

    # Run inference on one batch and write the caption files
    def run_batch(path_imgs):
        imgs = np.array([im for _, im in path_imgs])

        probs = model(imgs, training=False)
        probs = probs.numpy()

        for (image_path, _), prob in zip(path_imgs, probs):
            # Keep ordinary tags (category "0") whose confidence reaches the threshold.
            tag_text = ", ".join(
                row[1]
                for row, p in zip(label_rows, prob[4:])
                if row[2] == '0' and p >= args.thresh
            )

            with open(os.path.splitext(image_path)[0] + args.caption_extension, "wt", encoding='utf-8') as f:
                f.write(tag_text + '\n')
            if args.debug:
                print(image_path, tag_text)

    b_imgs = []
    for image_path in tqdm(image_paths, smoothing=0.0):
        # Opened with Pillow: cv2 fails on Japanese file names, and the mode may need converting.
        with Image.open(image_path) as opened:
            rgb = opened if opened.mode == 'RGB' else opened.convert("RGB")
            img = np.array(rgb)
        img = img[:, :, ::-1]  # RGB->BGR

        # pad to square (white border, image centred)
        size = max(img.shape[0:2])
        pad_x = size - img.shape[1]
        pad_y = size - img.shape[0]
        pad_l = pad_x // 2
        pad_t = pad_y // 2
        img = np.pad(img, ((pad_t, pad_y - pad_t), (pad_l, pad_x - pad_l), (0, 0)), mode='constant', constant_values=255)

        # INTER_AREA when shrinking, INTER_LANCZOS4 otherwise
        interp = cv2.INTER_AREA if size > IMAGE_SIZE else cv2.INTER_LANCZOS4
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=interp)

        img = img.astype(np.float32)
        b_imgs.append((image_path, img))

        if len(b_imgs) >= args.batch_size:
            run_batch(b_imgs)
            b_imgs.clear()

    # Flush the last, partially filled batch
    if len(b_imgs) > 0:
        run_batch(b_imgs)

    print("done!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line options as (name, add_argument keyword arguments), registered in this order.
    option_table = [
        ("train_data_dir", dict(type=str, help="directory for train images / 学習画像データのディレクトリ")),
        ("--repo_id", dict(type=str, default=WD14_TAGGER_REPO,
                           help="repo id for wd14 tagger on Hugging Face / Hugging Faceのwd14 taggerのリポジトリID")),
        ("--model_dir", dict(type=str, default="wd14_tagger_model",
                             help="directory to store wd14 tagger model / wd14 taggerのモデルを格納するディレクトリ")),
        ("--force_download", dict(action='store_true',
                                  help="force downloading wd14 tagger models / wd14 taggerのモデルを再ダウンロードします")),
        ("--thresh", dict(type=float, default=0.35, help="threshold of confidence to add a tag / タグを追加するか判定する閾値")),
        ("--batch_size", dict(type=int, default=1, help="batch size in inference / 推論時のバッチサイズ")),
        ("--caption_extention", dict(type=str, default=None,
                                     help="extension of caption file (for backward compatibility) / 出力されるキャプションファイルの拡張子(スペルミスしていたのを残してあります)")),
        ("--caption_extension", dict(type=str, default=".txt", help="extension of caption file / 出力されるキャプションファイルの拡張子")),
        ("--debug", dict(action="store_true", help="debug mode")),
    ]

    parser = argparse.ArgumentParser()
    for option_name, option_kwargs in option_table:
        parser.add_argument(option_name, **option_kwargs)

    args = parser.parse_args()

    # The misspelled option is kept for backward compatibility; when it is
    # given, its value takes over the correctly spelled one.
    if args.caption_extention is not None:
        args.caption_extension = args.caption_extention

    main(args)
|
795
finetune_gui.py
Normal file
795
finetune_gui.py
Normal file
@ -0,0 +1,795 @@
|
|||||||
|
import gradio as gr
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import pathlib
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# from easygui import fileopenbox, filesavebox, diropenbox, msgbox
|
||||||
|
from library.basic_caption_gui import gradio_basic_caption_gui_tab
|
||||||
|
from library.convert_model_gui import gradio_convert_model_tab
|
||||||
|
from library.blip_caption_gui import gradio_blip_caption_gui_tab
|
||||||
|
from library.wd14_caption_gui import gradio_wd14_caption_gui_tab
|
||||||
|
from library.common_gui import (
|
||||||
|
get_folder_path,
|
||||||
|
get_file_path,
|
||||||
|
get_saveasfile_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Emoji used as button captions in the GUI.
folder_symbol = '📂'  # U+1F4C2
refresh_symbol = '🔄'  # U+1F504
save_style_symbol = '💾'  # U+1F4BE
document_symbol = '📄'  # U+1F4C4
|
||||||
|
|
||||||
|
|
||||||
|
def save_configuration(
    save_as,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    train_dir,
    image_folder,
    output_dir,
    logging_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    dataset_repeats,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    train_text_encoder,
    create_buckets,
    create_caption,
    train,
    save_model_as,
    caption_extension,
):
    """Write the current GUI settings to a JSON configuration file.

    Args:
        save_as: mapping whose ``'label'`` entry is ``'True'`` for "Save as..."
            (always ask for a destination) and anything else for "Save".
        file_path: current configuration file path; the user is asked for one
            when it is empty or ``None``.
        (remaining parameters): the setting values, stored under their own
            parameter names.

    Returns:
        The path the configuration was written to, or the original
        ``file_path`` when no destination was chosen.
    """
    original_file_path = file_path

    save_as_bool = save_as.get('label') == 'True'

    if save_as_bool:
        print('Save as...')
        file_path = get_saveasfile_path(file_path)
    else:
        print('Save...')
        if file_path is None or file_path == '':
            file_path = get_saveasfile_path(file_path)

    # No destination (dialog cancelled, nothing typed): leave everything untouched.
    if not file_path:
        return original_file_path

    # Settings keyed by the names open_config_file looks up when loading
    variables = {
        'pretrained_model_name_or_path': pretrained_model_name_or_path,
        'v2': v2,
        'v_parameterization': v_parameterization,
        'train_dir': train_dir,
        'image_folder': image_folder,
        'output_dir': output_dir,
        'logging_dir': logging_dir,
        'max_resolution': max_resolution,
        'learning_rate': learning_rate,
        'lr_scheduler': lr_scheduler,
        'lr_warmup': lr_warmup,
        'dataset_repeats': dataset_repeats,
        'train_batch_size': train_batch_size,
        'epoch': epoch,
        'save_every_n_epochs': save_every_n_epochs,
        'mixed_precision': mixed_precision,
        'save_precision': save_precision,
        'seed': seed,
        'num_cpu_threads_per_process': num_cpu_threads_per_process,
        'train_text_encoder': train_text_encoder,
        'create_buckets': create_buckets,
        'create_caption': create_caption,
        'train': train,
        'save_model_as': save_model_as,
        'caption_extension': caption_extension,
    }

    # Save the data to the selected file.  This write had been commented out,
    # which left both save buttons without any effect on disk.
    with open(file_path, 'w') as file:
        json.dump(variables, file)

    return file_path
|
||||||
|
|
||||||
|
|
||||||
|
def open_config_file(
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    train_dir,
    image_folder,
    output_dir,
    logging_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    dataset_repeats,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    train_text_encoder,
    create_buckets,
    create_caption,
    train,
    save_model_as,
    caption_extension,
):
    """Ask for a JSON configuration file and return the settings it holds.

    Every parameter after ``file_path`` is the current value of a setting; it
    is returned unchanged when the loaded file has no entry of that name, or
    when no file was chosen.

    Returns:
        A tuple ``(file_path, <one value per setting, in parameter order>)``.
    """
    previous_path = file_path
    file_path = get_file_path(file_path)

    if file_path in ('', None):
        # Nothing selected: keep whatever path was there before the dialog.
        file_path = previous_path
        loaded = {}
    else:
        print(file_path)
        # Read the stored settings from the chosen JSON file
        with open(file_path, 'r') as f:
            loaded = json.load(f)

    # (key in the file, fallback value), in the order the caller expects them back
    fallbacks = (
        ('pretrained_model_name_or_path', pretrained_model_name_or_path),
        ('v2', v2),
        ('v_parameterization', v_parameterization),
        ('train_dir', train_dir),
        ('image_folder', image_folder),
        ('output_dir', output_dir),
        ('logging_dir', logging_dir),
        ('max_resolution', max_resolution),
        ('learning_rate', learning_rate),
        ('lr_scheduler', lr_scheduler),
        ('lr_warmup', lr_warmup),
        ('dataset_repeats', dataset_repeats),
        ('train_batch_size', train_batch_size),
        ('epoch', epoch),
        ('save_every_n_epochs', save_every_n_epochs),
        ('mixed_precision', mixed_precision),
        ('save_precision', save_precision),
        ('seed', seed),
        ('num_cpu_threads_per_process', num_cpu_threads_per_process),
        ('train_text_encoder', train_text_encoder),
        ('create_buckets', create_buckets),
        ('create_caption', create_caption),
        ('train', train),
        ('save_model_as', save_model_as),
        ('caption_extension', caption_extension),
    )

    return (file_path, *[loaded.get(key, fallback) for key, fallback in fallbacks])
|
||||||
|
|
||||||
|
|
||||||
|
def train_model(
    generate_caption_database,
    generate_image_buckets,
    train,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    train_dir,
    image_folder,
    output_dir,
    logging_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    dataset_repeats,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    train_text_encoder,
    save_model_as,
    caption_extension,
):
    """Run the selected fine-tuning stages as external commands.

    Up to three stages run in order, each enabled by its own flag:
    ``generate_caption_database`` (merge captions into
    ``<train_dir>/meta_cap.json``), ``generate_image_buckets`` (write
    ``<train_dir>/meta_lat.json``) and ``train`` (launch ``fine_tune.py``
    through ``accelerate``).  The remaining parameters are forwarded to those
    commands.

    All commands are passed to ``subprocess.run`` as argument lists, so folder
    and model paths containing spaces reach the scripts as single arguments.
    """

    def save_inference_file(output_dir, v2, v_parameterization):
        # Copy inference model for v2 if required
        if v2 and v_parameterization:
            print(f'Saving v2-inference-v.yaml as {output_dir}/last.yaml')
            shutil.copy(
                './v2_inference/v2-inference-v.yaml',
                f'{output_dir}/last.yaml',
            )
        elif v2:
            print(f'Saving v2-inference.yaml as {output_dir}/last.yaml')
            shutil.copy(
                './v2_inference/v2-inference.yaml',
                f'{output_dir}/last.yaml',
            )

    # create caption json file
    if generate_caption_database:
        if not os.path.exists(train_dir):
            # makedirs so that a nested, not yet existing config folder also works
            os.makedirs(train_dir)

        # Fall back to ".txt" when no caption extension was entered
        caption_ext = '.txt' if caption_extension == '' else caption_extension
        command = [
            './venv/Scripts/python.exe',
            'finetune/merge_captions_to_metadata.py',
            f'--caption_extension={caption_ext}',
            image_folder,
            f'{train_dir}/meta_cap.json',
            '--full_path',
        ]

        print(command)

        # Run the command
        subprocess.run(command)

    # create images buckets
    if generate_image_buckets:
        command = [
            './venv/Scripts/python.exe',
            'finetune/prepare_buckets_latents.py',
            image_folder,
            '{}/meta_cap.json'.format(train_dir),
            '{}/meta_lat.json'.format(train_dir),
            pretrained_model_name_or_path,
            '--batch_size',
            '4',
            '--max_resolution',
            max_resolution,
            '--mixed_precision',
            mixed_precision,
            '--full_path',
        ]

        print(command)

        # Run the command
        subprocess.run(command)

    if train:
        # One .npz file per prepared image is expected in the image folder
        image_num = len(
            [f for f in os.listdir(image_folder) if f.endswith('.npz')]
        )
        print(f'image_num = {image_num}')

        repeats = int(image_num) * int(dataset_repeats)
        print(f'repeats = {str(repeats)}')

        # calculate max_train_steps
        max_train_steps = int(
            math.ceil(float(repeats) / int(train_batch_size) * int(epoch))
        )
        print(f'max_train_steps = {max_train_steps}')

        # lr_warmup is a percentage of the total number of steps
        lr_warmup_steps = round(
            float(int(lr_warmup) * int(max_train_steps) / 100)
        )
        print(f'lr_warmup_steps = {lr_warmup_steps}')

        command = [
            'accelerate',
            'launch',
            f'--num_cpu_threads_per_process={num_cpu_threads_per_process}',
            './fine_tune.py',
        ]
        if v2:
            command.append('--v2')
        if v_parameterization:
            command.append('--v_parameterization')
        if train_text_encoder:
            command.append('--train_text_encoder')
        command += [
            f'--pretrained_model_name_or_path={pretrained_model_name_or_path}',
            f'--in_json={train_dir}/meta_lat.json',
            f'--train_data_dir={image_folder}',
            f'--output_dir={output_dir}',
        ]
        if not logging_dir == '':
            command.append(f'--logging_dir={logging_dir}')
        command += [
            f'--train_batch_size={train_batch_size}',
            f'--dataset_repeats={dataset_repeats}',
            f'--learning_rate={learning_rate}',
            f'--lr_scheduler={lr_scheduler}',
            f'--lr_warmup_steps={lr_warmup_steps}',
            f'--max_train_steps={max_train_steps}',
            '--use_8bit_adam',
            '--xformers',
            f'--mixed_precision={mixed_precision}',
            f'--save_every_n_epochs={save_every_n_epochs}',
            f'--seed={seed}',
            f'--save_precision={save_precision}',
        ]
        if not save_model_as == 'same as source model':
            command.append(f'--save_model_as={save_model_as}')

        print(command)

        # Run the command
        subprocess.run(command)

        # check if output_dir/last is a folder... therefore it is a diffuser model
        last_dir = pathlib.Path(f'{output_dir}/last')

        if not last_dir.is_dir():
            # Copy inference model for v2 if required
            save_inference_file(output_dir, v2, v_parameterization)
|
||||||
|
|
||||||
|
|
||||||
|
def set_pretrained_model_name_or_path_input(value, v2, v_parameterization):
    """Translate a quick-pick model choice into (model name, v2, v_parameterization).

    Known model names return their fixed flag pair, ``'custom'`` clears the
    model name and both flags, and any other value is passed through together
    with the flags that were given.
    """
    # model name -> (v2, v_parameterization, message printed on selection or None)
    known_models = {
        'stabilityai/stable-diffusion-2-1-base': (
            True,
            False,
            'SD v2 model detected. Setting --v2 parameter',
        ),
        'stabilityai/stable-diffusion-2-base': (
            True,
            False,
            'SD v2 model detected. Setting --v2 parameter',
        ),
        'stabilityai/stable-diffusion-2-1': (
            True,
            True,
            'SD v2 v_parameterization detected. Setting --v2 parameter and --v_parameterization',
        ),
        'stabilityai/stable-diffusion-2': (
            True,
            True,
            'SD v2 v_parameterization detected. Setting --v2 parameter and --v_parameterization',
        ),
        'CompVis/stable-diffusion-v1-4': (False, False, None),
        'runwayml/stable-diffusion-v1-5': (False, False, None),
    }

    # Names are matched exactly against the string form of the selection
    preset = known_models.get(str(value))
    if preset is not None:
        preset_v2, preset_v_parameterization, message = preset
        if message is not None:
            print(message)
        return value, preset_v2, preset_v_parameterization

    if value == 'custom':
        return '', False, False

    return value, v2, v_parameterization
|
||||||
|
|
||||||
|
|
||||||
|
def remove_doublequote(file_path):
    """Return ``file_path`` with every double quote removed; ``None`` is passed through."""
    if file_path is None:
        return file_path

    return file_path.replace('"', '')
|
||||||
|
|
||||||
|
|
||||||
|
css = ''

# Optional stylesheet: when ./style.css exists its content is handed to gradio.
if os.path.exists('./style.css'):
    with open(os.path.join('./style.css'), 'r', encoding='utf8') as file:
        print('Load CSS...')
        css += file.read() + '\n'

interface = gr.Blocks(css=css)

with interface:
    # Hidden labels passed as the first input of save_configuration so it can
    # tell "Save as..." (True) from "Save" (False).
    dummy_true = gr.Label(value=True, visible=False)
    dummy_false = gr.Label(value=False, visible=False)
    with gr.Tab('Finetuning'):
        gr.Markdown('Enter kohya finetuner parameter using this interface.')
        with gr.Accordion('Configuration File Load/Save', open=False):
            with gr.Row():
                button_open_config = gr.Button(
                    f'Open {folder_symbol}', elem_id='open_folder'
                )
                button_save_config = gr.Button(
                    f'Save {save_style_symbol}', elem_id='open_folder'
                )
                button_save_as_config = gr.Button(
                    f'Save as... {save_style_symbol}', elem_id='open_folder'
                )
            config_file_name = gr.Textbox(
                label='', placeholder='type file path or use buttons...'
            )
            config_file_name.change(
                remove_doublequote,
                inputs=[config_file_name],
                outputs=[config_file_name],
            )
        with gr.Tab('Source model'):
            # Define the input elements
            with gr.Row():
                pretrained_model_name_or_path_input = gr.Textbox(
                    label='Pretrained model name or path',
                    placeholder='enter the path to custom model or name of pretrained model',
                )
                pretrained_model_name_or_path_file = gr.Button(
                    document_symbol, elem_id='open_folder_small'
                )
                pretrained_model_name_or_path_file.click(
                    get_file_path,
                    inputs=pretrained_model_name_or_path_input,
                    outputs=pretrained_model_name_or_path_input,
                )
                pretrained_model_name_or_path_folder = gr.Button(
                    folder_symbol, elem_id='open_folder_small'
                )
                pretrained_model_name_or_path_folder.click(
                    get_folder_path,
                    inputs=pretrained_model_name_or_path_input,
                    outputs=pretrained_model_name_or_path_input,
                )
                model_list = gr.Dropdown(
                    label='(Optional) Model Quick Pick',
                    choices=[
                        'custom',
                        'stabilityai/stable-diffusion-2-1-base',
                        'stabilityai/stable-diffusion-2-base',
                        'stabilityai/stable-diffusion-2-1',
                        'stabilityai/stable-diffusion-2',
                        'runwayml/stable-diffusion-v1-5',
                        'CompVis/stable-diffusion-v1-4',
                    ],
                )
                save_model_as_dropdown = gr.Dropdown(
                    label='Save trained model as',
                    choices=[
                        'same as source model',
                        'ckpt',
                        'diffusers',
                        'diffusers_safetensors',
                        'safetensors',
                    ],
                    value='same as source model',
                )

            with gr.Row():
                v2_input = gr.Checkbox(label='v2', value=True)
                v_parameterization_input = gr.Checkbox(
                    label='v_parameterization', value=False
                )
            # Picking a quick-pick model fills in the model name and both flags
            model_list.change(
                set_pretrained_model_name_or_path_input,
                inputs=[model_list, v2_input, v_parameterization_input],
                outputs=[
                    pretrained_model_name_or_path_input,
                    v2_input,
                    v_parameterization_input,
                ],
            )
        with gr.Tab('Directories'):
            with gr.Row():
                train_dir_input = gr.Textbox(
                    label='Training config folder',
                    placeholder='folder where the training configuration files will be saved',
                )
                train_dir_folder = gr.Button(
                    folder_symbol, elem_id='open_folder_small'
                )
                train_dir_folder.click(
                    get_folder_path, outputs=train_dir_input
                )

                image_folder_input = gr.Textbox(
                    label='Training Image folder',
                    placeholder='folder where the training images are located',
                )
                image_folder_input_folder = gr.Button(
                    folder_symbol, elem_id='open_folder_small'
                )
                image_folder_input_folder.click(
                    get_folder_path, outputs=image_folder_input
                )
            with gr.Row():
                output_dir_input = gr.Textbox(
                    label='Output folder',
                    placeholder='folder where the model will be saved',
                )
                output_dir_input_folder = gr.Button(
                    folder_symbol, elem_id='open_folder_small'
                )
                output_dir_input_folder.click(
                    get_folder_path, outputs=output_dir_input
                )

                logging_dir_input = gr.Textbox(
                    label='Logging folder',
                    placeholder='Optional: enable logging and output TensorBoard log to this folder',
                )
                logging_dir_input_folder = gr.Button(
                    folder_symbol, elem_id='open_folder_small'
                )
                logging_dir_input_folder.click(
                    get_folder_path, outputs=logging_dir_input
                )
            # Strip double quotes from pasted paths in every folder box
            train_dir_input.change(
                remove_doublequote,
                inputs=[train_dir_input],
                outputs=[train_dir_input],
            )
            image_folder_input.change(
                remove_doublequote,
                inputs=[image_folder_input],
                outputs=[image_folder_input],
            )
            output_dir_input.change(
                remove_doublequote,
                inputs=[output_dir_input],
                outputs=[output_dir_input],
            )
            # The logging folder was the only folder box without this handler
            logging_dir_input.change(
                remove_doublequote,
                inputs=[logging_dir_input],
                outputs=[logging_dir_input],
            )
        with gr.Tab('Training parameters'):
            with gr.Row():
                learning_rate_input = gr.Textbox(
                    label='Learning rate', value=1e-6
                )
                lr_scheduler_input = gr.Dropdown(
                    label='LR Scheduler',
                    choices=[
                        'constant',
                        'constant_with_warmup',
                        'cosine',
                        'cosine_with_restarts',
                        'linear',
                        'polynomial',
                    ],
                    value='constant',
                )
                lr_warmup_input = gr.Textbox(label='LR warmup', value=0)
            with gr.Row():
                dataset_repeats_input = gr.Textbox(
                    label='Dataset repeats', value=40
                )
                train_batch_size_input = gr.Slider(
                    minimum=1,
                    maximum=32,
                    label='Train batch size',
                    value=1,
                    step=1,
                )
                epoch_input = gr.Textbox(label='Epoch', value=1)
                save_every_n_epochs_input = gr.Textbox(
                    label='Save every N epochs', value=1
                )
            with gr.Row():
                mixed_precision_input = gr.Dropdown(
                    label='Mixed precision',
                    choices=[
                        'no',
                        'fp16',
                        'bf16',
                    ],
                    value='fp16',
                )
                save_precision_input = gr.Dropdown(
                    label='Save precision',
                    choices=[
                        'float',
                        'fp16',
                        'bf16',
                    ],
                    value='fp16',
                )
                # os.cpu_count() returns None when the count cannot be
                # determined; fall back to 1 so the slider keeps a valid range.
                max_cpu_threads = os.cpu_count() or 1
                num_cpu_threads_per_process_input = gr.Slider(
                    minimum=1,
                    maximum=max_cpu_threads,
                    step=1,
                    label='Number of CPU threads per process',
                    value=max_cpu_threads,
                )
            with gr.Row():
                seed_input = gr.Textbox(label='Seed', value=1234)
                max_resolution_input = gr.Textbox(
                    label='Max resolution', value='512,512'
                )
            with gr.Row():
                caption_extention_input = gr.Textbox(
                    label='Caption Extension',
                    placeholder='(Optional) Extension for caption files. default: .txt',
                )
                train_text_encoder_input = gr.Checkbox(
                    label='Train text encoder', value=True
                )
        with gr.Box():
            with gr.Row():
                create_caption = gr.Checkbox(
                    label='Generate caption database', value=True
                )
                create_buckets = gr.Checkbox(
                    label='Generate image buckets', value=True
                )
                train = gr.Checkbox(label='Train model', value=True)

        button_run = gr.Button('Run')

        # Inputs follow the parameter order of train_model
        button_run.click(
            train_model,
            inputs=[
                create_caption,
                create_buckets,
                train,
                pretrained_model_name_or_path_input,
                v2_input,
                v_parameterization_input,
                train_dir_input,
                image_folder_input,
                output_dir_input,
                logging_dir_input,
                max_resolution_input,
                learning_rate_input,
                lr_scheduler_input,
                lr_warmup_input,
                dataset_repeats_input,
                train_batch_size_input,
                epoch_input,
                save_every_n_epochs_input,
                mixed_precision_input,
                save_precision_input,
                seed_input,
                num_cpu_threads_per_process_input,
                train_text_encoder_input,
                save_model_as_dropdown,
                caption_extention_input,
            ],
        )

        # Widgets whose values are stored in / restored from a configuration
        # file, in the parameter order shared by save_configuration and
        # open_config_file (after their leading save_as / file_path parameters).
        config_widgets = [
            pretrained_model_name_or_path_input,
            v2_input,
            v_parameterization_input,
            train_dir_input,
            image_folder_input,
            output_dir_input,
            logging_dir_input,
            max_resolution_input,
            learning_rate_input,
            lr_scheduler_input,
            lr_warmup_input,
            dataset_repeats_input,
            train_batch_size_input,
            epoch_input,
            save_every_n_epochs_input,
            mixed_precision_input,
            save_precision_input,
            seed_input,
            num_cpu_threads_per_process_input,
            train_text_encoder_input,
            create_buckets,
            create_caption,
            train,
            save_model_as_dropdown,
            caption_extention_input,
        ]

        button_open_config.click(
            open_config_file,
            inputs=[config_file_name] + config_widgets,
            outputs=[config_file_name] + config_widgets,
        )

        button_save_config.click(
            save_configuration,
            inputs=[dummy_false, config_file_name] + config_widgets,
            outputs=[config_file_name],
        )

        button_save_as_config.click(
            save_configuration,
            inputs=[dummy_true, config_file_name] + config_widgets,
            outputs=[config_file_name],
        )

    with gr.Tab('Utilities'):
        gradio_basic_caption_gui_tab()
        gradio_blip_caption_gui_tab()
        gradio_wd14_caption_gui_tab()
        gradio_convert_model_tab()


# Show the interface
interface.launch()
|
@ -28,7 +28,7 @@ def caption_images(
|
|||||||
return
|
return
|
||||||
|
|
||||||
print(f'Captioning files in {train_data_dir}...')
|
print(f'Captioning files in {train_data_dir}...')
|
||||||
run_cmd = f'.\\venv\\Scripts\\python.exe "./BLIP_caption/make_captions.py"'
|
run_cmd = f'.\\venv\\Scripts\\python.exe "finetune/make_captions.py"'
|
||||||
run_cmd += f' --batch_size="{int(batch_size)}"'
|
run_cmd += f' --batch_size="{int(batch_size)}"'
|
||||||
run_cmd += f' --num_beams="{int(num_beams)}"'
|
run_cmd += f' --num_beams="{int(num_beams)}"'
|
||||||
run_cmd += f' --top_p="{top_p}"'
|
run_cmd += f' --top_p="{top_p}"'
|
||||||
@ -39,7 +39,7 @@ def caption_images(
|
|||||||
if caption_file_ext != '':
|
if caption_file_ext != '':
|
||||||
run_cmd += f' --caption_extension="{caption_file_ext}"'
|
run_cmd += f' --caption_extension="{caption_file_ext}"'
|
||||||
run_cmd += f' "{train_data_dir}"'
|
run_cmd += f' "{train_data_dir}"'
|
||||||
run_cmd += f' "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth"'
|
run_cmd += f' --caption_weights="https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth"'
|
||||||
|
|
||||||
print(run_cmd)
|
print(run_cmd)
|
||||||
|
|
||||||
|
@ -72,6 +72,10 @@ def get_saveasfile_path(file_path='', defaultextension='.json'):
|
|||||||
def add_pre_postfix(
|
def add_pre_postfix(
|
||||||
folder='', prefix='', postfix='', caption_file_ext='.caption'
|
folder='', prefix='', postfix='', caption_file_ext='.caption'
|
||||||
):
|
):
|
||||||
|
# set caption extension to default in case it was not provided
|
||||||
|
if caption_file_ext == '':
|
||||||
|
caption_file_ext = '.caption'
|
||||||
|
|
||||||
files = [f for f in os.listdir(folder) if f.endswith(caption_file_ext)]
|
files = [f for f in os.listdir(folder) if f.endswith(caption_file_ext)]
|
||||||
if not prefix == '':
|
if not prefix == '':
|
||||||
prefix = f'{prefix} '
|
prefix = f'{prefix} '
|
||||||
|
@ -16,7 +16,7 @@ def caption_images(train_data_dir, caption_extension, batch_size, thresh):
|
|||||||
return
|
return
|
||||||
|
|
||||||
print(f'Captioning files in {train_data_dir}...')
|
print(f'Captioning files in {train_data_dir}...')
|
||||||
run_cmd = f'accelerate launch "./script/tag_images_by_wd14_tagger.py"'
|
run_cmd = f'accelerate launch "./finetune/tag_images_by_wd14_tagger.py"'
|
||||||
run_cmd += f' --batch_size="{int(batch_size)}"'
|
run_cmd += f' --batch_size="{int(batch_size)}"'
|
||||||
run_cmd += f' --thresh="{thresh}"'
|
run_cmd += f' --thresh="{thresh}"'
|
||||||
if caption_extension != '':
|
if caption_extension != '':
|
||||||
|
609
mytraining.ps
609
mytraining.ps
@ -1,609 +0,0 @@
|
|||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_bernard\train_man" `
|
|
||||||
--reg_data_dir="D:\dreambooth\train_bernard\reg_man" `
|
|
||||||
--output_dir="D:\dreambooth\train_bernard" `
|
|
||||||
--prior_loss_weight=1.0 `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=1 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=3000 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--gradient_checkpointing `
|
|
||||||
--save_every_n_epochs=1
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\bernard\asd man-3000-remgb-sd15.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_bernard\train_man" `
|
|
||||||
--reg_data_dir="D:\dreambooth\train_bernard\reg_man" `
|
|
||||||
--output_dir="D:\dreambooth\train_bernard" `
|
|
||||||
--prior_loss_weight=1.0 `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=1 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=1500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--gradient_checkpointing `
|
|
||||||
--save_every_n_epochs=1
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_bernard\train_man" `
|
|
||||||
--reg_data_dir="D:\dreambooth\train_bernard\reg_man" `
|
|
||||||
--output_dir="D:\dreambooth\train_bernard" `
|
|
||||||
--prior_loss_weight=1.0 `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=1 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=4500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--gradient_checkpointing `
|
|
||||||
--no_token_padding `
|
|
||||||
--save_every_n_epochs=1
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\source\alex\train" `
|
|
||||||
--output_dir="D:\dreambooth\train_alex" `
|
|
||||||
--prior_loss_weight=1.0 `
|
|
||||||
--resolution="448,640" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=4500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--shuffle_caption
|
|
||||||
|
|
||||||
# -fine_tuning
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\source\alex\train\50_portrait-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_alex" `
|
|
||||||
--resolution="448,640" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=4500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--shuffle_caption
|
|
||||||
|
|
||||||
Resume:
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\source\alet_et_bernard\landscape-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_alex_and_bernard" `
|
|
||||||
--resolution="640,448" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=550 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# Mollie Monger
|
|
||||||
|
|
||||||
e1:
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\landscape-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="640,448" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=625 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\mollie_monger-kohya-l-200-sd15.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\portrait-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="448,640" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=1275 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\mollie_monger-kohya-l+p-200-sd15.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\square-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
e2:
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\mollie_monger\mollie_monger-kohya-l+p+s-r200-e1-sd15.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\landscape-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="640,448" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=625 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\mollie_monger\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\portrait-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="448,640" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=1275 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\mollie_monger\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_mollie_monger\square-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_mollie_monger\output" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=500 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=200 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
|
|
||||||
Midjourney images download:
|
|
||||||
|
|
||||||
https://storage.googleapis.com/dream-machines-output/2932e6e4-ddef-410e-947b-2a6275e31f35/0_3.png
|
|
||||||
|
|
||||||
# Midjourney
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_midjourney_v4\all data" `
|
|
||||||
--output_dir="D:\dreambooth\train_midjourney_v4\model" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=528 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=12 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\midjourney_v4-khoya-r100-e1-sd15.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_midjourney_v4\data2" `
|
|
||||||
--output_dir="D:\dreambooth\train_midjourney_v4\model" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=850 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=100 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\midjourney_v4_finetune\epoch-000001.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_midjourney_v4\newdata3" `
|
|
||||||
--output_dir="D:\dreambooth\train_midjourney_v4\model" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=159 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=24 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# train n
|
|
||||||
|
|
||||||
# Midjourney
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v6-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\train_childrens_drawings\model\last2.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_childrens_drawings\data2-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_childrens_drawings\model" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=312 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--fine_tuning_repeat=48 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\train_childrens_drawings\model\last2.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_childrens_drawings\data2-pp" `
|
|
||||||
--output_dir="D:\dreambooth\train_childrens_drawings\model" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=312 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=48 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# twq
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\source\bernardv2-ft" `
|
|
||||||
--output_dir="D:\dreambooth\train_bernard\model" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=720 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=48 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# the white queen
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\landscape-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=520 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\portrait-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p" `
|
|
||||||
--resolution="512,704" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=260 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\square-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p+s" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=220 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# the white queen slow progress init phase
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\landscape-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=260 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\portrait-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p" `
|
|
||||||
--resolution="512,704" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=130 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\square-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p+s" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=90 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# the white queen slow progress extra steps phase
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p+s\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\landscape-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=130 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\portrait-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p" `
|
|
||||||
--resolution="512,704" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=65 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\square-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p+s" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=45 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# the queen of heart init phase
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\models\v1-5-pruned-mse-vae.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\train_qoh\landscape-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=260 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\portrait-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p" `
|
|
||||||
--resolution="512,704" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=130 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\square-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p+s" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=90 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=80 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
# the white queen slow progress extra steps phase
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p+s\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\landscape-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l" `
|
|
||||||
--resolution="704,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=130 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\portrait-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p" `
|
|
||||||
--resolution="512,704" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=65 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--save_half
|
|
||||||
|
|
||||||
accelerate launch --num_cpu_threads_per_process 6 train_db_fixed_v7-ber.py `
|
|
||||||
--pretrained_model_name_or_path="D:\dreambooth\training_twq\the_white_queen\model+l+p\last.ckpt" `
|
|
||||||
--train_data_dir="D:\dreambooth\training_twq\the_white_queen\square-ft" `
|
|
||||||
--output_dir="D:\dreambooth\training_twq\the_white_queen\model+l+p+s" `
|
|
||||||
--resolution="512,512" `
|
|
||||||
--train_batch_size=8 `
|
|
||||||
--learning_rate=1e-6 `
|
|
||||||
--max_train_steps=45 `
|
|
||||||
--use_8bit_adam `
|
|
||||||
--xformers `
|
|
||||||
--mixed_precision="fp16" `
|
|
||||||
--cache_latents `
|
|
||||||
--save_every_n_epochs=1 `
|
|
||||||
--fine_tuning `
|
|
||||||
--dataset_repeats=40 `
|
|
||||||
--seed=23 `
|
|
||||||
--save_half
|
|
@ -1 +0,0 @@
|
|||||||
Put your asd dog images you want to train in this folder
|
|
@ -1 +0,0 @@
|
|||||||
Put your dog class regularization images in here
|
|
2
setup.py
2
setup.py
@ -1,3 +1,3 @@
|
|||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(name = "library", packages = find_packages())
|
setup(name = "library", version="1.0.0", packages = find_packages())
|
@ -11,7 +11,7 @@ if sys.version_info < (3, 8):
|
|||||||
else:
|
else:
|
||||||
import importlib.metadata as importlib_metadata
|
import importlib.metadata as importlib_metadata
|
||||||
|
|
||||||
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
|
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../requirements.txt")
|
||||||
|
|
||||||
def run(command, desc=None, errdesc=None, custom_env=None):
|
def run(command, desc=None, errdesc=None, custom_env=None):
|
||||||
if desc is not None:
|
if desc is not None:
|
||||||
@ -83,9 +83,9 @@ check_versions()
|
|||||||
# Check for "different" B&B Files and copy only if necessary
|
# Check for "different" B&B Files and copy only if necessary
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
python = sys.executable
|
python = sys.executable
|
||||||
bnb_src = os.path.join(os.path.dirname(os.path.realpath(__file__)), "bitsandbytes_windows")
|
# Source folder of the bundled bitsandbytes Windows files, one level above
# this script. The parent directory and folder name are passed as separate
# components: written as a single "..\b..." literal, the "\b" is parsed as a
# backspace character (\x08) and the resulting path can never exist.
bnb_src = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "bitsandbytes_windows")
|
||||||
bnb_dest = os.path.join(sysconfig.get_paths()["purelib"], "bitsandbytes")
|
bnb_dest = os.path.join(sysconfig.get_paths()["purelib"], "bitsandbytes")
|
||||||
cudnn_src = os.path.join(os.path.dirname(os.path.realpath(__file__)), "cudnn_windows")
|
# Source folder of the bundled cuDNN Windows files, one level above this
# script. Passing ".." and the folder name as separate components avoids the
# invalid "\c" escape sequence of the single-literal form (a
# DeprecationWarning/SyntaxWarning today, an error in future Python versions)
# while producing the same path on Windows.
cudnn_src = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "cudnn_windows")
|
||||||
cudnn_dest = os.path.join(sysconfig.get_paths()["purelib"], "torch", "lib")
|
cudnn_dest = os.path.join(sysconfig.get_paths()["purelib"], "torch", "lib")
|
||||||
|
|
||||||
print(f"Checking for CUDNN files in {cudnn_dest}")
|
print(f"Checking for CUDNN files in {cudnn_dest}")
|
||||||
@ -101,9 +101,4 @@ if os.name == "nt":
|
|||||||
shutil.copy2(src_file, cudnn_dest)
|
shutil.copy2(src_file, cudnn_dest)
|
||||||
print("Copied CUDNN 8.6 files to destination")
|
print("Copied CUDNN 8.6 files to destination")
|
||||||
|
|
||||||
# diffusers_cmd = "git+https://github.com/huggingface/diffusers.git@8e74efa#egg=diffusers --force-reinstall"
|
|
||||||
# run(f'"{python}" -m pip install {diffusers_cmd}', "Installing particular diffusers commit", "Couldn't install diffusers")
|
|
||||||
# #install requirements file
|
|
||||||
# req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
|
|
||||||
# run(f'"{python}" -m pip install -r "{req_file}"', "Updating requirements", "Couldn't install requirements")
|
|
||||||
|
|
2
upgrade.bat
Normal file
2
upgrade.bat
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
git pull
|
||||||
|
.\venv\Scripts\python.exe -m pip install -U -r .\requirements.txt
|
Loading…
x
Reference in New Issue
Block a user