v20.6.0
- Increase max LoRA rank (dim) size to 1024. - Update finetune preprocessing scripts. - ``.bmp`` and ``.jpeg`` are supported. Thanks to breakcore2 and p1atdev! - The default weights of ``tag_images_by_wd14_tagger.py`` are now ``SmilingWolf/wd-v1-4-convnext-tagger-v2``. You can specify another model id from ``SmilingWolf`` with the ``--repo_id`` option. Thanks to SmilingWolf for the great work. - To change the weights, remove the ``wd14_tagger_model`` folder and run the script again. - ``--max_data_loader_n_workers`` option is added to each script. This option uses the DataLoader for data loading to speed up loading, 20%~30% faster. - Please specify 2 or 4, depending on the number of CPU cores. - ``--recursive`` option is added to ``merge_dd_tags_to_metadata.py`` and ``merge_captions_to_metadata.py``; it only works with ``--full_path``. - ``make_captions_by_git.py`` is added. It uses [GIT microsoft/git-large-textcaps](https://huggingface.co/microsoft/git-large-textcaps) for captioning. - ``requirements.txt`` is updated. If you use this script, [please update the libraries](https://github.com/kohya-ss/sd-scripts#upgrade). - Usage is almost the same as ``make_captions.py``, but the batch size should be smaller. - ``--remove_words`` option removes as much text as possible (such as ``the word "XXXX" on it``). - ``--skip_existing`` option is added to ``prepare_buckets_latents.py``. Images with existing npz files are ignored by this option. - ``clean_captions_and_tags.py`` is updated to remove duplicated or conflicting tags, e.g. ``shirt`` is removed when ``white shirt`` exists. If ``black hair`` appears together with ``red hair``, both are removed. - Tag frequency is added to the metadata in ``train_network.py``. Thanks to space-nuko! - __All tags and the number of occurrences of each tag are recorded.__ If you do not want this, disable metadata storing with the ``--no_metadata`` option.
This commit is contained in:
parent
20e62af1a6
commit
045750b46a
@ -19,7 +19,7 @@ def UI(username, password):
|
|||||||
print('Load CSS...')
|
print('Load CSS...')
|
||||||
css += file.read() + '\n'
|
css += file.read() + '\n'
|
||||||
|
|
||||||
interface = gr.Blocks(css=css)
|
interface = gr.Blocks(css=css, title="Kohya_ss GUI")
|
||||||
|
|
||||||
with interface:
|
with interface:
|
||||||
with gr.Tab('Dreambooth'):
|
with gr.Tab('Dreambooth'):
|
||||||
|
@ -109,11 +109,11 @@ def gradio_extract_lora_tab():
|
|||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
dim = gr.Slider(
|
dim = gr.Slider(
|
||||||
minimum=1,
|
minimum=4,
|
||||||
maximum=128,
|
maximum=1024,
|
||||||
label='Network Dimension',
|
label='Network Dimension',
|
||||||
value=8,
|
value=128,
|
||||||
step=1,
|
step=4,
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
v2 = gr.Checkbox(label='v2', value=False, interactive=True)
|
v2 = gr.Checkbox(label='v2', value=False, interactive=True)
|
||||||
|
126
library/git_caption_gui.py
Normal file
126
library/git_caption_gui.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
import gradio as gr
|
||||||
|
from easygui import msgbox
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
from .common_gui import get_folder_path, add_pre_postfix
|
||||||
|
|
||||||
|
|
||||||
|
def caption_images(
    train_data_dir,
    caption_ext,
    batch_size,
    max_data_loader_n_workers,
    max_length,
    model_id,
    prefix,
    postfix,
):
    """Caption every image in a folder with GIT, then add a prefix/postfix.

    Builds a command line for the GIT captioning script, runs it as a
    subprocess, and finally passes the folder to ``add_pre_postfix`` so the
    optional prefix/postfix are applied to the generated caption files.

    Args:
        train_data_dir: Folder containing the images to caption. An empty
            string aborts with a message box.
        caption_ext: Extension of the caption files to write (e.g. ``.txt``).
            An empty string aborts with a message box.
        batch_size: Batch size forwarded as ``--batch_size`` (cast to int).
        max_data_loader_n_workers: Forwarded as
            ``--max_data_loader_n_workers`` (cast to int).
        max_length: Forwarded as ``--max_length`` (cast to int).
        model_id: Optional model id forwarded as ``--model_id``; omitted
            from the command when empty.
        prefix: Text handed to ``add_pre_postfix`` as ``prefix``.
        postfix: Text handed to ``add_pre_postfix`` as ``postfix``.

    Returns:
        None. Validation failures return early after showing a message box.
    """
    # Check for images_dir_input
    if train_data_dir == '':
        msgbox('Image folder is missing...')
        return

    if caption_ext == '':
        msgbox('Please provide an extension for the caption files.')
        return

    print(f'GIT captioning files in {train_data_dir}...')

    # Launch the GIT captioning script (make_captions_by_git.py); the
    # previous command pointed at make_captions.py, which is a different
    # captioning script than the one this tab is meant to drive.
    run_cmd = (
        f'.\\venv\\Scripts\\python.exe "finetune/make_captions_by_git.py"'
    )
    if model_id:
        run_cmd += f' --model_id="{model_id}"'
    run_cmd += f' --batch_size="{int(batch_size)}"'
    run_cmd += (
        f' --max_data_loader_n_workers="{int(max_data_loader_n_workers)}"'
    )
    run_cmd += f' --max_length="{int(max_length)}"'
    # caption_ext is guaranteed non-empty here (validated above), so the
    # option is always passed.
    run_cmd += f' --caption_extension="{caption_ext}"'
    run_cmd += f' "{train_data_dir}"'

    print(run_cmd)

    # Run the command
    subprocess.run(run_cmd)

    # Add prefix and postfix
    add_pre_postfix(
        folder=train_data_dir,
        caption_file_ext=caption_ext,
        prefix=prefix,
        postfix=postfix,
    )

    print('...captioning done')
|
||||||
|
|
||||||
|
|
||||||
|
###
|
||||||
|
# Gradio UI
|
||||||
|
###
|
||||||
|
|
||||||
|
|
||||||
|
def gradio_git_caption_gui_tab():
    """Build the 'GIT Captioning' tab and wire its button to caption_images.

    Creates the input widgets (image folder, caption extension, prefix,
    postfix, batch size, worker count, max length, model id) and registers
    ``caption_images`` as the click handler of the 'Caption images' button.
    The widgets are passed to the handler in the same order as its
    parameters.

    NOTE(review): expected to be called inside an active Gradio layout
    context (e.g. within ``gr.Blocks``/``gr.Tab``) -- confirm against caller.
    """
    with gr.Tab('GIT Captioning'):
        gr.Markdown(
            'This utility will use GIT to caption files for each images in a folder.'
        )
        with gr.Row():
            train_data_dir = gr.Textbox(
                label='Image folder to caption',
                placeholder='Directory containing the images to caption',
                interactive=True,
            )
            button_train_data_dir_input = gr.Button(
                '📂', elem_id='open_folder_small'
            )
            # The folder picker writes the chosen path into the textbox.
            button_train_data_dir_input.click(
                get_folder_path, outputs=train_data_dir
            )
        with gr.Row():
            caption_ext = gr.Textbox(
                label='Caption file extension',
                placeholder='Extention for caption file. eg: .caption, .txt',
                value='.txt',
                interactive=True,
            )

            # Labels refer to GIT (not BLIP): this tab drives the GIT
            # captioner.
            prefix = gr.Textbox(
                label='Prefix to add to GIT caption',
                placeholder='(Optional)',
                interactive=True,
            )

            postfix = gr.Textbox(
                label='Postfix to add to GIT caption',
                placeholder='(Optional)',
                interactive=True,
            )

            batch_size = gr.Number(
                value=1, label='Batch size', interactive=True
            )

        with gr.Row():
            max_data_loader_n_workers = gr.Number(
                value=2, label='Number of workers', interactive=True
            )
            max_length = gr.Number(
                value=75, label='Max length', interactive=True
            )
            model_id = gr.Textbox(
                label="Model",
                placeholder="(Optional) model id for GIT in Hugging Face", interactive=True
            )

        caption_button = gr.Button('Caption images')

        # Input order must match the parameter order of caption_images.
        caption_button.click(
            caption_images,
            inputs=[
                train_data_dir,
                caption_ext,
                batch_size,
                max_data_loader_n_workers,
                max_length,
                model_id,
                prefix,
                postfix,
            ],
        )
|
@ -9,6 +9,7 @@ import argparse
|
|||||||
from library.basic_caption_gui import gradio_basic_caption_gui_tab
|
from library.basic_caption_gui import gradio_basic_caption_gui_tab
|
||||||
from library.convert_model_gui import gradio_convert_model_tab
|
from library.convert_model_gui import gradio_convert_model_tab
|
||||||
from library.blip_caption_gui import gradio_blip_caption_gui_tab
|
from library.blip_caption_gui import gradio_blip_caption_gui_tab
|
||||||
|
from library.git_caption_gui import gradio_git_caption_gui_tab
|
||||||
from library.wd14_caption_gui import gradio_wd14_caption_gui_tab
|
from library.wd14_caption_gui import gradio_wd14_caption_gui_tab
|
||||||
|
|
||||||
|
|
||||||
@ -23,6 +24,7 @@ def utilities_tab(
|
|||||||
with gr.Tab('Captioning'):
|
with gr.Tab('Captioning'):
|
||||||
gradio_basic_caption_gui_tab()
|
gradio_basic_caption_gui_tab()
|
||||||
gradio_blip_caption_gui_tab()
|
gradio_blip_caption_gui_tab()
|
||||||
|
gradio_git_caption_gui_tab()
|
||||||
gradio_wd14_caption_gui_tab()
|
gradio_wd14_caption_gui_tab()
|
||||||
gradio_convert_model_tab()
|
gradio_convert_model_tab()
|
||||||
|
|
||||||
|
45
lora_gui.py
45
lora_gui.py
@ -291,11 +291,11 @@ def train_model(
|
|||||||
if unet_lr == '':
|
if unet_lr == '':
|
||||||
unet_lr = 0
|
unet_lr = 0
|
||||||
|
|
||||||
if (float(text_encoder_lr) == 0) and (float(unet_lr) == 0):
|
# if (float(text_encoder_lr) == 0) and (float(unet_lr) == 0):
|
||||||
msgbox(
|
# msgbox(
|
||||||
'At least one Learning Rate value for "Text encoder" or "Unet" need to be provided'
|
# 'At least one Learning Rate value for "Text encoder" or "Unet" need to be provided'
|
||||||
)
|
# )
|
||||||
return
|
# return
|
||||||
|
|
||||||
# Get a list of all subfolders in train_data_dir
|
# Get a list of all subfolders in train_data_dir
|
||||||
subfolders = [
|
subfolders = [
|
||||||
@ -383,15 +383,26 @@ def train_model(
|
|||||||
if not float(prior_loss_weight) == 1.0:
|
if not float(prior_loss_weight) == 1.0:
|
||||||
run_cmd += f' --prior_loss_weight={prior_loss_weight}'
|
run_cmd += f' --prior_loss_weight={prior_loss_weight}'
|
||||||
run_cmd += f' --network_module=networks.lora'
|
run_cmd += f' --network_module=networks.lora'
|
||||||
if not float(text_encoder_lr) == 0:
|
|
||||||
|
if not (float(text_encoder_lr) == 0) or not (float(unet_lr) == 0):
|
||||||
|
if not (float(text_encoder_lr) == 0) and not (float(unet_lr) == 0):
|
||||||
run_cmd += f' --text_encoder_lr={text_encoder_lr}'
|
run_cmd += f' --text_encoder_lr={text_encoder_lr}'
|
||||||
else:
|
|
||||||
run_cmd += f' --network_train_unet_only'
|
|
||||||
if not float(unet_lr) == 0:
|
|
||||||
run_cmd += f' --unet_lr={unet_lr}'
|
run_cmd += f' --unet_lr={unet_lr}'
|
||||||
else:
|
elif not (float(text_encoder_lr) == 0):
|
||||||
|
run_cmd += f' --text_encoder_lr={text_encoder_lr}'
|
||||||
run_cmd += f' --network_train_text_encoder_only'
|
run_cmd += f' --network_train_text_encoder_only'
|
||||||
|
else:
|
||||||
|
run_cmd += f' --unet_lr={unet_lr}'
|
||||||
|
run_cmd += f' --network_train_unet_only'
|
||||||
|
else:
|
||||||
|
if float(text_encoder_lr) == 0:
|
||||||
|
msgbox(
|
||||||
|
'Please input learning rate values.'
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
run_cmd += f' --network_dim={network_dim}'
|
run_cmd += f' --network_dim={network_dim}'
|
||||||
|
|
||||||
if not lora_network_weights == '':
|
if not lora_network_weights == '':
|
||||||
run_cmd += f' --network_weights="{lora_network_weights}"'
|
run_cmd += f' --network_weights="{lora_network_weights}"'
|
||||||
if int(gradient_accumulation_steps) > 1:
|
if int(gradient_accumulation_steps) > 1:
|
||||||
@ -400,6 +411,8 @@ def train_model(
|
|||||||
run_cmd += f' --output_name="{output_name}"'
|
run_cmd += f' --output_name="{output_name}"'
|
||||||
if not lr_scheduler_num_cycles == '':
|
if not lr_scheduler_num_cycles == '':
|
||||||
run_cmd += f' --lr_scheduler_num_cycles="{lr_scheduler_num_cycles}"'
|
run_cmd += f' --lr_scheduler_num_cycles="{lr_scheduler_num_cycles}"'
|
||||||
|
else:
|
||||||
|
run_cmd += f' --lr_scheduler_num_cycles="{epoch}"'
|
||||||
if not lr_scheduler_power == '':
|
if not lr_scheduler_power == '':
|
||||||
run_cmd += f' --output_name="{lr_scheduler_power}"'
|
run_cmd += f' --output_name="{lr_scheduler_power}"'
|
||||||
|
|
||||||
@ -612,19 +625,19 @@ def lora_tab(
|
|||||||
placeholder='Optional',
|
placeholder='Optional',
|
||||||
)
|
)
|
||||||
network_dim = gr.Slider(
|
network_dim = gr.Slider(
|
||||||
minimum=1,
|
minimum=4,
|
||||||
maximum=128,
|
maximum=1024,
|
||||||
label='Network Rank (Dimension)',
|
label='Network Rank (Dimension)',
|
||||||
value=8,
|
value=8,
|
||||||
step=1,
|
step=4,
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
network_alpha = gr.Slider(
|
network_alpha = gr.Slider(
|
||||||
minimum=1,
|
minimum=4,
|
||||||
maximum=128,
|
maximum=1024,
|
||||||
label='Network Alpha',
|
label='Network Alpha',
|
||||||
value=1,
|
value=1,
|
||||||
step=1,
|
step=4,
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
@ -9,13 +9,14 @@ pytorch_lightning
|
|||||||
bitsandbytes==0.35.0
|
bitsandbytes==0.35.0
|
||||||
tensorboard
|
tensorboard
|
||||||
safetensors==0.2.6
|
safetensors==0.2.6
|
||||||
gradio
|
gradio==3.16.2
|
||||||
altair
|
altair
|
||||||
easygui
|
easygui
|
||||||
|
tk
|
||||||
# for BLIP captioning
|
# for BLIP captioning
|
||||||
requests
|
requests
|
||||||
timm==0.4.12
|
timm
|
||||||
fairscale==0.4.4
|
fairscale
|
||||||
# for WD14 captioning
|
# for WD14 captioning
|
||||||
tensorflow<2.11
|
tensorflow<2.11
|
||||||
huggingface-hub
|
huggingface-hub
|
||||||
|
Loading…
Reference in New Issue
Block a user