KohyaSS/dreambooth_gui.py
2022-12-16 13:16:23 -05:00

856 lines
29 KiB
Python

# v1: initial release
# v2: add open and save folder icons
# v3: Add new Utilities tab for Dreambooth folder preparation
# v3.1: Adding captioning of images to utilities
import gradio as gr
import json
import math
import os
import subprocess
import pathlib
import shutil
from dreambooth_gui.dreambooth_folder_creation import gradio_dreambooth_folder_creation_tab
from dreambooth_gui.caption_gui import gradio_caption_gui_tab
from dreambooth_gui.common_gui import get_folder_path, remove_doublequote, get_file_path
from easygui import filesavebox, msgbox
# sys.path.insert(0, './dreambooth_gui')
def save_configuration(
    save_as,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    convert_to_safetensors,
    convert_to_ckpt,
    cache_latent,
    caption_extention,
    use_safetensors,
    enable_bucket,
    gradient_checkpointing,
    full_fp16,
    no_token_padding,
    stop_text_encoder_training,
    use_8bit_adam,
    xformers,
):
    """Write the current GUI settings to a JSON configuration file.

    Args:
        save_as: Mapping whose ``"label"`` entry equals the string ``"True"``
            when a "Save as..." was requested; any other value means a plain
            "Save".
        file_path: Configuration path currently known to the GUI. When it is
            empty (or "Save as..." was requested) a file dialog asks for one.
        pretrained_model_name_or_path ... xformers: Setting values; each one
            is stored in the JSON object under its own parameter name.

    Returns:
        The path the settings were written to, or the ``file_path`` that was
        passed in when the dialog is cancelled (nothing is written then).
    """
    original_file_path = file_path

    def ask_for_path():
        # Single definition of the dialog so both branches prompt identically.
        return filesavebox(
            "Select the config file to save",
            default="finetune.json",
            filetypes="*.json",
        )

    if save_as.get("label") == "True":
        print("Save as...")
        file_path = ask_for_path()
    else:
        print("Save...")
        if not file_path:
            file_path = ask_for_path()

    if not file_path:
        # The dialog was cancelled: keep whatever path the caller already had.
        return original_file_path

    # Collect the settings under the same keys open_configuration reads back.
    variables = {
        "pretrained_model_name_or_path": pretrained_model_name_or_path,
        "v2": v2,
        "v_parameterization": v_parameterization,
        "logging_dir": logging_dir,
        "train_data_dir": train_data_dir,
        "reg_data_dir": reg_data_dir,
        "output_dir": output_dir,
        "max_resolution": max_resolution,
        "learning_rate": learning_rate,
        "lr_scheduler": lr_scheduler,
        "lr_warmup": lr_warmup,
        "train_batch_size": train_batch_size,
        "epoch": epoch,
        "save_every_n_epochs": save_every_n_epochs,
        "mixed_precision": mixed_precision,
        "save_precision": save_precision,
        "seed": seed,
        "num_cpu_threads_per_process": num_cpu_threads_per_process,
        "convert_to_safetensors": convert_to_safetensors,
        "convert_to_ckpt": convert_to_ckpt,
        "cache_latent": cache_latent,
        "caption_extention": caption_extention,
        "use_safetensors": use_safetensors,
        "enable_bucket": enable_bucket,
        "gradient_checkpointing": gradient_checkpointing,
        "full_fp16": full_fp16,
        "no_token_padding": no_token_padding,
        "stop_text_encoder_training": stop_text_encoder_training,
        "use_8bit_adam": use_8bit_adam,
        "xformers": xformers,
    }

    # A hand-typed path may point into a folder that does not exist yet.
    destination_directory = os.path.dirname(file_path)
    if destination_directory:
        os.makedirs(destination_directory, exist_ok=True)

    # Save the data to the selected file
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(variables, file, indent=2)

    return file_path
def open_configuration(
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    convert_to_safetensors,
    convert_to_ckpt,
    cache_latent,
    caption_extention,
    use_safetensors,
    enable_bucket,
    gradient_checkpointing,
    full_fp16,
    no_token_padding,
    stop_text_encoder_training,
    use_8bit_adam,
    xformers,
):
    """Load settings from a JSON configuration file.

    Args:
        file_path: Configuration path currently known to the GUI; it is handed
            to ``get_file_path`` to obtain the file to open.
        pretrained_model_name_or_path ... xformers: Current setting values,
            used as fallbacks for keys that are missing from the file.

    Returns:
        A tuple of the configuration path followed by the 30 setting values in
        parameter order. When ``get_file_path`` returns an empty value or
        ``None``, the original path and the current values are returned
        unchanged.
    """
    original_file_path = file_path
    file_path = get_file_path(file_path)

    if file_path:
        print(file_path)
        # load variables from JSON file
        with open(file_path, "r", encoding="utf-8") as f:
            my_data = json.load(f)
    else:
        # No file was chosen: keep the path the caller already had.
        file_path = original_file_path
        my_data = {}

    # Every value falls back to what is currently in the GUI so that a partial
    # configuration file only overrides the keys it actually contains.
    return (
        file_path,
        my_data.get("pretrained_model_name_or_path", pretrained_model_name_or_path),
        my_data.get("v2", v2),
        my_data.get("v_parameterization", v_parameterization),
        my_data.get("logging_dir", logging_dir),
        my_data.get("train_data_dir", train_data_dir),
        my_data.get("reg_data_dir", reg_data_dir),
        my_data.get("output_dir", output_dir),
        my_data.get("max_resolution", max_resolution),
        my_data.get("learning_rate", learning_rate),
        my_data.get("lr_scheduler", lr_scheduler),
        my_data.get("lr_warmup", lr_warmup),
        my_data.get("train_batch_size", train_batch_size),
        my_data.get("epoch", epoch),
        my_data.get("save_every_n_epochs", save_every_n_epochs),
        my_data.get("mixed_precision", mixed_precision),
        my_data.get("save_precision", save_precision),
        my_data.get("seed", seed),
        my_data.get("num_cpu_threads_per_process", num_cpu_threads_per_process),
        my_data.get("convert_to_safetensors", convert_to_safetensors),
        my_data.get("convert_to_ckpt", convert_to_ckpt),
        my_data.get("cache_latent", cache_latent),
        my_data.get("caption_extention", caption_extention),
        my_data.get("use_safetensors", use_safetensors),
        my_data.get("enable_bucket", enable_bucket),
        my_data.get("gradient_checkpointing", gradient_checkpointing),
        my_data.get("full_fp16", full_fp16),
        my_data.get("no_token_padding", no_token_padding),
        my_data.get("stop_text_encoder_training", stop_text_encoder_training),
        my_data.get("use_8bit_adam", use_8bit_adam),
        my_data.get("xformers", xformers),
    )
def _save_inference_file(output_dir, v2, v_parameterization):
    """Copy the matching v2 inference YAML to ``<output_dir>/last.yaml``."""
    # Copy inference model for v2 if required
    if v2 and v_parameterization:
        print(f"Saving v2-inference-v.yaml as {output_dir}/last.yaml")
        shutil.copy(
            "./v2_inference/v2-inference-v.yaml",
            f"{output_dir}/last.yaml",
        )
    elif v2:
        print(f"Saving v2-inference.yaml as {output_dir}/last.yaml")
        shutil.copy(
            "./v2_inference/v2-inference.yaml",
            f"{output_dir}/last.yaml",
        )


def _count_training_steps(train_data_dir):
    """Return the sum of ``repeats * image count`` over the image subfolders."""
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp")
    total_steps = 0
    for folder in os.listdir(train_data_dir):
        folder_path = os.path.join(train_data_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        # The number of repeats is the part of the folder name before the
        # first underscore.
        try:
            repeats = int(folder.split("_")[0])
        except ValueError:
            # A stray folder must not abort the whole run.
            print(f"Folder {folder}: skipped, no repeat count in folder name")
            continue
        num_images = sum(
            1 for name in os.listdir(folder_path) if name.endswith(image_suffixes)
        )
        steps = repeats * num_images
        total_steps += steps
        print(f"Folder {folder}: {steps} steps")
    return total_steps


def _convert_diffusers_model(last_dir, extension, save_precision):
    """Run the conversion tool to turn ``last_dir`` into ``last_dir.<extension>``."""
    target = f"{last_dir}.{extension}"
    print(f"Converting diffuser model {last_dir} to {target}")
    # Argument list instead of a shell string so paths with spaces survive.
    subprocess.run([
        "python",
        "./tools/convert_diffusers20_original_sd.py",
        str(last_dir),
        target,
        f"--{save_precision}",
    ])


def train_model(
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    convert_to_safetensors,
    convert_to_ckpt,
    cache_latent,
    caption_extention,
    use_safetensors,
    enable_bucket,
    gradient_checkpointing,
    full_fp16,
    no_token_padding,
    stop_text_encoder_training_pct,
    use_8bit_adam,
    xformers,
):
    """Validate the settings, launch ``train_db_fixed.py`` and post-process.

    Steps performed:
      1. Show a message box and return early when the source model, image
         folder or output folder is missing, or when the image or
         regularisation folder does not exist.
      2. Derive ``max_train_steps`` from the image subfolders, batch size and
         epoch count (doubled when a regularisation folder is given), then
         derive the text-encoder stop step and the warmup steps from the
         given percentages.
      3. Run ``accelerate launch ... train_db_fixed.py`` with the settings.
      4. If ``<output_dir>/last`` is a directory, optionally convert it to
         ``.ckpt`` / ``.safetensors``; copy the v2 inference YAML when needed.

    Args:
        stop_text_encoder_training_pct: Percentage of ``max_train_steps``
            after which text encoder training stops; ``None`` means 0.
        lr_warmup: Percentage of ``max_train_steps`` used as warmup steps.
        use_8bit_adam, xformers: The matching command-line switch is added
            only when the value is truthy.
        logging_dir, caption_extention: Passed on only when non-empty.
        (other parameters): Forwarded to the training command as options of
            the same name (``max_resolution`` as ``--resolution``).

    Returns:
        None.
    """
    if pretrained_model_name_or_path == "":
        msgbox("Source model information is missing")
        return
    if train_data_dir == "":
        msgbox("Image folder path is missing")
        return
    if not os.path.exists(train_data_dir):
        msgbox("Image folder does not exist")
        return
    if reg_data_dir != "" and not os.path.exists(reg_data_dir):
        msgbox("Regularisation folder does not exist")
        return
    if output_dir == "":
        msgbox("Output folder path is missing")
        return

    total_steps = _count_training_steps(train_data_dir)

    if reg_data_dir == "":
        reg_factor = 1
    else:
        print(
            "Regularisation images are used... Will double the number of steps required..."
        )
        reg_factor = 2

    # calculate max_train_steps
    max_train_steps = int(
        math.ceil(
            float(total_steps) / int(train_batch_size) * int(epoch) * int(reg_factor)
        )
    )
    print(f"max_train_steps = {max_train_steps}")

    # calculate stop encoder training
    if stop_text_encoder_training_pct is None:
        stop_text_encoder_training = 0
    else:
        stop_text_encoder_training = math.ceil(
            float(max_train_steps) / 100 * int(stop_text_encoder_training_pct)
        )
    print(f"stop_text_encoder_training = {stop_text_encoder_training}")

    lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
    print(f"lr_warmup_steps = {lr_warmup_steps}")

    # The command is an argument list, so values need no manual quoting.
    run_cmd = [
        "accelerate",
        "launch",
        f"--num_cpu_threads_per_process={num_cpu_threads_per_process}",
        "train_db_fixed.py",
    ]

    # Boolean switches: each one is emitted once, and only when enabled.
    switches = (
        (v2, "--v2"),
        (v_parameterization, "--v_parameterization"),
        (cache_latent, "--cache_latents"),
        (use_safetensors, "--use_safetensors"),
        (enable_bucket, "--enable_bucket"),
        (gradient_checkpointing, "--gradient_checkpointing"),
        (full_fp16, "--full_fp16"),
        (no_token_padding, "--no_token_padding"),
        (use_8bit_adam, "--use_8bit_adam"),
        (xformers, "--xformers"),
    )
    run_cmd.extend(flag for enabled, flag in switches if enabled)

    run_cmd.append(f"--pretrained_model_name_or_path={pretrained_model_name_or_path}")
    run_cmd.append(f"--train_data_dir={train_data_dir}")
    if reg_data_dir:
        run_cmd.append(f"--reg_data_dir={reg_data_dir}")
    run_cmd.extend([
        f"--resolution={max_resolution}",
        f"--output_dir={output_dir}",
        f"--train_batch_size={train_batch_size}",
        f"--learning_rate={learning_rate}",
        f"--lr_scheduler={lr_scheduler}",
        f"--lr_warmup_steps={lr_warmup_steps}",
        f"--max_train_steps={max_train_steps}",
        f"--mixed_precision={mixed_precision}",
        f"--save_every_n_epochs={save_every_n_epochs}",
        f"--seed={seed}",
        f"--save_precision={save_precision}",
    ])
    # Optional values: an empty field must not reach the trainer as an
    # explicit empty string.
    if logging_dir:
        run_cmd.append(f"--logging_dir={logging_dir}")
    if caption_extention:
        run_cmd.append(f"--caption_extention={caption_extention}")
    run_cmd.append(f"--stop_text_encoder_training={stop_text_encoder_training}")

    print(" ".join(run_cmd))

    # Run the command
    subprocess.run(run_cmd)

    # check if output_dir/last is a directory... therefore it is a diffuser model
    last_dir = pathlib.Path(f"{output_dir}/last")
    print(last_dir)

    if last_dir.is_dir():
        if convert_to_ckpt:
            _convert_diffusers_model(last_dir, "ckpt", save_precision)
            _save_inference_file(output_dir, v2, v_parameterization)
        if convert_to_safetensors:
            _convert_diffusers_model(last_dir, "safetensors", save_precision)
            _save_inference_file(output_dir, v2, v_parameterization)
    else:
        # Copy inference model for v2 if required
        _save_inference_file(output_dir, v2, v_parameterization)
def set_pretrained_model_name_or_path_input(value, v2, v_parameterization):
    """Map a "Model Quick Pick" choice to the model name and the v2 flags.

    Args:
        value: The selected model name, or ``"custom"``.
        v2: Current state of the v2 flag.
        v_parameterization: Current state of the v_parameterization flag.

    Returns:
        A ``(model_name, v2, v_parameterization)`` tuple:
          * SD 2.x "base" models -> ``(value, True, False)``
          * SD 2.x non-base models -> ``(value, True, True)``
          * SD 1.x models -> ``(value, False, False)``
          * ``"custom"`` -> ``("", False, False)``
          * anything else -> the three arguments unchanged
    """
    model_name = str(value)

    v2_models = (
        "stabilityai/stable-diffusion-2-1-base",
        "stabilityai/stable-diffusion-2-base",
    )
    if model_name in v2_models:
        print("SD v2 model detected. Setting --v2 parameter")
        return value, True, False

    v_parameterization_models = (
        "stabilityai/stable-diffusion-2-1",
        "stabilityai/stable-diffusion-2",
    )
    if model_name in v_parameterization_models:
        print(
            "SD v2 v_parameterization detected. Setting --v2 parameter and --v_parameterization"
        )
        return value, True, True

    v1_models = (
        "CompVis/stable-diffusion-v1-4",
        "runwayml/stable-diffusion-v1-5",
    )
    if model_name in v1_models:
        return value, False, False

    if value == "custom":
        return "", False, False

    # Always hand back three values: the caller wires this function to three
    # outputs, so falling off the end (returning None) would be an error.
    return value, v2, v_parameterization
# Optional stylesheet: when ./style.css is present its content is passed to
# the gradio interface; when it is absent the CSS stays empty.
css = ""
try:
    with open("./style.css", "r", encoding="utf8") as file:
        print("Load CSS...")
        css += file.read() + "\n"
except FileNotFoundError:
    # No stylesheet next to the script: nothing to load.
    pass
interface = gr.Blocks(css=css)

# NOTE(review): the nesting of rows/tabs below was reconstructed from the
# order of the statements; confirm it against the intended layout.
with interface:
    # Hidden labels fed to save_configuration as its first input so that it
    # can tell "Save as..." (dummy_true) from a plain "Save" (dummy_false).
    dummy_true = gr.Label(value=True, visible=False)
    dummy_false = gr.Label(value=False, visible=False)
    gr.Markdown("Enter kohya finetuner parameter using this interface.")

    with gr.Accordion("Configuration File Load/Save", open=False):
        with gr.Row():
            button_open_config = gr.Button("Open 📂", elem_id="open_folder")
            button_save_config = gr.Button("Save 💾", elem_id="open_folder")
            button_save_as_config = gr.Button("Save as... 💾",
                                              elem_id="open_folder")
        config_file_name = gr.Textbox(
            label="",
            placeholder="type the configuration file path or use the 'Open' button above to select it...",
        )
        config_file_name.change(remove_doublequote,
                                inputs=[config_file_name],
                                outputs=[config_file_name])

    with gr.Tab("Source model"):
        # Define the input elements
        with gr.Row():
            pretrained_model_name_or_path_input = gr.Textbox(
                label="Pretrained model name or path",
                placeholder=
                "enter the path to custom model or name of pretrained model",
            )
            model_list = gr.Dropdown(
                label="(Optional) Model Quick Pick",
                choices=[
                    "custom",
                    "stabilityai/stable-diffusion-2-1-base",
                    "stabilityai/stable-diffusion-2-base",
                    "stabilityai/stable-diffusion-2-1",
                    "stabilityai/stable-diffusion-2",
                    "runwayml/stable-diffusion-v1-5",
                    "CompVis/stable-diffusion-v1-4",
                ],
            )
        with gr.Row():
            v2_input = gr.Checkbox(label="v2", value=True)
            v_parameterization_input = gr.Checkbox(label="v_parameterization",
                                                   value=False)
        pretrained_model_name_or_path_input.change(
            remove_doublequote,
            inputs=[pretrained_model_name_or_path_input],
            outputs=[pretrained_model_name_or_path_input],
        )
        # Picking a model fills in the model name and both version flags.
        model_list.change(
            set_pretrained_model_name_or_path_input,
            inputs=[model_list, v2_input, v_parameterization_input],
            outputs=[
                pretrained_model_name_or_path_input,
                v2_input,
                v_parameterization_input,
            ],
        )

    with gr.Tab("Directories"):
        with gr.Row():
            train_data_dir_input = gr.Textbox(
                label="Image folder",
                placeholder=
                "Directory where the training folders containing the images are located",
            )
            train_data_dir_input_folder = gr.Button(
                "📂", elem_id="open_folder_small")
            train_data_dir_input_folder.click(get_folder_path,
                                              outputs=train_data_dir_input)
            reg_data_dir_input = gr.Textbox(
                label="Regularisation folder",
                placeholder=
                "(Optional) Directory where where the regularization folders containing the images are located",
            )
            reg_data_dir_input_folder = gr.Button("📂",
                                                  elem_id="open_folder_small")
            reg_data_dir_input_folder.click(get_folder_path,
                                            outputs=reg_data_dir_input)
        with gr.Row():
            output_dir_input = gr.Textbox(
                label="Output directory",
                placeholder="Directory to output trained model",
            )
            output_dir_input_folder = gr.Button("📂",
                                                elem_id="open_folder_small")
            output_dir_input_folder.click(get_folder_path,
                                          outputs=output_dir_input)
            logging_dir_input = gr.Textbox(
                label="Logging directory",
                placeholder=
                "Optional: enable logging and output TensorBoard log to this directory",
            )
            logging_dir_input_folder = gr.Button("📂",
                                                 elem_id="open_folder_small")
            logging_dir_input_folder.click(get_folder_path,
                                           outputs=logging_dir_input)
        # Each directory textbox is passed through remove_doublequote whenever
        # its content changes.
        train_data_dir_input.change(
            remove_doublequote,
            inputs=[train_data_dir_input],
            outputs=[train_data_dir_input],
        )
        reg_data_dir_input.change(
            remove_doublequote,
            inputs=[reg_data_dir_input],
            outputs=[reg_data_dir_input],
        )
        output_dir_input.change(remove_doublequote,
                                inputs=[output_dir_input],
                                outputs=[output_dir_input])
        logging_dir_input.change(remove_doublequote,
                                 inputs=[logging_dir_input],
                                 outputs=[logging_dir_input])

    with gr.Tab("Training parameters"):
        with gr.Row():
            learning_rate_input = gr.Textbox(label="Learning rate", value=1e-6)
            lr_scheduler_input = gr.Dropdown(
                label="LR Scheduler",
                choices=[
                    "constant",
                    "constant_with_warmup",
                    "cosine",
                    "cosine_with_restarts",
                    "linear",
                    "polynomial",
                ],
                value="constant",
            )
            lr_warmup_input = gr.Textbox(label="LR warmup", value=0)
        with gr.Row():
            train_batch_size_input = gr.Slider(minimum=1,
                                               maximum=32,
                                               label="Train batch size",
                                               value=1,
                                               step=1)
            epoch_input = gr.Textbox(label="Epoch", value=1)
            save_every_n_epochs_input = gr.Textbox(label="Save every N epochs",
                                                   value=1)
        with gr.Row():
            mixed_precision_input = gr.Dropdown(
                label="Mixed precision",
                choices=[
                    "no",
                    "fp16",
                    "bf16",
                ],
                value="fp16",
            )
            save_precision_input = gr.Dropdown(
                label="Save precision",
                choices=[
                    "float",
                    "fp16",
                    "bf16",
                ],
                value="fp16",
            )
            # os.cpu_count() returns None when the count cannot be determined;
            # fall back to 1 so the slider always gets numeric bounds.
            cpu_thread_count = os.cpu_count() or 1
            num_cpu_threads_per_process_input = gr.Slider(
                minimum=1,
                maximum=cpu_thread_count,
                step=1,
                label="Number of CPU threads per process",
                value=cpu_thread_count,
            )
        with gr.Row():
            seed_input = gr.Textbox(label="Seed", value=1234)
            max_resolution_input = gr.Textbox(label="Max resolution",
                                              value="512,512",
                                              placeholder="512,512")
        with gr.Row():
            caption_extention_input = gr.Textbox(
                label="Caption Extension",
                placeholder=
                "(Optional) Extension for caption files. default: .caption",
            )
            stop_text_encoder_training_input = gr.Slider(
                minimum=0,
                maximum=100,
                value=0,
                step=1,
                label="Stop text encoder training",
            )
        with gr.Row():
            full_fp16_input = gr.Checkbox(
                label="Full fp16 training (experimental)", value=False)
            no_token_padding_input = gr.Checkbox(label="No token padding",
                                                 value=False)
            use_safetensors_input = gr.Checkbox(
                label="Use safetensor when saving", value=False)
            gradient_checkpointing_input = gr.Checkbox(
                label="Gradient checkpointing", value=False)
        with gr.Row():
            enable_bucket_input = gr.Checkbox(label="Enable buckets",
                                              value=True)
            cache_latent_input = gr.Checkbox(label="Cache latent", value=True)
            use_8bit_adam_input = gr.Checkbox(label="Use 8bit adam",
                                              value=True)
            xformers_input = gr.Checkbox(label="Use xformers", value=True)

    with gr.Tab("Model conversion"):
        convert_to_safetensors_input = gr.Checkbox(
            label="Convert to SafeTensors", value=True)
        convert_to_ckpt_input = gr.Checkbox(label="Convert to CKPT",
                                            value=False)

    with gr.Tab("Utilities"):
        # Dreambooth folder creation tab
        gradio_dreambooth_folder_creation_tab(train_data_dir_input,
                                              reg_data_dir_input,
                                              output_dir_input,
                                              logging_dir_input)
        # Captioning tab
        gradio_caption_gui_tab()

    button_run = gr.Button("Train model")

    # The setting components, in exactly the positional order shared by
    # open_configuration, save_configuration and train_model. Keeping one list
    # means the four event bindings below cannot drift apart.
    settings_list = [
        pretrained_model_name_or_path_input,
        v2_input,
        v_parameterization_input,
        logging_dir_input,
        train_data_dir_input,
        reg_data_dir_input,
        output_dir_input,
        max_resolution_input,
        learning_rate_input,
        lr_scheduler_input,
        lr_warmup_input,
        train_batch_size_input,
        epoch_input,
        save_every_n_epochs_input,
        mixed_precision_input,
        save_precision_input,
        seed_input,
        num_cpu_threads_per_process_input,
        convert_to_safetensors_input,
        convert_to_ckpt_input,
        cache_latent_input,
        caption_extention_input,
        use_safetensors_input,
        enable_bucket_input,
        gradient_checkpointing_input,
        full_fp16_input,
        no_token_padding_input,
        stop_text_encoder_training_input,
        use_8bit_adam_input,
        xformers_input,
    ]

    # Open: reads the file and refreshes the path plus every setting.
    button_open_config.click(
        open_configuration,
        inputs=[config_file_name] + settings_list,
        outputs=[config_file_name] + settings_list,
    )

    # Kept for backward compatibility; not referenced by the bindings below.
    save_as = True
    not_save_as = False

    # Save / Save as: same callback, distinguished by the hidden label input.
    button_save_config.click(
        save_configuration,
        inputs=[dummy_false, config_file_name] + settings_list,
        outputs=[config_file_name],
    )
    button_save_as_config.click(
        save_configuration,
        inputs=[dummy_true, config_file_name] + settings_list,
        outputs=[config_file_name],
    )

    button_run.click(
        train_model,
        inputs=settings_list,
    )

# Show the interface
interface.launch()