commit
9d53e7bd80
25
README.md
25
README.md
@ -143,12 +143,27 @@ Then redo the installation instruction within the kohya_ss venv.
|
|||||||
|
|
||||||
## Change history
|
## Change history
|
||||||
|
|
||||||
|
* 2023/02/06 (v20.7.0)
|
||||||
|
- ``--bucket_reso_steps`` and ``--bucket_no_upscale`` options are added to training scripts (fine tuning, DreamBooth, LoRA and Textual Inversion) and ``prepare_buckets_latents.py``.
|
||||||
|
- ``--bucket_reso_steps`` takes the steps for buckets in aspect ratio bucketing. Default is 64, same as before.
|
||||||
|
- Any value greater than or equal to 1 can be specified; 64 is highly recommended and a value divisible by 8 is recommended.
|
||||||
|
- If a value less than 64 is specified, padding will occur within the U-Net. The effect on the result is unknown.
|
||||||
|
- If you specify a value that is not divisible by 8, the resolution will be truncated to a multiple of 8 inside the VAE, because the size of the latent is 1/8 of the image size.
|
||||||
|
- If ``--bucket_no_upscale`` option is specified, images smaller than the bucket size will be processed without upscaling.
|
||||||
|
- Internally, a bucket smaller than the image size is created (for example, if the image is 300x300 and ``bucket_reso_steps=64``, the bucket is 256x256). The image will be trimmed.
|
||||||
|
- Implementation of [#130](https://github.com/kohya-ss/sd-scripts/issues/130).
|
||||||
|
- Images with an area larger than the maximum size specified by ``--resolution`` are downsampled to the max bucket size.
|
||||||
|
- The number of items in each batch is now limited to the number of actual (non-duplicated) images. This is because a bucket may contain fewer actual images than the batch size, in which case the batch could otherwise contain the same (duplicated) images.
|
||||||
|
- ``--random_crop`` now also works with buckets enabled.
|
||||||
|
- Instead of always cropping the center of the image, the image is shifted left, right, up, and down to be used as the training data. This is expected to let the model learn the edges of the image as well.
|
||||||
|
- Implementation of discussion [#34](https://github.com/kohya-ss/sd-scripts/discussions/34).
|
||||||
* 2023/02/04 (v20.6.1)
|
* 2023/02/04 (v20.6.1)
|
||||||
- ``--persistent_data_loader_workers`` option is added to ``fine_tune.py``, ``train_db.py`` and ``train_network.py``. This option may significantly reduce the waiting time between epochs. Thanks to hitomi!
|
- Add new LoRA resize GUI
|
||||||
- ``--debug_dataset`` option is now working on non-Windows environment. Thanks to tsukimiya!
|
- ``--persistent_data_loader_workers`` option is added to ``fine_tune.py``, ``train_db.py`` and ``train_network.py``. This option may significantly reduce the waiting time between epochs. Thanks to hitomi!
|
||||||
- ``networks/resize_lora.py`` script is added. This can approximate the higher-rank (dim) LoRA model by a lower-rank LoRA model, e.g. 128 by 4. Thanks to mgz-dev!
|
- ``--debug_dataset`` option is now working on non-Windows environment. Thanks to tsukimiya!
|
||||||
- ``--help`` option shows usage.
|
- ``networks/resize_lora.py`` script is added. This can approximate the higher-rank (dim) LoRA model by a lower-rank LoRA model, e.g. 128 to 4. Thanks to mgz-dev!
|
||||||
- Currently the metadata is not copied. This will be fixed in the near future.
|
- ``--help`` option shows usage.
|
||||||
|
- Currently the metadata is not copied. This will be fixed in the near future.
|
||||||
* 2023/02/03 (v20.6.0)
|
* 2023/02/03 (v20.6.0)
|
||||||
- Increase max LoRA rank (dim) size to 1024.
|
- Increase max LoRA rank (dim) size to 1024.
|
||||||
- Update finetune preprocessing scripts.
|
- Update finetune preprocessing scripts.
|
||||||
|
@ -82,8 +82,12 @@ def save_configuration(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list, keep_tokens,
|
model_list,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -167,8 +171,12 @@ def open_configuration(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list, keep_tokens,
|
model_list,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -239,6 +247,9 @@ def train_model(
|
|||||||
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
if pretrained_model_name_or_path == '':
|
if pretrained_model_name_or_path == '':
|
||||||
msgbox('Source model information is missing')
|
msgbox('Source model information is missing')
|
||||||
@ -402,6 +413,9 @@ def train_model(
|
|||||||
use_8bit_adam=use_8bit_adam,
|
use_8bit_adam=use_8bit_adam,
|
||||||
keep_tokens=keep_tokens,
|
keep_tokens=keep_tokens,
|
||||||
persistent_data_loader_workers=persistent_data_loader_workers,
|
persistent_data_loader_workers=persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale=bucket_no_upscale,
|
||||||
|
random_crop=random_crop,
|
||||||
|
bucket_reso_steps=bucket_reso_steps,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(run_cmd)
|
print(run_cmd)
|
||||||
@ -610,6 +624,9 @@ def dreambooth_tab(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
) = gradio_advanced_training()
|
) = gradio_advanced_training()
|
||||||
color_aug.change(
|
color_aug.change(
|
||||||
color_aug_changed,
|
color_aug_changed,
|
||||||
@ -675,6 +692,9 @@ def dreambooth_tab(
|
|||||||
model_list,
|
model_list,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
]
|
]
|
||||||
|
|
||||||
button_open_config.click(
|
button_open_config.click(
|
||||||
|
@ -33,6 +33,7 @@ def train(args):
|
|||||||
train_dataset = train_util.FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
train_dataset = train_util.FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
||||||
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
||||||
args.dataset_repeats, args.debug_dataset)
|
args.dataset_repeats, args.debug_dataset)
|
||||||
train_dataset.make_buckets()
|
train_dataset.make_buckets()
|
||||||
@ -163,7 +164,7 @@ def train(args):
|
|||||||
# DataLoaderのプロセス数:0はメインプロセスになる
|
# DataLoaderのプロセス数:0はメインプロセスになる
|
||||||
n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで
|
n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで
|
||||||
train_dataloader = torch.utils.data.DataLoader(
|
train_dataloader = torch.utils.data.DataLoader(
|
||||||
train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=n_workers)
|
train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers)
|
||||||
|
|
||||||
# 学習ステップ数を計算する
|
# 学習ステップ数を計算する
|
||||||
if args.max_train_epochs is not None:
|
if args.max_train_epochs is not None:
|
||||||
@ -200,6 +201,8 @@ def train(args):
|
|||||||
# epoch数を計算する
|
# epoch数を計算する
|
||||||
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
|
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
|
||||||
num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
|
num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
|
||||||
|
if (args.save_n_epoch_ratio is not None) and (args.save_n_epoch_ratio > 0):
|
||||||
|
args.save_every_n_epochs = math.floor(num_train_epochs / args.save_n_epoch_ratio) or 1
|
||||||
|
|
||||||
# 学習する
|
# 学習する
|
||||||
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
||||||
|
@ -52,6 +52,10 @@ def get_npz_filename_wo_ext(data_dir, image_key, is_full_path, flip):
|
|||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
# assert args.bucket_reso_steps % 8 == 0, f"bucket_reso_steps must be divisible by 8 / bucket_reso_stepは8で割り切れる必要があります"
|
||||||
|
if args.bucket_reso_steps % 8 > 0:
|
||||||
|
print(f"resolution of buckets in training time is a multiple of 8 / 学習時の各bucketの解像度は8単位になります")
|
||||||
|
|
||||||
image_paths = train_util.glob_images(args.train_data_dir)
|
image_paths = train_util.glob_images(args.train_data_dir)
|
||||||
print(f"found {len(image_paths)} images.")
|
print(f"found {len(image_paths)} images.")
|
||||||
|
|
||||||
@ -77,32 +81,41 @@ def main(args):
|
|||||||
max_reso = tuple([int(t) for t in args.max_resolution.split(',')])
|
max_reso = tuple([int(t) for t in args.max_resolution.split(',')])
|
||||||
assert len(max_reso) == 2, f"illegal resolution (not 'width,height') / 画像サイズに誤りがあります。'幅,高さ'で指定してください: {args.max_resolution}"
|
assert len(max_reso) == 2, f"illegal resolution (not 'width,height') / 画像サイズに誤りがあります。'幅,高さ'で指定してください: {args.max_resolution}"
|
||||||
|
|
||||||
bucket_resos, bucket_aspect_ratios = model_util.make_bucket_resolutions(
|
bucket_manager = train_util.BucketManager(args.bucket_no_upscale, max_reso,
|
||||||
max_reso, args.min_bucket_reso, args.max_bucket_reso)
|
args.min_bucket_reso, args.max_bucket_reso, args.bucket_reso_steps)
|
||||||
|
if not args.bucket_no_upscale:
|
||||||
|
bucket_manager.make_buckets()
|
||||||
|
else:
|
||||||
|
print("min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます")
|
||||||
|
|
||||||
# 画像をひとつずつ適切なbucketに割り当てながらlatentを計算する
|
# 画像をひとつずつ適切なbucketに割り当てながらlatentを計算する
|
||||||
bucket_aspect_ratios = np.array(bucket_aspect_ratios)
|
|
||||||
buckets_imgs = [[] for _ in range(len(bucket_resos))]
|
|
||||||
bucket_counts = [0 for _ in range(len(bucket_resos))]
|
|
||||||
img_ar_errors = []
|
img_ar_errors = []
|
||||||
|
|
||||||
def process_batch(is_last):
|
def process_batch(is_last):
|
||||||
for j in range(len(buckets_imgs)):
|
for bucket in bucket_manager.buckets:
|
||||||
bucket = buckets_imgs[j]
|
|
||||||
if (is_last and len(bucket) > 0) or len(bucket) >= args.batch_size:
|
if (is_last and len(bucket) > 0) or len(bucket) >= args.batch_size:
|
||||||
latents = get_latents(vae, [img for _, _, img in bucket], weight_dtype)
|
latents = get_latents(vae, [img for _, img in bucket], weight_dtype)
|
||||||
|
assert latents.shape[2] == bucket[0][1].shape[0] // 8 and latents.shape[3] == bucket[0][1].shape[1] // 8, \
|
||||||
|
f"latent shape {latents.shape}, {bucket[0][1].shape}"
|
||||||
|
|
||||||
for (image_key, _, _), latent in zip(bucket, latents):
|
for (image_key, _), latent in zip(bucket, latents):
|
||||||
npz_file_name = get_npz_filename_wo_ext(args.train_data_dir, image_key, args.full_path, False)
|
npz_file_name = get_npz_filename_wo_ext(args.train_data_dir, image_key, args.full_path, False)
|
||||||
np.savez(npz_file_name, latent)
|
np.savez(npz_file_name, latent)
|
||||||
|
|
||||||
# flip
|
# flip
|
||||||
if args.flip_aug:
|
if args.flip_aug:
|
||||||
latents = get_latents(vae, [img[:, ::-1].copy() for _, _, img in bucket], weight_dtype) # copyがないとTensor変換できない
|
latents = get_latents(vae, [img[:, ::-1].copy() for _, img in bucket], weight_dtype) # copyがないとTensor変換できない
|
||||||
|
|
||||||
for (image_key, _, _), latent in zip(bucket, latents):
|
for (image_key, _), latent in zip(bucket, latents):
|
||||||
npz_file_name = get_npz_filename_wo_ext(args.train_data_dir, image_key, args.full_path, True)
|
npz_file_name = get_npz_filename_wo_ext(args.train_data_dir, image_key, args.full_path, True)
|
||||||
np.savez(npz_file_name, latent)
|
np.savez(npz_file_name, latent)
|
||||||
|
else:
|
||||||
|
# remove existing flipped npz
|
||||||
|
for image_key, _ in bucket:
|
||||||
|
npz_file_name = get_npz_filename_wo_ext(args.train_data_dir, image_key, args.full_path, True) + ".npz"
|
||||||
|
if os.path.isfile(npz_file_name):
|
||||||
|
print(f"remove existing flipped npz / 既存のflipされたnpzファイルを削除します: {npz_file_name}")
|
||||||
|
os.remove(npz_file_name)
|
||||||
|
|
||||||
bucket.clear()
|
bucket.clear()
|
||||||
|
|
||||||
@ -114,6 +127,7 @@ def main(args):
|
|||||||
else:
|
else:
|
||||||
data = [[(None, ip)] for ip in image_paths]
|
data = [[(None, ip)] for ip in image_paths]
|
||||||
|
|
||||||
|
bucket_counts = {}
|
||||||
for data_entry in tqdm(data, smoothing=0.0):
|
for data_entry in tqdm(data, smoothing=0.0):
|
||||||
if data_entry[0] is None:
|
if data_entry[0] is None:
|
||||||
continue
|
continue
|
||||||
@ -134,29 +148,24 @@ def main(args):
|
|||||||
if image_key not in metadata:
|
if image_key not in metadata:
|
||||||
metadata[image_key] = {}
|
metadata[image_key] = {}
|
||||||
|
|
||||||
# 本当はこの部分もDataSetに持っていけば高速化できるがいろいろ大変
|
# 本当はこのあとの部分もDataSetに持っていけば高速化できるがいろいろ大変
|
||||||
aspect_ratio = image.width / image.height
|
|
||||||
ar_errors = bucket_aspect_ratios - aspect_ratio
|
reso, resized_size, ar_error = bucket_manager.select_bucket(image.width, image.height)
|
||||||
bucket_id = np.abs(ar_errors).argmin()
|
|
||||||
reso = bucket_resos[bucket_id]
|
|
||||||
ar_error = ar_errors[bucket_id]
|
|
||||||
img_ar_errors.append(abs(ar_error))
|
img_ar_errors.append(abs(ar_error))
|
||||||
|
bucket_counts[reso] = bucket_counts.get(reso, 0) + 1
|
||||||
|
|
||||||
# どのサイズにリサイズするか→トリミングする方向で
|
# メタデータに記録する解像度はlatent単位とするので、8単位で切り捨て
|
||||||
if ar_error <= 0: # 横が長い→縦を合わせる
|
metadata[image_key]['train_resolution'] = (reso[0] - reso[0] % 8, reso[1] - reso[1] % 8)
|
||||||
scale = reso[1] / image.height
|
|
||||||
else:
|
|
||||||
scale = reso[0] / image.width
|
|
||||||
|
|
||||||
resized_size = (int(image.width * scale + .5), int(image.height * scale + .5))
|
if not args.bucket_no_upscale:
|
||||||
|
# upscaleを行わないときには、resize後のサイズは、bucketのサイズと、縦横どちらかが同じであることを確認する
|
||||||
|
assert resized_size[0] == reso[0] or resized_size[1] == reso[
|
||||||
|
1], f"internal error, resized size not match: {reso}, {resized_size}, {image.width}, {image.height}"
|
||||||
|
assert resized_size[0] >= reso[0] and resized_size[1] >= reso[
|
||||||
|
1], f"internal error, resized size too small: {reso}, {resized_size}, {image.width}, {image.height}"
|
||||||
|
|
||||||
# print(image.width, image.height, bucket_id, bucket_resos[bucket_id], ar_errors[bucket_id], resized_size,
|
|
||||||
# bucket_resos[bucket_id][0] - resized_size[0], bucket_resos[bucket_id][1] - resized_size[1])
|
|
||||||
|
|
||||||
assert resized_size[0] == reso[0] or resized_size[1] == reso[
|
|
||||||
1], f"internal error, resized size not match: {reso}, {resized_size}, {image.width}, {image.height}"
|
|
||||||
assert resized_size[0] >= reso[0] and resized_size[1] >= reso[
|
assert resized_size[0] >= reso[0] and resized_size[1] >= reso[
|
||||||
1], f"internal error, resized size too small: {reso}, {resized_size}, {image.width}, {image.height}"
|
1], f"internal error resized size is small: {resized_size}, {reso}"
|
||||||
|
|
||||||
# 既に存在するファイルがあればshapeを確認して同じならskipする
|
# 既に存在するファイルがあればshapeを確認して同じならskipする
|
||||||
if args.skip_existing:
|
if args.skip_existing:
|
||||||
@ -180,22 +189,24 @@ def main(args):
|
|||||||
# 画像をリサイズしてトリミングする
|
# 画像をリサイズしてトリミングする
|
||||||
# PILにinter_areaがないのでcv2で……
|
# PILにinter_areaがないのでcv2で……
|
||||||
image = np.array(image)
|
image = np.array(image)
|
||||||
image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA)
|
if resized_size[0] != image.shape[1] or resized_size[1] != image.shape[0]: # リサイズ処理が必要?
|
||||||
|
image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA)
|
||||||
|
|
||||||
if resized_size[0] > reso[0]:
|
if resized_size[0] > reso[0]:
|
||||||
trim_size = resized_size[0] - reso[0]
|
trim_size = resized_size[0] - reso[0]
|
||||||
image = image[:, trim_size//2:trim_size//2 + reso[0]]
|
image = image[:, trim_size//2:trim_size//2 + reso[0]]
|
||||||
elif resized_size[1] > reso[1]:
|
|
||||||
|
if resized_size[1] > reso[1]:
|
||||||
trim_size = resized_size[1] - reso[1]
|
trim_size = resized_size[1] - reso[1]
|
||||||
image = image[trim_size//2:trim_size//2 + reso[1]]
|
image = image[trim_size//2:trim_size//2 + reso[1]]
|
||||||
|
|
||||||
assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f"internal error, illegal trimmed size: {image.shape}, {reso}"
|
assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f"internal error, illegal trimmed size: {image.shape}, {reso}"
|
||||||
|
|
||||||
# # debug
|
# # debug
|
||||||
# cv2.imwrite(f"r:\\test\\img_{i:05d}.jpg", image[:, :, ::-1])
|
# cv2.imwrite(f"r:\\test\\img_{len(img_ar_errors)}.jpg", image[:, :, ::-1])
|
||||||
|
|
||||||
# バッチへ追加
|
# バッチへ追加
|
||||||
buckets_imgs[bucket_id].append((image_key, reso, image))
|
bucket_manager.add_image(reso, (image_key, image))
|
||||||
bucket_counts[bucket_id] += 1
|
|
||||||
metadata[image_key]['train_resolution'] = reso
|
|
||||||
|
|
||||||
# バッチを推論するか判定して推論する
|
# バッチを推論するか判定して推論する
|
||||||
process_batch(False)
|
process_batch(False)
|
||||||
@ -203,8 +214,11 @@ def main(args):
|
|||||||
# 残りを処理する
|
# 残りを処理する
|
||||||
process_batch(True)
|
process_batch(True)
|
||||||
|
|
||||||
for i, (reso, count) in enumerate(zip(bucket_resos, bucket_counts)):
|
bucket_manager.sort()
|
||||||
print(f"bucket {i} {reso}: {count}")
|
for i, reso in enumerate(bucket_manager.resos):
|
||||||
|
count = bucket_counts.get(reso, 0)
|
||||||
|
if count > 0:
|
||||||
|
print(f"bucket {i} {reso}: {count}")
|
||||||
img_ar_errors = np.array(img_ar_errors)
|
img_ar_errors = np.array(img_ar_errors)
|
||||||
print(f"mean ar error: {np.mean(img_ar_errors)}")
|
print(f"mean ar error: {np.mean(img_ar_errors)}")
|
||||||
|
|
||||||
@ -230,6 +244,10 @@ if __name__ == '__main__':
|
|||||||
help="max resolution in fine tuning (width,height) / fine tuning時の最大画像サイズ 「幅,高さ」(使用メモリ量に関係します)")
|
help="max resolution in fine tuning (width,height) / fine tuning時の最大画像サイズ 「幅,高さ」(使用メモリ量に関係します)")
|
||||||
parser.add_argument("--min_bucket_reso", type=int, default=256, help="minimum resolution for buckets / bucketの最小解像度")
|
parser.add_argument("--min_bucket_reso", type=int, default=256, help="minimum resolution for buckets / bucketの最小解像度")
|
||||||
parser.add_argument("--max_bucket_reso", type=int, default=1024, help="maximum resolution for buckets / bucketの最小解像度")
|
parser.add_argument("--max_bucket_reso", type=int, default=1024, help="maximum resolution for buckets / bucketの最小解像度")
|
||||||
|
parser.add_argument("--bucket_reso_steps", type=int, default=64,
|
||||||
|
help="steps of resolution for buckets, divisible by 8 is recommended / bucketの解像度の単位、8で割り切れる値を推奨します")
|
||||||
|
parser.add_argument("--bucket_no_upscale", action="store_true",
|
||||||
|
help="make bucket for each image without upscaling / 画像を拡大せずbucketを作成します")
|
||||||
parser.add_argument("--mixed_precision", type=str, default="no",
|
parser.add_argument("--mixed_precision", type=str, default="no",
|
||||||
choices=["no", "fp16", "bf16"], help="use mixed precision / 混合精度を使う場合、その精度")
|
choices=["no", "fp16", "bf16"], help="use mixed precision / 混合精度を使う場合、その精度")
|
||||||
parser.add_argument("--full_path", action="store_true",
|
parser.add_argument("--full_path", action="store_true",
|
||||||
|
@ -78,8 +78,12 @@ def save_configuration(
|
|||||||
color_aug,
|
color_aug,
|
||||||
model_list,
|
model_list,
|
||||||
cache_latents,
|
cache_latents,
|
||||||
use_latent_files, keep_tokens,
|
use_latent_files,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -169,8 +173,12 @@ def open_config_file(
|
|||||||
color_aug,
|
color_aug,
|
||||||
model_list,
|
model_list,
|
||||||
cache_latents,
|
cache_latents,
|
||||||
use_latent_files, keep_tokens,
|
use_latent_files,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -245,8 +253,12 @@ def train_model(
|
|||||||
color_aug,
|
color_aug,
|
||||||
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
||||||
cache_latents,
|
cache_latents,
|
||||||
use_latent_files, keep_tokens,
|
use_latent_files,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# create caption json file
|
# create caption json file
|
||||||
if generate_caption_database:
|
if generate_caption_database:
|
||||||
@ -295,7 +307,11 @@ def train_model(
|
|||||||
subprocess.run(run_cmd)
|
subprocess.run(run_cmd)
|
||||||
|
|
||||||
image_num = len(
|
image_num = len(
|
||||||
[f for f in os.listdir(image_folder) if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.webp')]
|
[
|
||||||
|
f
|
||||||
|
for f in os.listdir(image_folder)
|
||||||
|
if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.webp')
|
||||||
|
]
|
||||||
)
|
)
|
||||||
print(f'image_num = {image_num}')
|
print(f'image_num = {image_num}')
|
||||||
|
|
||||||
@ -386,6 +402,9 @@ def train_model(
|
|||||||
use_8bit_adam=use_8bit_adam,
|
use_8bit_adam=use_8bit_adam,
|
||||||
keep_tokens=keep_tokens,
|
keep_tokens=keep_tokens,
|
||||||
persistent_data_loader_workers=persistent_data_loader_workers,
|
persistent_data_loader_workers=persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale=bucket_no_upscale,
|
||||||
|
random_crop=random_crop,
|
||||||
|
bucket_reso_steps=bucket_reso_steps,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(run_cmd)
|
print(run_cmd)
|
||||||
@ -592,6 +611,9 @@ def finetune_tab():
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
) = gradio_advanced_training()
|
) = gradio_advanced_training()
|
||||||
color_aug.change(
|
color_aug.change(
|
||||||
color_aug_changed,
|
color_aug_changed,
|
||||||
@ -653,6 +675,9 @@ def finetune_tab():
|
|||||||
use_latent_files,
|
use_latent_files,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
]
|
]
|
||||||
|
|
||||||
button_run.click(train_model, inputs=settings_list)
|
button_run.click(train_model, inputs=settings_list)
|
||||||
|
@ -19,7 +19,7 @@ def UI(username, password):
|
|||||||
print('Load CSS...')
|
print('Load CSS...')
|
||||||
css += file.read() + '\n'
|
css += file.read() + '\n'
|
||||||
|
|
||||||
interface = gr.Blocks(css=css, title="Kohya_ss GUI")
|
interface = gr.Blocks(css=css, title='Kohya_ss GUI')
|
||||||
|
|
||||||
with interface:
|
with interface:
|
||||||
with gr.Tab('Dreambooth'):
|
with gr.Tab('Dreambooth'):
|
||||||
|
@ -10,13 +10,15 @@ def caption_images(
|
|||||||
overwrite_input,
|
overwrite_input,
|
||||||
caption_file_ext,
|
caption_file_ext,
|
||||||
prefix,
|
prefix,
|
||||||
postfix, find, replace
|
postfix,
|
||||||
|
find,
|
||||||
|
replace,
|
||||||
):
|
):
|
||||||
# Check for images_dir_input
|
# Check for images_dir_input
|
||||||
if images_dir_input == '':
|
if images_dir_input == '':
|
||||||
msgbox('Image folder is missing...')
|
msgbox('Image folder is missing...')
|
||||||
return
|
return
|
||||||
|
|
||||||
if caption_file_ext == '':
|
if caption_file_ext == '':
|
||||||
msgbox('Please provide an extension for the caption files.')
|
msgbox('Please provide an extension for the caption files.')
|
||||||
return
|
return
|
||||||
@ -39,7 +41,7 @@ def caption_images(
|
|||||||
subprocess.run(run_cmd)
|
subprocess.run(run_cmd)
|
||||||
|
|
||||||
if overwrite_input:
|
if overwrite_input:
|
||||||
if not prefix=='' or not postfix=='':
|
if not prefix == '' or not postfix == '':
|
||||||
# Add prefix and postfix
|
# Add prefix and postfix
|
||||||
add_pre_postfix(
|
add_pre_postfix(
|
||||||
folder=images_dir_input,
|
folder=images_dir_input,
|
||||||
@ -47,7 +49,7 @@ def caption_images(
|
|||||||
prefix=prefix,
|
prefix=prefix,
|
||||||
postfix=postfix,
|
postfix=postfix,
|
||||||
)
|
)
|
||||||
if not find=='':
|
if not find == '':
|
||||||
find_replace(
|
find_replace(
|
||||||
folder=images_dir_input,
|
folder=images_dir_input,
|
||||||
caption_file_ext=caption_file_ext,
|
caption_file_ext=caption_file_ext,
|
||||||
@ -134,6 +136,7 @@ def gradio_basic_caption_gui_tab():
|
|||||||
caption_file_ext,
|
caption_file_ext,
|
||||||
prefix,
|
prefix,
|
||||||
postfix,
|
postfix,
|
||||||
find, replace
|
find,
|
||||||
|
replace,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -26,7 +26,7 @@ def caption_images(
|
|||||||
if train_data_dir == '':
|
if train_data_dir == '':
|
||||||
msgbox('Image folder is missing...')
|
msgbox('Image folder is missing...')
|
||||||
return
|
return
|
||||||
|
|
||||||
if caption_file_ext == '':
|
if caption_file_ext == '':
|
||||||
msgbox('Please provide an extension for the caption files.')
|
msgbox('Please provide an extension for the caption files.')
|
||||||
return
|
return
|
||||||
|
@ -9,6 +9,7 @@ refresh_symbol = '\U0001f504' # 🔄
|
|||||||
save_style_symbol = '\U0001f4be' # 💾
|
save_style_symbol = '\U0001f4be' # 💾
|
||||||
document_symbol = '\U0001F4C4' # 📄
|
document_symbol = '\U0001F4C4' # 📄
|
||||||
|
|
||||||
|
|
||||||
def get_dir_and_file(file_path):
|
def get_dir_and_file(file_path):
|
||||||
dir_path, file_name = os.path.split(file_path)
|
dir_path, file_name = os.path.split(file_path)
|
||||||
return (dir_path, file_name)
|
return (dir_path, file_name)
|
||||||
@ -200,7 +201,7 @@ def find_replace(folder='', caption_file_ext='.caption', find='', replace=''):
|
|||||||
|
|
||||||
files = [f for f in os.listdir(folder) if f.endswith(caption_file_ext)]
|
files = [f for f in os.listdir(folder) if f.endswith(caption_file_ext)]
|
||||||
for file in files:
|
for file in files:
|
||||||
with open(os.path.join(folder, file), 'r', errors="ignore") as f:
|
with open(os.path.join(folder, file), 'r', errors='ignore') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
f.close
|
f.close
|
||||||
content = content.replace(find, replace)
|
content = content.replace(find, replace)
|
||||||
@ -304,7 +305,8 @@ def set_pretrained_model_name_or_path_input(value, v2, v_parameterization):
|
|||||||
###
|
###
|
||||||
### Gradio common GUI section
|
### Gradio common GUI section
|
||||||
###
|
###
|
||||||
|
|
||||||
|
|
||||||
def gradio_config():
|
def gradio_config():
|
||||||
with gr.Accordion('Configuration file', open=False):
|
with gr.Accordion('Configuration file', open=False):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
@ -318,7 +320,13 @@ def gradio_config():
|
|||||||
placeholder="type the configuration file path or use the 'Open' button above to select it...",
|
placeholder="type the configuration file path or use the 'Open' button above to select it...",
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
return (button_open_config, button_save_config, button_save_as_config, config_file_name)
|
return (
|
||||||
|
button_open_config,
|
||||||
|
button_save_config,
|
||||||
|
button_save_as_config,
|
||||||
|
config_file_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def gradio_source_model():
|
def gradio_source_model():
|
||||||
with gr.Tab('Source model'):
|
with gr.Tab('Source model'):
|
||||||
@ -382,9 +390,20 @@ def gradio_source_model():
|
|||||||
v_parameterization,
|
v_parameterization,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
return (pretrained_model_name_or_path, v2, v_parameterization, save_model_as, model_list)
|
return (
|
||||||
|
pretrained_model_name_or_path,
|
||||||
|
v2,
|
||||||
|
v_parameterization,
|
||||||
|
save_model_as,
|
||||||
|
model_list,
|
||||||
|
)
|
||||||
|
|
||||||
def gradio_training(learning_rate_value='1e-6', lr_scheduler_value='constant', lr_warmup_value='0'):
|
|
||||||
|
def gradio_training(
|
||||||
|
learning_rate_value='1e-6',
|
||||||
|
lr_scheduler_value='constant',
|
||||||
|
lr_warmup_value='0',
|
||||||
|
):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
train_batch_size = gr.Slider(
|
train_batch_size = gr.Slider(
|
||||||
minimum=1,
|
minimum=1,
|
||||||
@ -394,9 +413,7 @@ def gradio_training(learning_rate_value='1e-6', lr_scheduler_value='constant', l
|
|||||||
step=1,
|
step=1,
|
||||||
)
|
)
|
||||||
epoch = gr.Textbox(label='Epoch', value=1)
|
epoch = gr.Textbox(label='Epoch', value=1)
|
||||||
save_every_n_epochs = gr.Textbox(
|
save_every_n_epochs = gr.Textbox(label='Save every N epochs', value=1)
|
||||||
label='Save every N epochs', value=1
|
|
||||||
)
|
|
||||||
caption_extension = gr.Textbox(
|
caption_extension = gr.Textbox(
|
||||||
label='Caption Extension',
|
label='Caption Extension',
|
||||||
placeholder='(Optional) Extension for caption files. default: .caption',
|
placeholder='(Optional) Extension for caption files. default: .caption',
|
||||||
@ -429,7 +446,9 @@ def gradio_training(learning_rate_value='1e-6', lr_scheduler_value='constant', l
|
|||||||
)
|
)
|
||||||
seed = gr.Textbox(label='Seed', value=1234)
|
seed = gr.Textbox(label='Seed', value=1234)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
learning_rate = gr.Textbox(label='Learning rate', value=learning_rate_value)
|
learning_rate = gr.Textbox(
|
||||||
|
label='Learning rate', value=learning_rate_value
|
||||||
|
)
|
||||||
lr_scheduler = gr.Dropdown(
|
lr_scheduler = gr.Dropdown(
|
||||||
label='LR Scheduler',
|
label='LR Scheduler',
|
||||||
choices=[
|
choices=[
|
||||||
@ -442,7 +461,9 @@ def gradio_training(learning_rate_value='1e-6', lr_scheduler_value='constant', l
|
|||||||
],
|
],
|
||||||
value=lr_scheduler_value,
|
value=lr_scheduler_value,
|
||||||
)
|
)
|
||||||
lr_warmup = gr.Textbox(label='LR warmup (% of steps)', value=lr_warmup_value)
|
lr_warmup = gr.Textbox(
|
||||||
|
label='LR warmup (% of steps)', value=lr_warmup_value
|
||||||
|
)
|
||||||
cache_latents = gr.Checkbox(label='Cache latent', value=True)
|
cache_latents = gr.Checkbox(label='Cache latent', value=True)
|
||||||
return (
|
return (
|
||||||
learning_rate,
|
learning_rate,
|
||||||
@ -459,50 +480,38 @@ def gradio_training(learning_rate_value='1e-6', lr_scheduler_value='constant', l
|
|||||||
cache_latents,
|
cache_latents,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def run_cmd_training(**kwargs):
    """Build the command-line fragment for the basic training options.

    Each recognised keyword whose value is truthy becomes
    `` --<name>="<value>"``; a missing or falsy value contributes nothing.
    ``cache_latents`` is a bare flag, emitted only when truthy.

    Returns:
        The concatenated option string (each option carries its own leading
        space), or an empty string when nothing is set.
    """
    # Order matters: it fixes the order of options in the resulting command.
    valued_options = (
        'learning_rate',
        'lr_scheduler',
        'lr_warmup_steps',
        'train_batch_size',
        'max_train_steps',
        'save_every_n_epochs',
        'mixed_precision',
        'save_precision',
        'seed',
        'caption_extension',
    )

    pieces = []
    for name in valued_options:
        value = kwargs.get(name)
        if value:
            pieces.append(f' --{name}="{value}"')

    if kwargs.get('cache_latents'):
        pieces.append(' --cache_latents')

    return ''.join(pieces)
|
||||||
@ -532,9 +541,7 @@ def gradio_advanced_training():
|
|||||||
gradient_checkpointing = gr.Checkbox(
|
gradient_checkpointing = gr.Checkbox(
|
||||||
label='Gradient checkpointing', value=False
|
label='Gradient checkpointing', value=False
|
||||||
)
|
)
|
||||||
shuffle_caption = gr.Checkbox(
|
shuffle_caption = gr.Checkbox(label='Shuffle caption', value=False)
|
||||||
label='Shuffle caption', value=False
|
|
||||||
)
|
|
||||||
persistent_data_loader_workers = gr.Checkbox(
|
persistent_data_loader_workers = gr.Checkbox(
|
||||||
label='Persistent data loader', value=False
|
label='Persistent data loader', value=False
|
||||||
)
|
)
|
||||||
@ -544,10 +551,18 @@ def gradio_advanced_training():
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
use_8bit_adam = gr.Checkbox(label='Use 8bit adam', value=True)
|
use_8bit_adam = gr.Checkbox(label='Use 8bit adam', value=True)
|
||||||
xformers = gr.Checkbox(label='Use xformers', value=True)
|
xformers = gr.Checkbox(label='Use xformers', value=True)
|
||||||
color_aug = gr.Checkbox(
|
color_aug = gr.Checkbox(label='Color augmentation', value=False)
|
||||||
label='Color augmentation', value=False
|
|
||||||
)
|
|
||||||
flip_aug = gr.Checkbox(label='Flip augmentation', value=False)
|
flip_aug = gr.Checkbox(label='Flip augmentation', value=False)
|
||||||
|
with gr.Row():
|
||||||
|
bucket_no_upscale = gr.Checkbox(
|
||||||
|
label="Don't upscale bucket resolution", value=True
|
||||||
|
)
|
||||||
|
bucket_reso_steps = gr.Number(
|
||||||
|
label='Bucket resolution steps', value=64
|
||||||
|
)
|
||||||
|
random_crop = gr.Checkbox(
|
||||||
|
label='Random crop instead of center crop', value=False
|
||||||
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
save_state = gr.Checkbox(label='Save training state', value=False)
|
save_state = gr.Checkbox(label='Save training state', value=False)
|
||||||
resume = gr.Textbox(
|
resume = gr.Textbox(
|
||||||
@ -581,55 +596,53 @@ def gradio_advanced_training():
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def run_cmd_advanced_training(**kwargs):
    """Build the command-line fragment for the advanced training options.

    Valued options are emitted only when set (truthy) or, for the numeric
    ones, when they exceed the threshold tested below. Boolean options are
    emitted as bare flags when truthy.

    ``bucket_reso_steps`` falls back to 64 when the keyword is absent and is
    emitted whenever it is >= 1. The value is read once so that the threshold
    test and the emitted number cannot disagree.

    Returns:
        The concatenated option string (each option carries its own leading
        space).

    Raises:
        ValueError, TypeError: if ``max_token_length``, ``clip_skip``,
            ``keep_tokens`` or ``bucket_reso_steps`` is supplied with a value
            that ``int()`` cannot convert.
    """
    # Single source of truth for both the condition and the formatted value.
    bucket_reso_steps = int(kwargs.get('bucket_reso_steps', 64))

    options = [
        f' --max_train_epochs="{kwargs.get("max_train_epochs", "")}"'
        if kwargs.get('max_train_epochs')
        else '',
        f' --max_data_loader_n_workers="{kwargs.get("max_data_loader_n_workers", "")}"'
        if kwargs.get('max_data_loader_n_workers')
        else '',
        # Only passed when it exceeds 75, the fallback used when the key is absent.
        f' --max_token_length={kwargs.get("max_token_length", "")}'
        if int(kwargs.get('max_token_length', 75)) > 75
        else '',
        f' --clip_skip={kwargs.get("clip_skip", "")}'
        if int(kwargs.get('clip_skip', 1)) > 1
        else '',
        f' --resume="{kwargs.get("resume", "")}"'
        if kwargs.get('resume')
        else '',
        f' --keep_tokens="{kwargs.get("keep_tokens", "")}"'
        if int(kwargs.get('keep_tokens', 0)) > 0
        else '',
        f' --bucket_reso_steps={bucket_reso_steps}'
        if bucket_reso_steps >= 1
        else '',
        ' --save_state' if kwargs.get('save_state') else '',
        ' --mem_eff_attn' if kwargs.get('mem_eff_attn') else '',
        ' --color_aug' if kwargs.get('color_aug') else '',
        ' --flip_aug' if kwargs.get('flip_aug') else '',
        ' --shuffle_caption' if kwargs.get('shuffle_caption') else '',
        ' --gradient_checkpointing'
        if kwargs.get('gradient_checkpointing')
        else '',
        ' --full_fp16' if kwargs.get('full_fp16') else '',
        ' --xformers' if kwargs.get('xformers') else '',
        ' --use_8bit_adam' if kwargs.get('use_8bit_adam') else '',
        ' --persistent_data_loader_workers'
        if kwargs.get('persistent_data_loader_workers')
        else '',
        ' --bucket_no_upscale' if kwargs.get('bucket_no_upscale') else '',
        ' --random_crop' if kwargs.get('random_crop') else '',
    ]
    run_cmd = ''.join(options)
    return run_cmd
|
||||||
|
|
||||||
|
@ -191,9 +191,7 @@ def gradio_dreambooth_folder_creation_tab(
|
|||||||
util_training_dir_output,
|
util_training_dir_output,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
button_copy_info_to_Folders_tab = gr.Button(
|
button_copy_info_to_Folders_tab = gr.Button('Copy info to Folders Tab')
|
||||||
'Copy info to Folders Tab'
|
|
||||||
)
|
|
||||||
button_copy_info_to_Folders_tab.click(
|
button_copy_info_to_Folders_tab.click(
|
||||||
copy_info_to_Folders_tab,
|
copy_info_to_Folders_tab,
|
||||||
inputs=[util_training_dir_output],
|
inputs=[util_training_dir_output],
|
||||||
|
@ -2,7 +2,11 @@ import gradio as gr
|
|||||||
from easygui import msgbox
|
from easygui import msgbox
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
from .common_gui import get_saveasfilename_path, get_any_file_path, get_file_path
|
from .common_gui import (
|
||||||
|
get_saveasfilename_path,
|
||||||
|
get_any_file_path,
|
||||||
|
get_file_path,
|
||||||
|
)
|
||||||
|
|
||||||
folder_symbol = '\U0001f4c2' # 📂
|
folder_symbol = '\U0001f4c2' # 📂
|
||||||
refresh_symbol = '\U0001f504' # 🔄
|
refresh_symbol = '\U0001f504' # 🔄
|
||||||
@ -11,13 +15,18 @@ document_symbol = '\U0001F4C4' # 📄
|
|||||||
|
|
||||||
|
|
||||||
def extract_lora(
|
def extract_lora(
|
||||||
model_tuned, model_org, save_to, save_precision, dim, v2,
|
model_tuned,
|
||||||
|
model_org,
|
||||||
|
save_to,
|
||||||
|
save_precision,
|
||||||
|
dim,
|
||||||
|
v2,
|
||||||
):
|
):
|
||||||
# Check for caption_text_input
|
# Check for caption_text_input
|
||||||
if model_tuned == '':
|
if model_tuned == '':
|
||||||
msgbox('Invalid finetuned model file')
|
msgbox('Invalid finetuned model file')
|
||||||
return
|
return
|
||||||
|
|
||||||
if model_org == '':
|
if model_org == '':
|
||||||
msgbox('Invalid base model file')
|
msgbox('Invalid base model file')
|
||||||
return
|
return
|
||||||
@ -26,12 +35,14 @@ def extract_lora(
|
|||||||
if not os.path.isfile(model_tuned):
|
if not os.path.isfile(model_tuned):
|
||||||
msgbox('The provided finetuned model is not a file')
|
msgbox('The provided finetuned model is not a file')
|
||||||
return
|
return
|
||||||
|
|
||||||
if not os.path.isfile(model_org):
|
if not os.path.isfile(model_org):
|
||||||
msgbox('The provided base model is not a file')
|
msgbox('The provided base model is not a file')
|
||||||
return
|
return
|
||||||
|
|
||||||
run_cmd = f'.\\venv\Scripts\python.exe "networks\extract_lora_from_models.py"'
|
run_cmd = (
|
||||||
|
f'.\\venv\Scripts\python.exe "networks\extract_lora_from_models.py"'
|
||||||
|
)
|
||||||
run_cmd += f' --save_precision {save_precision}'
|
run_cmd += f' --save_precision {save_precision}'
|
||||||
run_cmd += f' --save_to "{save_to}"'
|
run_cmd += f' --save_to "{save_to}"'
|
||||||
run_cmd += f' --model_org "{model_org}"'
|
run_cmd += f' --model_org "{model_org}"'
|
||||||
@ -60,7 +71,7 @@ def gradio_extract_lora_tab():
|
|||||||
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
||||||
model_ext = gr.Textbox(value='*.ckpt *.safetensors', visible=False)
|
model_ext = gr.Textbox(value='*.ckpt *.safetensors', visible=False)
|
||||||
model_ext_name = gr.Textbox(value='Model types', visible=False)
|
model_ext_name = gr.Textbox(value='Model types', visible=False)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
model_tuned = gr.Textbox(
|
model_tuned = gr.Textbox(
|
||||||
label='Finetuned model',
|
label='Finetuned model',
|
||||||
@ -75,7 +86,7 @@ def gradio_extract_lora_tab():
|
|||||||
inputs=[model_tuned, model_ext, model_ext_name],
|
inputs=[model_tuned, model_ext, model_ext_name],
|
||||||
outputs=model_tuned,
|
outputs=model_tuned,
|
||||||
)
|
)
|
||||||
|
|
||||||
model_org = gr.Textbox(
|
model_org = gr.Textbox(
|
||||||
label='Stable Diffusion base model',
|
label='Stable Diffusion base model',
|
||||||
placeholder='Stable Diffusion original model: ckpt or safetensors file',
|
placeholder='Stable Diffusion original model: ckpt or safetensors file',
|
||||||
@ -99,7 +110,9 @@ def gradio_extract_lora_tab():
|
|||||||
folder_symbol, elem_id='open_folder_small'
|
folder_symbol, elem_id='open_folder_small'
|
||||||
)
|
)
|
||||||
button_save_to.click(
|
button_save_to.click(
|
||||||
get_saveasfilename_path, inputs=[save_to, lora_ext, lora_ext_name], outputs=save_to
|
get_saveasfilename_path,
|
||||||
|
inputs=[save_to, lora_ext, lora_ext_name],
|
||||||
|
outputs=save_to,
|
||||||
)
|
)
|
||||||
save_precision = gr.Dropdown(
|
save_precision = gr.Dropdown(
|
||||||
label='Save precison',
|
label='Save precison',
|
||||||
@ -122,6 +135,5 @@ def gradio_extract_lora_tab():
|
|||||||
|
|
||||||
extract_button.click(
|
extract_button.click(
|
||||||
extract_lora,
|
extract_lora,
|
||||||
inputs=[model_tuned, model_org, save_to, save_precision, dim, v2
|
inputs=[model_tuned, model_org, save_to, save_precision, dim, v2],
|
||||||
],
|
|
||||||
)
|
)
|
||||||
|
@ -15,11 +15,11 @@ def caption_images(
|
|||||||
prefix,
|
prefix,
|
||||||
postfix,
|
postfix,
|
||||||
):
|
):
|
||||||
# Check for images_dir_input
|
# Check for images_dir_input
|
||||||
if train_data_dir == '':
|
if train_data_dir == '':
|
||||||
msgbox('Image folder is missing...')
|
msgbox('Image folder is missing...')
|
||||||
return
|
return
|
||||||
|
|
||||||
if caption_ext == '':
|
if caption_ext == '':
|
||||||
msgbox('Please provide an extension for the caption files.')
|
msgbox('Please provide an extension for the caption files.')
|
||||||
return
|
return
|
||||||
@ -29,7 +29,9 @@ def caption_images(
|
|||||||
if not model_id == '':
|
if not model_id == '':
|
||||||
run_cmd += f' --model_id="{model_id}"'
|
run_cmd += f' --model_id="{model_id}"'
|
||||||
run_cmd += f' --batch_size="{int(batch_size)}"'
|
run_cmd += f' --batch_size="{int(batch_size)}"'
|
||||||
run_cmd += f' --max_data_loader_n_workers="{int(max_data_loader_n_workers)}"'
|
run_cmd += (
|
||||||
|
f' --max_data_loader_n_workers="{int(max_data_loader_n_workers)}"'
|
||||||
|
)
|
||||||
run_cmd += f' --max_length="{int(max_length)}"'
|
run_cmd += f' --max_length="{int(max_length)}"'
|
||||||
if caption_ext != '':
|
if caption_ext != '':
|
||||||
run_cmd += f' --caption_extension="{caption_ext}"'
|
run_cmd += f' --caption_extension="{caption_ext}"'
|
||||||
@ -105,8 +107,9 @@ def gradio_git_caption_gui_tab():
|
|||||||
value=75, label='Max length', interactive=True
|
value=75, label='Max length', interactive=True
|
||||||
)
|
)
|
||||||
model_id = gr.Textbox(
|
model_id = gr.Textbox(
|
||||||
label="Model",
|
label='Model',
|
||||||
placeholder="(Optional) model id for GIT in Hugging Face", interactive=True
|
placeholder='(Optional) model id for GIT in Hugging Face',
|
||||||
|
interactive=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
caption_button = gr.Button('Caption images')
|
caption_button = gr.Button('Caption images')
|
||||||
|
@ -2,7 +2,11 @@ import gradio as gr
|
|||||||
from easygui import msgbox
|
from easygui import msgbox
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
from .common_gui import get_saveasfilename_path, get_any_file_path, get_file_path
|
from .common_gui import (
|
||||||
|
get_saveasfilename_path,
|
||||||
|
get_any_file_path,
|
||||||
|
get_file_path,
|
||||||
|
)
|
||||||
|
|
||||||
folder_symbol = '\U0001f4c2' # 📂
|
folder_symbol = '\U0001f4c2' # 📂
|
||||||
refresh_symbol = '\U0001f504' # 🔄
|
refresh_symbol = '\U0001f504' # 🔄
|
||||||
@ -11,13 +15,18 @@ document_symbol = '\U0001F4C4' # 📄
|
|||||||
|
|
||||||
|
|
||||||
def merge_lora(
|
def merge_lora(
|
||||||
lora_a_model, lora_b_model, ratio, save_to, precision, save_precision,
|
lora_a_model,
|
||||||
|
lora_b_model,
|
||||||
|
ratio,
|
||||||
|
save_to,
|
||||||
|
precision,
|
||||||
|
save_precision,
|
||||||
):
|
):
|
||||||
# Check for caption_text_input
|
# Check for caption_text_input
|
||||||
if lora_a_model == '':
|
if lora_a_model == '':
|
||||||
msgbox('Invalid model A file')
|
msgbox('Invalid model A file')
|
||||||
return
|
return
|
||||||
|
|
||||||
if lora_b_model == '':
|
if lora_b_model == '':
|
||||||
msgbox('Invalid model B file')
|
msgbox('Invalid model B file')
|
||||||
return
|
return
|
||||||
@ -26,7 +35,7 @@ def merge_lora(
|
|||||||
if not os.path.isfile(lora_a_model):
|
if not os.path.isfile(lora_a_model):
|
||||||
msgbox('The provided model A is not a file')
|
msgbox('The provided model A is not a file')
|
||||||
return
|
return
|
||||||
|
|
||||||
if not os.path.isfile(lora_b_model):
|
if not os.path.isfile(lora_b_model):
|
||||||
msgbox('The provided model B is not a file')
|
msgbox('The provided model B is not a file')
|
||||||
return
|
return
|
||||||
@ -54,13 +63,11 @@ def merge_lora(
|
|||||||
|
|
||||||
def gradio_merge_lora_tab():
|
def gradio_merge_lora_tab():
|
||||||
with gr.Tab('Merge LoRA'):
|
with gr.Tab('Merge LoRA'):
|
||||||
gr.Markdown(
|
gr.Markdown('This utility can merge two LoRA networks together.')
|
||||||
'This utility can merge two LoRA networks together.'
|
|
||||||
)
|
|
||||||
|
|
||||||
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
||||||
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
lora_a_model = gr.Textbox(
|
lora_a_model = gr.Textbox(
|
||||||
label='LoRA model "A"',
|
label='LoRA model "A"',
|
||||||
@ -75,7 +82,7 @@ def gradio_merge_lora_tab():
|
|||||||
inputs=[lora_a_model, lora_ext, lora_ext_name],
|
inputs=[lora_a_model, lora_ext, lora_ext_name],
|
||||||
outputs=lora_a_model,
|
outputs=lora_a_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
lora_b_model = gr.Textbox(
|
lora_b_model = gr.Textbox(
|
||||||
label='LoRA model "B"',
|
label='LoRA model "B"',
|
||||||
placeholder='Path to the LoRA B model',
|
placeholder='Path to the LoRA B model',
|
||||||
@ -90,9 +97,15 @@ def gradio_merge_lora_tab():
|
|||||||
outputs=lora_b_model,
|
outputs=lora_b_model,
|
||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
ratio = gr.Slider(label="Merge ratio (eg: 0.7 mean 70% of model A and 30% of model B", minimum=0, maximum=1, step=0.01, value=0.5,
|
ratio = gr.Slider(
|
||||||
interactive=True,)
|
label='Merge ratio (eg: 0.7 mean 70% of model A and 30% of model B',
|
||||||
|
minimum=0,
|
||||||
|
maximum=1,
|
||||||
|
step=0.01,
|
||||||
|
value=0.5,
|
||||||
|
interactive=True,
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
save_to = gr.Textbox(
|
save_to = gr.Textbox(
|
||||||
label='Save to',
|
label='Save to',
|
||||||
@ -103,7 +116,9 @@ def gradio_merge_lora_tab():
|
|||||||
folder_symbol, elem_id='open_folder_small'
|
folder_symbol, elem_id='open_folder_small'
|
||||||
)
|
)
|
||||||
button_save_to.click(
|
button_save_to.click(
|
||||||
get_saveasfilename_path, inputs=[save_to, lora_ext, lora_ext_name], outputs=save_to
|
get_saveasfilename_path,
|
||||||
|
inputs=[save_to, lora_ext, lora_ext_name],
|
||||||
|
outputs=save_to,
|
||||||
)
|
)
|
||||||
precision = gr.Dropdown(
|
precision = gr.Dropdown(
|
||||||
label='Merge precison',
|
label='Merge precison',
|
||||||
@ -122,6 +137,12 @@ def gradio_merge_lora_tab():
|
|||||||
|
|
||||||
convert_button.click(
|
convert_button.click(
|
||||||
merge_lora,
|
merge_lora,
|
||||||
inputs=[lora_a_model, lora_b_model, ratio, save_to, precision, save_precision,
|
inputs=[
|
||||||
|
lora_a_model,
|
||||||
|
lora_b_model,
|
||||||
|
ratio,
|
||||||
|
save_to,
|
||||||
|
precision,
|
||||||
|
save_precision,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -1163,15 +1163,14 @@ def make_bucket_resolutions(max_reso, min_size=256, max_size=1024, divisible=64)
|
|||||||
|
|
||||||
resos = list(resos)
|
resos = list(resos)
|
||||||
resos.sort()
|
resos.sort()
|
||||||
|
return resos
|
||||||
aspect_ratios = [w / h for w, h in resos]
|
|
||||||
return resos, aspect_ratios
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
resos, aspect_ratios = make_bucket_resolutions((512, 768))
|
resos = make_bucket_resolutions((512, 768))
|
||||||
print(len(resos))
|
print(len(resos))
|
||||||
print(resos)
|
print(resos)
|
||||||
|
aspect_ratios = [w / h for w, h in resos]
|
||||||
print(aspect_ratios)
|
print(aspect_ratios)
|
||||||
|
|
||||||
ars = set()
|
ars = set()
|
||||||
|
@ -11,7 +11,11 @@ document_symbol = '\U0001F4C4' # 📄
|
|||||||
|
|
||||||
|
|
||||||
def resize_lora(
|
def resize_lora(
|
||||||
model, new_rank, save_to, save_precision, device,
|
model,
|
||||||
|
new_rank,
|
||||||
|
save_to,
|
||||||
|
save_precision,
|
||||||
|
device,
|
||||||
):
|
):
|
||||||
# Check for caption_text_input
|
# Check for caption_text_input
|
||||||
if model == '':
|
if model == '':
|
||||||
@ -22,7 +26,7 @@ def resize_lora(
|
|||||||
if not os.path.isfile(model):
|
if not os.path.isfile(model):
|
||||||
msgbox('The provided model is not a file')
|
msgbox('The provided model is not a file')
|
||||||
return
|
return
|
||||||
|
|
||||||
if device == '':
|
if device == '':
|
||||||
device = 'cuda'
|
device = 'cuda'
|
||||||
|
|
||||||
@ -46,13 +50,11 @@ def resize_lora(
|
|||||||
|
|
||||||
def gradio_resize_lora_tab():
|
def gradio_resize_lora_tab():
|
||||||
with gr.Tab('Resize LoRA'):
|
with gr.Tab('Resize LoRA'):
|
||||||
gr.Markdown(
|
gr.Markdown('This utility can resize a LoRA.')
|
||||||
'This utility can resize a LoRA.'
|
|
||||||
)
|
|
||||||
|
|
||||||
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
||||||
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
model = gr.Textbox(
|
model = gr.Textbox(
|
||||||
label='Source LoRA',
|
label='Source LoRA',
|
||||||
@ -68,9 +70,15 @@ def gradio_resize_lora_tab():
|
|||||||
outputs=model,
|
outputs=model,
|
||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
new_rank = gr.Slider(label="Desired LoRA rank", minimum=1, maximum=1024, step=1, value=4,
|
new_rank = gr.Slider(
|
||||||
interactive=True,)
|
label='Desired LoRA rank',
|
||||||
|
minimum=1,
|
||||||
|
maximum=1024,
|
||||||
|
step=1,
|
||||||
|
value=4,
|
||||||
|
interactive=True,
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
save_to = gr.Textbox(
|
save_to = gr.Textbox(
|
||||||
label='Save to',
|
label='Save to',
|
||||||
@ -81,7 +89,9 @@ def gradio_resize_lora_tab():
|
|||||||
folder_symbol, elem_id='open_folder_small'
|
folder_symbol, elem_id='open_folder_small'
|
||||||
)
|
)
|
||||||
button_save_to.click(
|
button_save_to.click(
|
||||||
get_saveasfilename_path, inputs=[save_to, lora_ext, lora_ext_name], outputs=save_to
|
get_saveasfilename_path,
|
||||||
|
inputs=[save_to, lora_ext, lora_ext_name],
|
||||||
|
outputs=save_to,
|
||||||
)
|
)
|
||||||
save_precision = gr.Dropdown(
|
save_precision = gr.Dropdown(
|
||||||
label='Save precison',
|
label='Save precison',
|
||||||
@ -99,6 +109,11 @@ def gradio_resize_lora_tab():
|
|||||||
|
|
||||||
convert_button.click(
|
convert_button.click(
|
||||||
resize_lora,
|
resize_lora,
|
||||||
inputs=[model, new_rank, save_to, save_precision, device,
|
inputs=[
|
||||||
|
model,
|
||||||
|
new_rank,
|
||||||
|
save_to,
|
||||||
|
save_precision,
|
||||||
|
device,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -4,7 +4,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
from typing import NamedTuple
|
from typing import Dict, List, NamedTuple, Tuple
|
||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from torch.autograd.function import Function
|
from torch.autograd.function import Function
|
||||||
import glob
|
import glob
|
||||||
@ -55,16 +55,142 @@ class ImageInfo():
|
|||||||
self.caption: str = caption
|
self.caption: str = caption
|
||||||
self.is_reg: bool = is_reg
|
self.is_reg: bool = is_reg
|
||||||
self.absolute_path: str = absolute_path
|
self.absolute_path: str = absolute_path
|
||||||
self.image_size: tuple[int, int] = None
|
self.image_size: Tuple[int, int] = None
|
||||||
self.bucket_reso: tuple[int, int] = None
|
self.resized_size: Tuple[int, int] = None
|
||||||
|
self.bucket_reso: Tuple[int, int] = None
|
||||||
self.latents: torch.Tensor = None
|
self.latents: torch.Tensor = None
|
||||||
self.latents_flipped: torch.Tensor = None
|
self.latents_flipped: torch.Tensor = None
|
||||||
self.latents_npz: str = None
|
self.latents_npz: str = None
|
||||||
self.latents_npz_flipped: str = None
|
self.latents_npz_flipped: str = None
|
||||||
|
|
||||||
|
|
||||||
|
class BucketManager():
    """Track bucket resolutions and the images assigned to each bucket.

    A bucket is identified by its ``(width, height)`` resolution. ``resos``
    lists the registered resolutions, ``reso_to_id`` maps a resolution to its
    index, and ``buckets[index]`` is the list of items added for it.
    """

    def __init__(self, no_upscale, max_reso, min_size, max_size, reso_steps) -> None:
        """Store the bucketing parameters and start with no buckets.

        Args:
            no_upscale: when true, ``select_bucket`` derives the bucket from
                the image's own size instead of the predefined resolutions.
            max_reso: ``(width, height)`` whose product becomes ``max_area``,
                or ``None`` for no area limit.
            min_size, max_size: forwarded to
                ``model_util.make_bucket_resolutions`` by ``make_buckets``.
            reso_steps: granularity to which bucket sides are rounded down.
        """
        self.no_upscale = no_upscale
        if max_reso is None:
            self.max_reso = None
            self.max_area = None
        else:
            self.max_reso = max_reso
            self.max_area = max_reso[0] * max_reso[1]
        self.min_size = min_size
        self.max_size = max_size
        self.reso_steps = reso_steps

        self.resos = []
        self.reso_to_id = {}
        # Holds (image_key, image) during preprocessing, image_key during training.
        self.buckets = []

    def add_image(self, reso, image):
        """Append ``image`` to the bucket registered for ``reso``.

        Raises:
            KeyError: if ``reso`` has not been registered.
        """
        bucket_id = self.reso_to_id[reso]
        self.buckets[bucket_id].append(image)

    def shuffle(self):
        """Shuffle the contents of every bucket in place."""
        for bucket in self.buckets:
            random.shuffle(bucket)

    def sort(self):
        """Reorder buckets by ascending resolution.

        Done only so that displayed output and stored metadata look tidy.
        ``buckets`` is reordered to match and ``reso_to_id`` is renumbered.
        """
        sorted_resos = self.resos.copy()
        sorted_resos.sort()

        sorted_buckets = []
        sorted_reso_to_id = {}
        for i, reso in enumerate(sorted_resos):
            bucket_id = self.reso_to_id[reso]
            sorted_buckets.append(self.buckets[bucket_id])
            sorted_reso_to_id[reso] = i

        self.resos = sorted_resos
        self.buckets = sorted_buckets
        self.reso_to_id = sorted_reso_to_id

    def make_buckets(self):
        """Generate the predefined resolutions from the stored parameters."""
        resos = model_util.make_bucket_resolutions(self.max_reso, self.min_size, self.max_size, self.reso_steps)
        self.set_predefined_resos(resos)

    def set_predefined_resos(self, resos):
        """Store the resolutions (and their aspect ratios) to choose from.

        Used when buckets are picked from a fixed list of sizes.
        """
        self.predefined_resos = resos.copy()
        self.predefined_resos_set = set(resos)
        # NOTE: the attribute name is misspelled ("predifined"); it is kept
        # unchanged in case code outside this class reads it.
        self.predifined_aspect_ratios = np.array([w / h for w, h in resos])

    def add_if_new_reso(self, reso):
        """Register ``reso`` with a fresh empty bucket unless already known."""
        if reso not in self.reso_to_id:
            bucket_id = len(self.resos)
            self.reso_to_id[reso] = bucket_id
            self.resos.append(reso)
            self.buckets.append([])

    def round_to_steps(self, x):
        """Round ``x`` to the nearest integer, then down to a multiple of ``reso_steps``."""
        x = int(x + .5)
        return x - x % self.reso_steps

    def _select_predefined(self, image_width, image_height, aspect_ratio):
        """Pick a predefined resolution and the size the image is scaled to.

        Returns:
            ``(reso, resized_size)``.
        """
        # Several buckets may share an aspect ratio (fine tuning preprocessed
        # with no_upscale=True), so an exact resolution match takes priority.
        reso = (image_width, image_height)
        if reso not in self.predefined_resos_set:
            # Otherwise take the resolution with the smallest aspect-ratio error.
            ar_errors = self.predifined_aspect_ratios - aspect_ratio
            predefined_bucket_id = np.abs(ar_errors).argmin()
            reso = self.predefined_resos[predefined_bucket_id]

        ar_reso = reso[0] / reso[1]
        if aspect_ratio > ar_reso:
            # Image is wider than the bucket: match the height.
            scale = reso[1] / image_height
        else:
            scale = reso[0] / image_width

        resized_size = (int(image_width * scale + .5), int(image_height * scale + .5))
        return reso, resized_size

    def _select_without_upscale(self, image_width, image_height, aspect_ratio):
        """Derive a bucket from the image's own size, never enlarging it.

        Returns:
            ``(reso, resized_size)``.
        """
        # max_area is None when the manager was built with max_reso=None;
        # treat that as "no area limit" rather than comparing against None.
        if self.max_area is not None and image_width * image_height > self.max_area:
            # The image is too large: choose the bucket on the assumption
            # that it is shrunk while keeping its aspect ratio.
            resized_width = math.sqrt(self.max_area * aspect_ratio)
            resized_height = self.max_area / resized_width
            assert abs(resized_width / resized_height - aspect_ratio) < 1e-2, "aspect is illegal"

            # Snap either the width or the height of the shrunken size to
            # reso_steps, whichever keeps the aspect ratio closer to the
            # image's (same logic as the original bucketing).
            b_width_rounded = self.round_to_steps(resized_width)
            b_height_in_wr = self.round_to_steps(b_width_rounded / aspect_ratio)
            ar_width_rounded = b_width_rounded / b_height_in_wr

            b_height_rounded = self.round_to_steps(resized_height)
            b_width_in_hr = self.round_to_steps(b_height_rounded * aspect_ratio)
            ar_height_rounded = b_width_in_hr / b_height_rounded

            if abs(ar_width_rounded - aspect_ratio) < abs(ar_height_rounded - aspect_ratio):
                resized_size = (b_width_rounded, int(b_width_rounded / aspect_ratio + .5))
            else:
                resized_size = (int(b_height_rounded * aspect_ratio + .5), b_height_rounded)
        else:
            # No resizing needed.
            resized_size = (image_width, image_height)

        # The bucket is no larger than the (resized) image, so the image is
        # cropped to fit rather than padded.
        bucket_width = resized_size[0] - resized_size[0] % self.reso_steps
        bucket_height = resized_size[1] - resized_size[1] % self.reso_steps

        return (bucket_width, bucket_height), resized_size

    def select_bucket(self, image_width, image_height):
        """Choose the bucket for an image and register it if it is new.

        Returns:
            ``(reso, resized_size, ar_error)`` where ``reso`` is the bucket
            resolution, ``resized_size`` the size the image should be scaled
            to before cropping, and ``ar_error`` the bucket's aspect ratio
            minus the image's.

        Raises:
            ZeroDivisionError: if a side rounds down to 0, e.g. an image side
                shorter than ``reso_steps`` in no-upscale mode.
        """
        aspect_ratio = image_width / image_height
        if not self.no_upscale:
            reso, resized_size = self._select_predefined(image_width, image_height, aspect_ratio)
        else:
            reso, resized_size = self._select_without_upscale(image_width, image_height, aspect_ratio)

        self.add_if_new_reso(reso)

        ar_error = (reso[0] / reso[1]) - aspect_ratio
        return reso, resized_size, ar_error
|
||||||
|
|
||||||
|
|
||||||
class BucketBatchIndex(NamedTuple):
    """Immutable (bucket_index, bucket_batch_size, batch_index) triple.

    NOTE(review): field meanings are inferred from their names only; confirm
    against the code that builds and consumes these tuples.
    """

    # presumably the position of the bucket in the bucket list
    bucket_index: int
    # presumably the batch size used for that bucket
    bucket_batch_size: int
    # presumably the index of the batch within the bucket
    batch_index: int
|
||||||
|
|
||||||
|
|
||||||
@ -85,11 +211,15 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
self.token_padding_disabled = False
|
self.token_padding_disabled = False
|
||||||
self.dataset_dirs_info = {}
|
self.dataset_dirs_info = {}
|
||||||
self.reg_dataset_dirs_info = {}
|
self.reg_dataset_dirs_info = {}
|
||||||
|
self.tag_frequency = {}
|
||||||
|
|
||||||
self.enable_bucket = False
|
self.enable_bucket = False
|
||||||
|
self.bucket_manager: BucketManager = None # not initialized
|
||||||
self.min_bucket_reso = None
|
self.min_bucket_reso = None
|
||||||
self.max_bucket_reso = None
|
self.max_bucket_reso = None
|
||||||
self.tag_frequency = {}
|
self.bucket_reso_steps = None
|
||||||
self.bucket_info = None
|
self.bucket_no_upscale = None
|
||||||
|
self.bucket_info = None # for metadata
|
||||||
|
|
||||||
self.tokenizer_max_length = self.tokenizer.model_max_length if max_token_length is None else max_token_length + 2
|
self.tokenizer_max_length = self.tokenizer.model_max_length if max_token_length is None else max_token_length + 2
|
||||||
|
|
||||||
@ -113,7 +243,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
|
|
||||||
self.image_transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5]), ])
|
self.image_transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5]), ])
|
||||||
|
|
||||||
self.image_data: dict[str, ImageInfo] = {}
|
self.image_data: Dict[str, ImageInfo] = {}
|
||||||
|
|
||||||
self.replacements = {}
|
self.replacements = {}
|
||||||
|
|
||||||
@ -215,66 +345,72 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
else:
|
else:
|
||||||
print("prepare dataset")
|
print("prepare dataset")
|
||||||
|
|
||||||
bucket_resos = self.bucket_resos
|
# bucketを作成し、画像をbucketに振り分ける
|
||||||
bucket_aspect_ratios = np.array(self.bucket_aspect_ratios)
|
|
||||||
|
|
||||||
# bucketを作成する
|
|
||||||
if self.enable_bucket:
|
if self.enable_bucket:
|
||||||
|
if self.bucket_manager is None: # fine tuningの場合でmetadataに定義がある場合は、すでに初期化済み
|
||||||
|
self.bucket_manager = BucketManager(self.bucket_no_upscale, (self.width, self.height),
|
||||||
|
self.min_bucket_reso, self.max_bucket_reso, self.bucket_reso_steps)
|
||||||
|
if not self.bucket_no_upscale:
|
||||||
|
self.bucket_manager.make_buckets()
|
||||||
|
else:
|
||||||
|
print("min_bucket_reso and max_bucket_reso are ignored if bucket_no_upscale is set, because bucket reso is defined by image size automatically / bucket_no_upscaleが指定された場合は、bucketの解像度は画像サイズから自動計算されるため、min_bucket_resoとmax_bucket_resoは無視されます")
|
||||||
|
|
||||||
img_ar_errors = []
|
img_ar_errors = []
|
||||||
for image_info in self.image_data.values():
|
for image_info in self.image_data.values():
|
||||||
# bucketを決める
|
|
||||||
image_width, image_height = image_info.image_size
|
image_width, image_height = image_info.image_size
|
||||||
aspect_ratio = image_width / image_height
|
image_info.bucket_reso, image_info.resized_size, ar_error = self.bucket_manager.select_bucket(image_width, image_height)
|
||||||
ar_errors = bucket_aspect_ratios - aspect_ratio
|
|
||||||
|
|
||||||
bucket_id = np.abs(ar_errors).argmin()
|
# print(image_info.image_key, image_info.bucket_reso)
|
||||||
image_info.bucket_reso = bucket_resos[bucket_id]
|
img_ar_errors.append(abs(ar_error))
|
||||||
|
|
||||||
ar_error = ar_errors[bucket_id]
|
self.bucket_manager.sort()
|
||||||
img_ar_errors.append(ar_error)
|
|
||||||
else:
|
else:
|
||||||
|
self.bucket_manager = BucketManager(False, (self.width, self.height), None, None, None)
|
||||||
|
self.bucket_manager.set_predefined_resos([(self.width, self.height)]) # ひとつの固定サイズbucketのみ
|
||||||
for image_info in self.image_data.values():
|
for image_info in self.image_data.values():
|
||||||
image_info.bucket_reso = bucket_resos[0] # bucket_resos contains (width, height) only
|
image_width, image_height = image_info.image_size
|
||||||
|
image_info.bucket_reso, image_info.resized_size, _ = self.bucket_manager.select_bucket(image_width, image_height)
|
||||||
# 画像をbucketに分割する
|
|
||||||
self.buckets: list[str] = [[] for _ in range(len(bucket_resos))]
|
|
||||||
reso_to_index = {}
|
|
||||||
for i, reso in enumerate(bucket_resos):
|
|
||||||
reso_to_index[reso] = i
|
|
||||||
|
|
||||||
for image_info in self.image_data.values():
|
for image_info in self.image_data.values():
|
||||||
bucket_index = reso_to_index[image_info.bucket_reso]
|
|
||||||
for _ in range(image_info.num_repeats):
|
for _ in range(image_info.num_repeats):
|
||||||
self.buckets[bucket_index].append(image_info.image_key)
|
self.bucket_manager.add_image(image_info.bucket_reso, image_info.image_key)
|
||||||
|
|
||||||
|
# bucket情報を表示、格納する
|
||||||
if self.enable_bucket:
|
if self.enable_bucket:
|
||||||
self.bucket_info = {"buckets": {}}
|
self.bucket_info = {"buckets": {}}
|
||||||
print("number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む)")
|
print("number of images (including repeats) / 各bucketの画像枚数(繰り返し回数を含む)")
|
||||||
for i, (reso, img_keys) in enumerate(zip(bucket_resos, self.buckets)):
|
for i, (reso, bucket) in enumerate(zip(self.bucket_manager.resos, self.bucket_manager.buckets)):
|
||||||
self.bucket_info["buckets"][i] = {"resolution": reso, "count": len(img_keys)}
|
count = len(bucket)
|
||||||
# only show bucket info if there is an actual image in it
|
if count > 0:
|
||||||
if len(img_keys) > 0:
|
self.bucket_info["buckets"][i] = {"resolution": reso, "count": len(bucket)}
|
||||||
print(f"bucket {i}: resolution {reso}, count: {len(img_keys)}")
|
print(f"bucket {i}: resolution {reso}, count: {len(bucket)}")
|
||||||
|
|
||||||
img_ar_errors = np.array(img_ar_errors)
|
img_ar_errors = np.array(img_ar_errors)
|
||||||
mean_img_ar_error = np.mean(np.abs(img_ar_errors))
|
mean_img_ar_error = np.mean(np.abs(img_ar_errors))
|
||||||
self.bucket_info["mean_img_ar_error"] = mean_img_ar_error
|
self.bucket_info["mean_img_ar_error"] = mean_img_ar_error
|
||||||
print(f"mean ar error (without repeats): {mean_img_ar_error}")
|
print(f"mean ar error (without repeats): {mean_img_ar_error}")
|
||||||
|
|
||||||
# 参照用indexを作る
|
# データ参照用indexを作る。このindexはdatasetのshuffleに用いられる
|
||||||
self.buckets_indices: list(BucketBatchIndex) = []
|
self.buckets_indices: List(BucketBatchIndex) = []
|
||||||
for bucket_index, bucket in enumerate(self.buckets):
|
for bucket_index, bucket in enumerate(self.bucket_manager.buckets):
|
||||||
batch_count = int(math.ceil(len(bucket) / self.batch_size))
|
# bucketが細分化されることにより、ひとつのbucketに一種類の画像のみというケースが増え、つまりそれは
|
||||||
|
# ひとつのbatchが同じ画像で占められることになるので、さすがに良くないであろう
|
||||||
|
# そのためバッチサイズを画像種類までに制限する
|
||||||
|
# ただそれでも同一画像が同一バッチに含まれる可能性はあるので、繰り返し回数が少ないほうがshuffleの品質は良くなることは間違いない?
|
||||||
|
# TODO 正則化画像をepochまたがりで利用する仕組み
|
||||||
|
num_of_image_types = len(set(bucket))
|
||||||
|
bucket_batch_size = min(self.batch_size, num_of_image_types)
|
||||||
|
batch_count = int(math.ceil(len(bucket) / bucket_batch_size))
|
||||||
|
# print(bucket_index, num_of_image_types, bucket_batch_size, batch_count)
|
||||||
for batch_index in range(batch_count):
|
for batch_index in range(batch_count):
|
||||||
self.buckets_indices.append(BucketBatchIndex(bucket_index, batch_index))
|
self.buckets_indices.append(BucketBatchIndex(bucket_index, bucket_batch_size, batch_index))
|
||||||
|
|
||||||
self.shuffle_buckets()
|
self.shuffle_buckets()
|
||||||
self._length = len(self.buckets_indices)
|
self._length = len(self.buckets_indices)
|
||||||
|
|
||||||
def shuffle_buckets(self):
|
def shuffle_buckets(self):
|
||||||
random.shuffle(self.buckets_indices)
|
random.shuffle(self.buckets_indices)
|
||||||
for bucket in self.buckets:
|
self.bucket_manager.shuffle()
|
||||||
random.shuffle(bucket)
|
|
||||||
|
|
||||||
def load_image(self, image_path):
|
def load_image(self, image_path):
|
||||||
image = Image.open(image_path)
|
image = Image.open(image_path)
|
||||||
@ -283,28 +419,30 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
img = np.array(image, np.uint8)
|
img = np.array(image, np.uint8)
|
||||||
return img
|
return img
|
||||||
|
|
||||||
def resize_and_trim(self, image, reso):
|
def trim_and_resize_if_required(self, image, reso, resized_size):
|
||||||
image_height, image_width = image.shape[0:2]
|
image_height, image_width = image.shape[0:2]
|
||||||
ar_img = image_width / image_height
|
|
||||||
ar_reso = reso[0] / reso[1]
|
|
||||||
if ar_img > ar_reso: # 横が長い→縦を合わせる
|
|
||||||
scale = reso[1] / image_height
|
|
||||||
else:
|
|
||||||
scale = reso[0] / image_width
|
|
||||||
resized_size = (int(image_width * scale + .5), int(image_height * scale + .5))
|
|
||||||
|
|
||||||
image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA) # INTER_AREAでやりたいのでcv2でリサイズ
|
if image_width != resized_size[0] or image_height != resized_size[1]:
|
||||||
if resized_size[0] > reso[0]:
|
# リサイズする
|
||||||
trim_size = resized_size[0] - reso[0]
|
image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA) # INTER_AREAでやりたいのでcv2でリサイズ
|
||||||
image = image[:, trim_size//2:trim_size//2 + reso[0]]
|
|
||||||
elif resized_size[1] > reso[1]:
|
image_height, image_width = image.shape[0:2]
|
||||||
trim_size = resized_size[1] - reso[1]
|
if image_width > reso[0]:
|
||||||
image = image[trim_size//2:trim_size//2 + reso[1]]
|
trim_size = image_width - reso[0]
|
||||||
assert image.shape[0] == reso[1] and image.shape[1] == reso[0], \
|
p = trim_size // 2 if not self.random_crop else random.randint(0, trim_size)
|
||||||
f"internal error, illegal trimmed size: {image.shape}, {reso}"
|
# print("w", trim_size, p)
|
||||||
|
image = image[:, p:p + reso[0]]
|
||||||
|
if image_height > reso[1]:
|
||||||
|
trim_size = image_height - reso[1]
|
||||||
|
p = trim_size // 2 if not self.random_crop else random.randint(0, trim_size)
|
||||||
|
# print("h", trim_size, p)
|
||||||
|
image = image[p:p + reso[1]]
|
||||||
|
|
||||||
|
assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f"internal error, illegal trimmed size: {image.shape}, {reso}"
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def cache_latents(self, vae):
|
def cache_latents(self, vae):
|
||||||
|
# TODO ここを高速化したい
|
||||||
print("caching latents.")
|
print("caching latents.")
|
||||||
for info in tqdm(self.image_data.values()):
|
for info in tqdm(self.image_data.values()):
|
||||||
if info.latents_npz is not None:
|
if info.latents_npz is not None:
|
||||||
@ -316,7 +454,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
image = self.load_image(info.absolute_path)
|
image = self.load_image(info.absolute_path)
|
||||||
image = self.resize_and_trim(image, info.bucket_reso)
|
image = self.trim_and_resize_if_required(image, info.bucket_reso, info.resized_size)
|
||||||
|
|
||||||
img_tensor = self.image_transforms(image)
|
img_tensor = self.image_transforms(image)
|
||||||
img_tensor = img_tensor.unsqueeze(0).to(device=vae.device, dtype=vae.dtype)
|
img_tensor = img_tensor.unsqueeze(0).to(device=vae.device, dtype=vae.dtype)
|
||||||
@ -406,8 +544,9 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
if index == 0:
|
if index == 0:
|
||||||
self.shuffle_buckets()
|
self.shuffle_buckets()
|
||||||
|
|
||||||
bucket = self.buckets[self.buckets_indices[index].bucket_index]
|
bucket = self.bucket_manager.buckets[self.buckets_indices[index].bucket_index]
|
||||||
image_index = self.buckets_indices[index].batch_index * self.batch_size
|
bucket_batch_size = self.buckets_indices[index].bucket_batch_size
|
||||||
|
image_index = self.buckets_indices[index].batch_index * bucket_batch_size
|
||||||
|
|
||||||
loss_weights = []
|
loss_weights = []
|
||||||
captions = []
|
captions = []
|
||||||
@ -415,7 +554,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
latents_list = []
|
latents_list = []
|
||||||
images = []
|
images = []
|
||||||
|
|
||||||
for image_key in bucket[image_index:image_index + self.batch_size]:
|
for image_key in bucket[image_index:image_index + bucket_batch_size]:
|
||||||
image_info = self.image_data[image_key]
|
image_info = self.image_data[image_key]
|
||||||
loss_weights.append(self.prior_loss_weight if image_info.is_reg else 1.0)
|
loss_weights.append(self.prior_loss_weight if image_info.is_reg else 1.0)
|
||||||
|
|
||||||
@ -433,7 +572,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
im_h, im_w = img.shape[0:2]
|
im_h, im_w = img.shape[0:2]
|
||||||
|
|
||||||
if self.enable_bucket:
|
if self.enable_bucket:
|
||||||
img = self.resize_and_trim(img, image_info.bucket_reso)
|
img = self.trim_and_resize_if_required(img, image_info.bucket_reso, image_info.resized_size)
|
||||||
else:
|
else:
|
||||||
if face_cx > 0: # 顔位置情報あり
|
if face_cx > 0: # 顔位置情報あり
|
||||||
img = self.crop_target(img, face_cx, face_cy, face_w, face_h)
|
img = self.crop_target(img, face_cx, face_cy, face_w, face_h)
|
||||||
@ -490,7 +629,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
|
|
||||||
|
|
||||||
class DreamBoothDataset(BaseDataset):
|
class DreamBoothDataset(BaseDataset):
|
||||||
def __init__(self, batch_size, train_data_dir, reg_data_dir, tokenizer, max_token_length, caption_extension, shuffle_caption, shuffle_keep_tokens, resolution, enable_bucket, min_bucket_reso, max_bucket_reso, prior_loss_weight, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset) -> None:
|
def __init__(self, batch_size, train_data_dir, reg_data_dir, tokenizer, max_token_length, caption_extension, shuffle_caption, shuffle_keep_tokens, resolution, enable_bucket, min_bucket_reso, max_bucket_reso, bucket_reso_steps, bucket_no_upscale, prior_loss_weight, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset) -> None:
|
||||||
super().__init__(tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens,
|
super().__init__(tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens,
|
||||||
resolution, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset)
|
resolution, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset)
|
||||||
|
|
||||||
@ -505,13 +644,15 @@ class DreamBoothDataset(BaseDataset):
|
|||||||
if self.enable_bucket:
|
if self.enable_bucket:
|
||||||
assert min(resolution) >= min_bucket_reso, f"min_bucket_reso must be equal or less than resolution / min_bucket_resoは最小解像度より大きくできません。解像度を大きくするかmin_bucket_resoを小さくしてください"
|
assert min(resolution) >= min_bucket_reso, f"min_bucket_reso must be equal or less than resolution / min_bucket_resoは最小解像度より大きくできません。解像度を大きくするかmin_bucket_resoを小さくしてください"
|
||||||
assert max(resolution) <= max_bucket_reso, f"max_bucket_reso must be equal or greater than resolution / max_bucket_resoは最大解像度より小さくできません。解像度を小さくするかmin_bucket_resoを大きくしてください"
|
assert max(resolution) <= max_bucket_reso, f"max_bucket_reso must be equal or greater than resolution / max_bucket_resoは最大解像度より小さくできません。解像度を小さくするかmin_bucket_resoを大きくしてください"
|
||||||
self.bucket_resos, self.bucket_aspect_ratios = model_util.make_bucket_resolutions(
|
|
||||||
(self.width, self.height), min_bucket_reso, max_bucket_reso)
|
|
||||||
self.min_bucket_reso = min_bucket_reso
|
self.min_bucket_reso = min_bucket_reso
|
||||||
self.max_bucket_reso = max_bucket_reso
|
self.max_bucket_reso = max_bucket_reso
|
||||||
|
self.bucket_reso_steps = bucket_reso_steps
|
||||||
|
self.bucket_no_upscale = bucket_no_upscale
|
||||||
else:
|
else:
|
||||||
self.bucket_resos = [(self.width, self.height)]
|
self.min_bucket_reso = None
|
||||||
self.bucket_aspect_ratios = [self.width / self.height]
|
self.max_bucket_reso = None
|
||||||
|
self.bucket_reso_steps = None # この情報は使われない
|
||||||
|
self.bucket_no_upscale = False
|
||||||
|
|
||||||
def read_caption(img_path):
|
def read_caption(img_path):
|
||||||
# captionの候補ファイル名を作る
|
# captionの候補ファイル名を作る
|
||||||
@ -582,7 +723,7 @@ class DreamBoothDataset(BaseDataset):
|
|||||||
num_reg_images = 0
|
num_reg_images = 0
|
||||||
if reg_data_dir:
|
if reg_data_dir:
|
||||||
print("prepare reg images.")
|
print("prepare reg images.")
|
||||||
reg_infos: list[ImageInfo] = []
|
reg_infos: List[ImageInfo] = []
|
||||||
|
|
||||||
reg_dirs = os.listdir(reg_data_dir)
|
reg_dirs = os.listdir(reg_data_dir)
|
||||||
for dir in reg_dirs:
|
for dir in reg_dirs:
|
||||||
@ -621,7 +762,7 @@ class DreamBoothDataset(BaseDataset):
|
|||||||
|
|
||||||
|
|
||||||
class FineTuningDataset(BaseDataset):
|
class FineTuningDataset(BaseDataset):
|
||||||
def __init__(self, json_file_name, batch_size, train_data_dir, tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens, resolution, enable_bucket, min_bucket_reso, max_bucket_reso, flip_aug, color_aug, face_crop_aug_range, random_crop, dataset_repeats, debug_dataset) -> None:
|
def __init__(self, json_file_name, batch_size, train_data_dir, tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens, resolution, enable_bucket, min_bucket_reso, max_bucket_reso, bucket_reso_steps, bucket_no_upscale, flip_aug, color_aug, face_crop_aug_range, random_crop, dataset_repeats, debug_dataset) -> None:
|
||||||
super().__init__(tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens,
|
super().__init__(tokenizer, max_token_length, shuffle_caption, shuffle_keep_tokens,
|
||||||
resolution, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset)
|
resolution, flip_aug, color_aug, face_crop_aug_range, random_crop, debug_dataset)
|
||||||
|
|
||||||
@ -660,7 +801,7 @@ class FineTuningDataset(BaseDataset):
|
|||||||
image_info = ImageInfo(image_key, dataset_repeats, caption, False, abs_path)
|
image_info = ImageInfo(image_key, dataset_repeats, caption, False, abs_path)
|
||||||
image_info.image_size = img_md.get('train_resolution')
|
image_info.image_size = img_md.get('train_resolution')
|
||||||
|
|
||||||
if not self.color_aug:
|
if not self.color_aug and not self.random_crop:
|
||||||
# if npz exists, use them
|
# if npz exists, use them
|
||||||
image_info.latents_npz, image_info.latents_npz_flipped = self.image_key_to_npz_file(image_key)
|
image_info.latents_npz, image_info.latents_npz_flipped = self.image_key_to_npz_file(image_key)
|
||||||
|
|
||||||
@ -672,7 +813,8 @@ class FineTuningDataset(BaseDataset):
|
|||||||
self.dataset_dirs_info[os.path.basename(json_file_name)] = {"n_repeats": dataset_repeats, "img_count": len(metadata)}
|
self.dataset_dirs_info[os.path.basename(json_file_name)] = {"n_repeats": dataset_repeats, "img_count": len(metadata)}
|
||||||
|
|
||||||
# check existence of all npz files
|
# check existence of all npz files
|
||||||
if not self.color_aug:
|
use_npz_latents = not (self.color_aug or self.random_crop)
|
||||||
|
if use_npz_latents:
|
||||||
npz_any = False
|
npz_any = False
|
||||||
npz_all = True
|
npz_all = True
|
||||||
for image_info in self.image_data.values():
|
for image_info in self.image_data.values():
|
||||||
@ -687,13 +829,15 @@ class FineTuningDataset(BaseDataset):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if not npz_any:
|
if not npz_any:
|
||||||
print(f"npz file does not exist. make latents with VAE / npzファイルが見つからないためVAEを使ってlatentsを取得します")
|
use_npz_latents = False
|
||||||
|
print(f"npz file does not exist. ignore npz files / npzファイルが見つからないためnpzファイルを無視します")
|
||||||
elif not npz_all:
|
elif not npz_all:
|
||||||
|
use_npz_latents = False
|
||||||
print(f"some of npz file does not exist. ignore npz files / いくつかのnpzファイルが見つからないためnpzファイルを無視します")
|
print(f"some of npz file does not exist. ignore npz files / いくつかのnpzファイルが見つからないためnpzファイルを無視します")
|
||||||
if self.flip_aug:
|
if self.flip_aug:
|
||||||
print("maybe no flipped files / 反転されたnpzファイルがないのかもしれません")
|
print("maybe no flipped files / 反転されたnpzファイルがないのかもしれません")
|
||||||
for image_info in self.image_data.values():
|
# else:
|
||||||
image_info.latents_npz = image_info.latents_npz_flipped = None
|
# print("npz files are not used with color_aug and/or random_crop / color_augまたはrandom_cropが指定されているためnpzファイルは使用されません")
|
||||||
|
|
||||||
# check min/max bucket size
|
# check min/max bucket size
|
||||||
sizes = set()
|
sizes = set()
|
||||||
@ -707,30 +851,34 @@ class FineTuningDataset(BaseDataset):
|
|||||||
resos.add(tuple(image_info.image_size))
|
resos.add(tuple(image_info.image_size))
|
||||||
|
|
||||||
if sizes is None:
|
if sizes is None:
|
||||||
|
if use_npz_latents:
|
||||||
|
use_npz_latents = False
|
||||||
|
print(f"npz files exist, but no bucket info in metadata. ignore npz files / メタデータにbucket情報がないためnpzファイルを無視します")
|
||||||
|
|
||||||
assert resolution is not None, "if metadata doesn't have bucket info, resolution is required / メタデータにbucket情報がない場合はresolutionを指定してください"
|
assert resolution is not None, "if metadata doesn't have bucket info, resolution is required / メタデータにbucket情報がない場合はresolutionを指定してください"
|
||||||
|
|
||||||
self.enable_bucket = enable_bucket
|
self.enable_bucket = enable_bucket
|
||||||
if self.enable_bucket:
|
if self.enable_bucket:
|
||||||
assert min(resolution) >= min_bucket_reso, f"min_bucket_reso must be equal or less than resolution / min_bucket_resoは最小解像度より大きくできません。解像度を大きくするかmin_bucket_resoを小さくしてください"
|
|
||||||
assert max(resolution) <= max_bucket_reso, f"max_bucket_reso must be equal or greater than resolution / max_bucket_resoは最大解像度より小さくできません。解像度を小さくするかmin_bucket_resoを大きくしてください"
|
|
||||||
self.bucket_resos, self.bucket_aspect_ratios = model_util.make_bucket_resolutions(
|
|
||||||
(self.width, self.height), min_bucket_reso, max_bucket_reso)
|
|
||||||
self.min_bucket_reso = min_bucket_reso
|
self.min_bucket_reso = min_bucket_reso
|
||||||
self.max_bucket_reso = max_bucket_reso
|
self.max_bucket_reso = max_bucket_reso
|
||||||
else:
|
self.bucket_reso_steps = bucket_reso_steps
|
||||||
self.bucket_resos = [(self.width, self.height)]
|
self.bucket_no_upscale = bucket_no_upscale
|
||||||
self.bucket_aspect_ratios = [self.width / self.height]
|
|
||||||
else:
|
else:
|
||||||
if not enable_bucket:
|
if not enable_bucket:
|
||||||
print("metadata has bucket info, enable bucketing / メタデータにbucket情報があるためbucketを有効にします")
|
print("metadata has bucket info, enable bucketing / メタデータにbucket情報があるためbucketを有効にします")
|
||||||
print("using bucket info in metadata / メタデータ内のbucket情報を使います")
|
print("using bucket info in metadata / メタデータ内のbucket情報を使います")
|
||||||
self.enable_bucket = True
|
self.enable_bucket = True
|
||||||
self.bucket_resos = list(resos)
|
|
||||||
self.bucket_resos.sort()
|
|
||||||
self.bucket_aspect_ratios = [w / h for w, h in self.bucket_resos]
|
|
||||||
|
|
||||||
self.min_bucket_reso = min([min(reso) for reso in resos])
|
assert not bucket_no_upscale, "if metadata has bucket info, bucket reso is precalculated, so bucket_no_upscale cannot be used / メタデータ内にbucket情報がある場合はbucketの解像度は計算済みのため、bucket_no_upscaleは使えません"
|
||||||
self.max_bucket_reso = max([max(reso) for reso in resos])
|
|
||||||
|
# bucket情報を初期化しておく、make_bucketsで再作成しない
|
||||||
|
self.bucket_manager = BucketManager(False, None, None, None, None)
|
||||||
|
self.bucket_manager.set_predefined_resos(resos)
|
||||||
|
|
||||||
|
# npz情報をきれいにしておく
|
||||||
|
if not use_npz_latents:
|
||||||
|
for image_info in self.image_data.values():
|
||||||
|
image_info.latents_npz = image_info.latents_npz_flipped = None
|
||||||
|
|
||||||
def image_key_to_npz_file(self, image_key):
|
def image_key_to_npz_file(self, image_key):
|
||||||
base_name = os.path.splitext(image_key)[0]
|
base_name = os.path.splitext(image_key)[0]
|
||||||
@ -760,15 +908,16 @@ def debug_dataset(train_dataset, show_input_ids=False):
|
|||||||
print(f"Total dataset length (steps) / データセットの長さ(ステップ数): {len(train_dataset)}")
|
print(f"Total dataset length (steps) / データセットの長さ(ステップ数): {len(train_dataset)}")
|
||||||
print("Escape for exit. / Escキーで中断、終了します")
|
print("Escape for exit. / Escキーで中断、終了します")
|
||||||
k = 0
|
k = 0
|
||||||
for example in train_dataset:
|
for i, example in enumerate(train_dataset):
|
||||||
if example['latents'] is not None:
|
if example['latents'] is not None:
|
||||||
print("sample has latents from npz file")
|
print(f"sample has latents from npz file: {example['latents'].size()}")
|
||||||
for j, (ik, cap, lw, iid) in enumerate(zip(example['image_keys'], example['captions'], example['loss_weights'], example['input_ids'])):
|
for j, (ik, cap, lw, iid) in enumerate(zip(example['image_keys'], example['captions'], example['loss_weights'], example['input_ids'])):
|
||||||
print(f'{ik}, size: {train_dataset.image_data[ik].image_size}, caption: "{cap}", loss weight: {lw}')
|
print(f'{ik}, size: {train_dataset.image_data[ik].image_size}, loss weight: {lw}, caption: "{cap}"')
|
||||||
if show_input_ids:
|
if show_input_ids:
|
||||||
print(f"input ids: {iid}")
|
print(f"input ids: {iid}")
|
||||||
if example['images'] is not None:
|
if example['images'] is not None:
|
||||||
im = example['images'][j]
|
im = example['images'][j]
|
||||||
|
print(f"image size: {im.size()}")
|
||||||
im = ((im.numpy() + 1.0) * 127.5).astype(np.uint8)
|
im = ((im.numpy() + 1.0) * 127.5).astype(np.uint8)
|
||||||
im = np.transpose(im, (1, 2, 0)) # c,H,W -> H,W,c
|
im = np.transpose(im, (1, 2, 0)) # c,H,W -> H,W,c
|
||||||
im = im[:, :, ::-1] # RGB -> BGR (OpenCV)
|
im = im[:, :, ::-1] # RGB -> BGR (OpenCV)
|
||||||
@ -778,7 +927,7 @@ def debug_dataset(train_dataset, show_input_ids=False):
|
|||||||
cv2.destroyAllWindows()
|
cv2.destroyAllWindows()
|
||||||
if k == 27:
|
if k == 27:
|
||||||
break
|
break
|
||||||
if k == 27 or example['images'] is None:
|
if k == 27 or (example['images'] is None and i >= 8):
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
@ -1254,6 +1403,10 @@ def add_dataset_arguments(parser: argparse.ArgumentParser, support_dreambooth: b
|
|||||||
help="enable buckets for multi aspect ratio training / 複数解像度学習のためのbucketを有効にする")
|
help="enable buckets for multi aspect ratio training / 複数解像度学習のためのbucketを有効にする")
|
||||||
parser.add_argument("--min_bucket_reso", type=int, default=256, help="minimum resolution for buckets / bucketの最小解像度")
|
parser.add_argument("--min_bucket_reso", type=int, default=256, help="minimum resolution for buckets / bucketの最小解像度")
|
||||||
parser.add_argument("--max_bucket_reso", type=int, default=1024, help="maximum resolution for buckets / bucketの最大解像度")
|
parser.add_argument("--max_bucket_reso", type=int, default=1024, help="maximum resolution for buckets / bucketの最大解像度")
|
||||||
|
parser.add_argument("--bucket_reso_steps", type=int, default=64,
|
||||||
|
help="steps of resolution for buckets, divisible by 8 is recommended / bucketの解像度の単位、8で割り切れる値を推奨します")
|
||||||
|
parser.add_argument("--bucket_no_upscale", action="store_true",
|
||||||
|
help="make bucket for each image without upscaling / 画像を拡大せずbucketを作成します")
|
||||||
|
|
||||||
if support_dreambooth:
|
if support_dreambooth:
|
||||||
# DreamBooth dataset
|
# DreamBooth dataset
|
||||||
@ -1285,6 +1438,7 @@ def prepare_dataset_args(args: argparse.Namespace, support_metadata: bool):
|
|||||||
|
|
||||||
if args.cache_latents:
|
if args.cache_latents:
|
||||||
assert not args.color_aug, "when caching latents, color_aug cannot be used / latentをキャッシュするときはcolor_augは使えません"
|
assert not args.color_aug, "when caching latents, color_aug cannot be used / latentをキャッシュするときはcolor_augは使えません"
|
||||||
|
assert not args.random_crop, "when caching latents, random_crop cannot be used / latentをキャッシュするときはrandom_cropは使えません"
|
||||||
|
|
||||||
# assert args.resolution is not None, f"resolution is required / resolution(解像度)を指定してください"
|
# assert args.resolution is not None, f"resolution is required / resolution(解像度)を指定してください"
|
||||||
if args.resolution is not None:
|
if args.resolution is not None:
|
||||||
@ -1296,14 +1450,14 @@ def prepare_dataset_args(args: argparse.Namespace, support_metadata: bool):
|
|||||||
|
|
||||||
if args.face_crop_aug_range is not None:
|
if args.face_crop_aug_range is not None:
|
||||||
args.face_crop_aug_range = tuple([float(r) for r in args.face_crop_aug_range.split(',')])
|
args.face_crop_aug_range = tuple([float(r) for r in args.face_crop_aug_range.split(',')])
|
||||||
assert len(args.face_crop_aug_range) == 2, \
|
assert len(args.face_crop_aug_range) == 2 and args.face_crop_aug_range[0] <= args.face_crop_aug_range[1], \
|
||||||
f"face_crop_aug_range must be two floats / face_crop_aug_rangeは'下限,上限'で指定してください: {args.face_crop_aug_range}"
|
f"face_crop_aug_range must be two floats / face_crop_aug_rangeは'下限,上限'で指定してください: {args.face_crop_aug_range}"
|
||||||
else:
|
else:
|
||||||
args.face_crop_aug_range = None
|
args.face_crop_aug_range = None
|
||||||
|
|
||||||
if support_metadata:
|
if support_metadata:
|
||||||
if args.in_json is not None and args.color_aug:
|
if args.in_json is not None and (args.color_aug or args.random_crop):
|
||||||
print(f"latents in npz is ignored when color_aug is True / color_augを有効にした場合、npzファイルのlatentsは無視されます")
|
print(f"latents in npz is ignored when color_aug or random_crop is True / color_augまたはrandom_cropを有効にした場合、npzファイルのlatentsは無視されます")
|
||||||
|
|
||||||
|
|
||||||
def load_tokenizer(args: argparse.Namespace):
|
def load_tokenizer(args: argparse.Namespace):
|
||||||
@ -1564,4 +1718,4 @@ class ImageLoadingDataset(torch.utils.data.Dataset):
|
|||||||
return (tensor_pil, img_path)
|
return (tensor_pil, img_path)
|
||||||
|
|
||||||
|
|
||||||
# endregion
|
# endregion
|
@ -2,7 +2,11 @@ import gradio as gr
|
|||||||
from easygui import msgbox
|
from easygui import msgbox
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
from .common_gui import get_saveasfilename_path, get_any_file_path, get_file_path
|
from .common_gui import (
|
||||||
|
get_saveasfilename_path,
|
||||||
|
get_any_file_path,
|
||||||
|
get_file_path,
|
||||||
|
)
|
||||||
|
|
||||||
folder_symbol = '\U0001f4c2' # 📂
|
folder_symbol = '\U0001f4c2' # 📂
|
||||||
refresh_symbol = '\U0001f504' # 🔄
|
refresh_symbol = '\U0001f504' # 🔄
|
||||||
@ -30,9 +34,11 @@ def verify_lora(
|
|||||||
|
|
||||||
# Run the command
|
# Run the command
|
||||||
subprocess.run(run_cmd)
|
subprocess.run(run_cmd)
|
||||||
process = subprocess.Popen(run_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
process = subprocess.Popen(
|
||||||
|
run_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||||
|
)
|
||||||
output, error = process.communicate()
|
output, error = process.communicate()
|
||||||
|
|
||||||
return (output.decode(), error.decode())
|
return (output.decode(), error.decode())
|
||||||
|
|
||||||
|
|
||||||
@ -46,10 +52,10 @@ def gradio_verify_lora_tab():
|
|||||||
gr.Markdown(
|
gr.Markdown(
|
||||||
'This utility can verify a LoRA network to make sure it is properly trained.'
|
'This utility can verify a LoRA network to make sure it is properly trained.'
|
||||||
)
|
)
|
||||||
|
|
||||||
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
lora_ext = gr.Textbox(value='*.pt *.safetensors', visible=False)
|
||||||
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
lora_ext_name = gr.Textbox(value='LoRA model types', visible=False)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
lora_model = gr.Textbox(
|
lora_model = gr.Textbox(
|
||||||
label='LoRA model',
|
label='LoRA model',
|
||||||
@ -64,7 +70,7 @@ def gradio_verify_lora_tab():
|
|||||||
inputs=[lora_model, lora_ext, lora_ext_name],
|
inputs=[lora_model, lora_ext, lora_ext_name],
|
||||||
outputs=lora_model,
|
outputs=lora_model,
|
||||||
)
|
)
|
||||||
verify_button = gr.Button('Verify', variant="primary")
|
verify_button = gr.Button('Verify', variant='primary')
|
||||||
|
|
||||||
lora_model_verif_output = gr.Textbox(
|
lora_model_verif_output = gr.Textbox(
|
||||||
label='Output',
|
label='Output',
|
||||||
@ -73,7 +79,7 @@ def gradio_verify_lora_tab():
|
|||||||
lines=1,
|
lines=1,
|
||||||
max_lines=10,
|
max_lines=10,
|
||||||
)
|
)
|
||||||
|
|
||||||
lora_model_verif_error = gr.Textbox(
|
lora_model_verif_error = gr.Textbox(
|
||||||
label='Error',
|
label='Error',
|
||||||
placeholder='Verification error',
|
placeholder='Verification error',
|
||||||
@ -87,5 +93,5 @@ def gradio_verify_lora_tab():
|
|||||||
inputs=[
|
inputs=[
|
||||||
lora_model,
|
lora_model,
|
||||||
],
|
],
|
||||||
outputs=[lora_model_verif_output, lora_model_verif_error]
|
outputs=[lora_model_verif_output, lora_model_verif_error],
|
||||||
)
|
)
|
||||||
|
@ -14,7 +14,7 @@ def caption_images(train_data_dir, caption_extension, batch_size, thresh):
|
|||||||
if train_data_dir == '':
|
if train_data_dir == '':
|
||||||
msgbox('Image folder is missing...')
|
msgbox('Image folder is missing...')
|
||||||
return
|
return
|
||||||
|
|
||||||
if caption_extension == '':
|
if caption_extension == '':
|
||||||
msgbox('Please provide an extension for the caption files.')
|
msgbox('Please provide an extension for the caption files.')
|
||||||
return
|
return
|
||||||
|
75
lora_gui.py
75
lora_gui.py
@ -91,9 +91,14 @@ def save_configuration(
|
|||||||
max_train_epochs,
|
max_train_epochs,
|
||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
network_alpha,
|
network_alpha,
|
||||||
training_comment, keep_tokens,
|
training_comment,
|
||||||
lr_scheduler_num_cycles, lr_scheduler_power,
|
keep_tokens,
|
||||||
|
lr_scheduler_num_cycles,
|
||||||
|
lr_scheduler_power,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -182,9 +187,14 @@ def open_configuration(
|
|||||||
max_train_epochs,
|
max_train_epochs,
|
||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
network_alpha,
|
network_alpha,
|
||||||
training_comment, keep_tokens,
|
training_comment,
|
||||||
lr_scheduler_num_cycles, lr_scheduler_power,
|
keep_tokens,
|
||||||
|
lr_scheduler_num_cycles,
|
||||||
|
lr_scheduler_power,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -257,9 +267,14 @@ def train_model(
|
|||||||
max_train_epochs,
|
max_train_epochs,
|
||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
network_alpha,
|
network_alpha,
|
||||||
training_comment, keep_tokens,
|
training_comment,
|
||||||
lr_scheduler_num_cycles, lr_scheduler_power,
|
keep_tokens,
|
||||||
|
lr_scheduler_num_cycles,
|
||||||
|
lr_scheduler_power,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
if pretrained_model_name_or_path == '':
|
if pretrained_model_name_or_path == '':
|
||||||
msgbox('Source model information is missing')
|
msgbox('Source model information is missing')
|
||||||
@ -281,12 +296,18 @@ def train_model(
|
|||||||
if output_dir == '':
|
if output_dir == '':
|
||||||
msgbox('Output folder path is missing')
|
msgbox('Output folder path is missing')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if int(bucket_reso_steps) < 1:
|
||||||
|
msgbox('Bucket resolution steps need to be greater than 0')
|
||||||
|
return
|
||||||
|
|
||||||
if not os.path.exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
if stop_text_encoder_training_pct > 0:
|
if stop_text_encoder_training_pct > 0:
|
||||||
msgbox('Output "stop text encoder training" is not yet supported. Ignoring')
|
msgbox(
|
||||||
|
'Output "stop text encoder training" is not yet supported. Ignoring'
|
||||||
|
)
|
||||||
stop_text_encoder_training_pct = 0
|
stop_text_encoder_training_pct = 0
|
||||||
|
|
||||||
# If string is empty set string to 0.
|
# If string is empty set string to 0.
|
||||||
@ -358,6 +379,9 @@ def train_model(
|
|||||||
print(f'lr_warmup_steps = {lr_warmup_steps}')
|
print(f'lr_warmup_steps = {lr_warmup_steps}')
|
||||||
|
|
||||||
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_network.py"'
|
run_cmd = f'accelerate launch --num_cpu_threads_per_process={num_cpu_threads_per_process} "train_network.py"'
|
||||||
|
|
||||||
|
run_cmd += f' --bucket_reso_steps=1 --bucket_no_upscale' # --random_crop'
|
||||||
|
|
||||||
if v2:
|
if v2:
|
||||||
run_cmd += ' --v2'
|
run_cmd += ' --v2'
|
||||||
if v_parameterization:
|
if v_parameterization:
|
||||||
@ -387,7 +411,7 @@ def train_model(
|
|||||||
if not float(prior_loss_weight) == 1.0:
|
if not float(prior_loss_weight) == 1.0:
|
||||||
run_cmd += f' --prior_loss_weight={prior_loss_weight}'
|
run_cmd += f' --prior_loss_weight={prior_loss_weight}'
|
||||||
run_cmd += f' --network_module=networks.lora'
|
run_cmd += f' --network_module=networks.lora'
|
||||||
|
|
||||||
if not (float(text_encoder_lr) == 0) or not (float(unet_lr) == 0):
|
if not (float(text_encoder_lr) == 0) or not (float(unet_lr) == 0):
|
||||||
if not (float(text_encoder_lr) == 0) and not (float(unet_lr) == 0):
|
if not (float(text_encoder_lr) == 0) and not (float(unet_lr) == 0):
|
||||||
run_cmd += f' --text_encoder_lr={text_encoder_lr}'
|
run_cmd += f' --text_encoder_lr={text_encoder_lr}'
|
||||||
@ -399,14 +423,12 @@ def train_model(
|
|||||||
run_cmd += f' --unet_lr={unet_lr}'
|
run_cmd += f' --unet_lr={unet_lr}'
|
||||||
run_cmd += f' --network_train_unet_only'
|
run_cmd += f' --network_train_unet_only'
|
||||||
else:
|
else:
|
||||||
if float(text_encoder_lr) == 0:
|
if float(text_encoder_lr) == 0:
|
||||||
msgbox(
|
msgbox('Please input learning rate values.')
|
||||||
'Please input learning rate values.'
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
run_cmd += f' --network_dim={network_dim}'
|
run_cmd += f' --network_dim={network_dim}'
|
||||||
|
|
||||||
if not lora_network_weights == '':
|
if not lora_network_weights == '':
|
||||||
run_cmd += f' --network_weights="{lora_network_weights}"'
|
run_cmd += f' --network_weights="{lora_network_weights}"'
|
||||||
if int(gradient_accumulation_steps) > 1:
|
if int(gradient_accumulation_steps) > 1:
|
||||||
@ -451,6 +473,9 @@ def train_model(
|
|||||||
use_8bit_adam=use_8bit_adam,
|
use_8bit_adam=use_8bit_adam,
|
||||||
keep_tokens=keep_tokens,
|
keep_tokens=keep_tokens,
|
||||||
persistent_data_loader_workers=persistent_data_loader_workers,
|
persistent_data_loader_workers=persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale=bucket_no_upscale,
|
||||||
|
random_crop=random_crop,
|
||||||
|
bucket_reso_steps=bucket_reso_steps,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(run_cmd)
|
print(run_cmd)
|
||||||
@ -672,11 +697,13 @@ def lora_tab(
|
|||||||
label='Prior loss weight', value=1.0
|
label='Prior loss weight', value=1.0
|
||||||
)
|
)
|
||||||
lr_scheduler_num_cycles = gr.Textbox(
|
lr_scheduler_num_cycles = gr.Textbox(
|
||||||
label='LR number of cycles', placeholder='(Optional) For Cosine with restart and polynomial only'
|
label='LR number of cycles',
|
||||||
|
placeholder='(Optional) For Cosine with restart and polynomial only',
|
||||||
)
|
)
|
||||||
|
|
||||||
lr_scheduler_power = gr.Textbox(
|
lr_scheduler_power = gr.Textbox(
|
||||||
label='LR power', placeholder='(Optional) For Cosine with restart and polynomial only'
|
label='LR power',
|
||||||
|
placeholder='(Optional) For Cosine with restart and polynomial only',
|
||||||
)
|
)
|
||||||
(
|
(
|
||||||
use_8bit_adam,
|
use_8bit_adam,
|
||||||
@ -695,6 +722,9 @@ def lora_tab(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
) = gradio_advanced_training()
|
) = gradio_advanced_training()
|
||||||
color_aug.change(
|
color_aug.change(
|
||||||
color_aug_changed,
|
color_aug_changed,
|
||||||
@ -716,7 +746,6 @@ def lora_tab(
|
|||||||
gradio_merge_lora_tab()
|
gradio_merge_lora_tab()
|
||||||
gradio_resize_lora_tab()
|
gradio_resize_lora_tab()
|
||||||
gradio_verify_lora_tab()
|
gradio_verify_lora_tab()
|
||||||
|
|
||||||
|
|
||||||
button_run = gr.Button('Train model')
|
button_run = gr.Button('Train model')
|
||||||
|
|
||||||
@ -770,8 +799,12 @@ def lora_tab(
|
|||||||
network_alpha,
|
network_alpha,
|
||||||
training_comment,
|
training_comment,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
lr_scheduler_num_cycles, lr_scheduler_power,
|
lr_scheduler_num_cycles,
|
||||||
|
lr_scheduler_power,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
]
|
]
|
||||||
|
|
||||||
button_open_config.click(
|
button_open_config.click(
|
||||||
|
@ -82,8 +82,18 @@ def save_configuration(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list, token_string, init_word, num_vectors_per_token, max_train_steps, weights, template, keep_tokens,
|
model_list,
|
||||||
|
token_string,
|
||||||
|
init_word,
|
||||||
|
num_vectors_per_token,
|
||||||
|
max_train_steps,
|
||||||
|
weights,
|
||||||
|
template,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -171,8 +181,18 @@ def open_configuration(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list, token_string, init_word, num_vectors_per_token, max_train_steps, weights, template, keep_tokens,
|
model_list,
|
||||||
|
token_string,
|
||||||
|
init_word,
|
||||||
|
num_vectors_per_token,
|
||||||
|
max_train_steps,
|
||||||
|
weights,
|
||||||
|
template,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
# Get list of function parameters and values
|
# Get list of function parameters and values
|
||||||
parameters = list(locals().items())
|
parameters = list(locals().items())
|
||||||
@ -241,8 +261,17 @@ def train_model(
|
|||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
model_list, # Keep this. Yes, it is unused here but required given the common list used
|
||||||
token_string, init_word, num_vectors_per_token, max_train_steps, weights, template, keep_tokens,
|
token_string,
|
||||||
|
init_word,
|
||||||
|
num_vectors_per_token,
|
||||||
|
max_train_steps,
|
||||||
|
weights,
|
||||||
|
template,
|
||||||
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
):
|
):
|
||||||
if pretrained_model_name_or_path == '':
|
if pretrained_model_name_or_path == '':
|
||||||
msgbox('Source model information is missing')
|
msgbox('Source model information is missing')
|
||||||
@ -264,15 +293,15 @@ def train_model(
|
|||||||
if output_dir == '':
|
if output_dir == '':
|
||||||
msgbox('Output folder path is missing')
|
msgbox('Output folder path is missing')
|
||||||
return
|
return
|
||||||
|
|
||||||
if token_string == '':
|
if token_string == '':
|
||||||
msgbox('Token string is missing')
|
msgbox('Token string is missing')
|
||||||
return
|
return
|
||||||
|
|
||||||
if init_word == '':
|
if init_word == '':
|
||||||
msgbox('Init word is missing')
|
msgbox('Init word is missing')
|
||||||
return
|
return
|
||||||
|
|
||||||
if not os.path.exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
@ -332,7 +361,7 @@ def train_model(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
max_train_steps = int(max_train_steps)
|
max_train_steps = int(max_train_steps)
|
||||||
|
|
||||||
print(f'max_train_steps = {max_train_steps}')
|
print(f'max_train_steps = {max_train_steps}')
|
||||||
|
|
||||||
# calculate stop encoder training
|
# calculate stop encoder training
|
||||||
@ -421,6 +450,9 @@ def train_model(
|
|||||||
use_8bit_adam=use_8bit_adam,
|
use_8bit_adam=use_8bit_adam,
|
||||||
keep_tokens=keep_tokens,
|
keep_tokens=keep_tokens,
|
||||||
persistent_data_loader_workers=persistent_data_loader_workers,
|
persistent_data_loader_workers=persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale=bucket_no_upscale,
|
||||||
|
random_crop=random_crop,
|
||||||
|
bucket_reso_steps=bucket_reso_steps,
|
||||||
)
|
)
|
||||||
run_cmd += f' --token_string="{token_string}"'
|
run_cmd += f' --token_string="{token_string}"'
|
||||||
run_cmd += f' --init_word="{init_word}"'
|
run_cmd += f' --init_word="{init_word}"'
|
||||||
@ -431,7 +463,7 @@ def train_model(
|
|||||||
run_cmd += f' --use_object_template'
|
run_cmd += f' --use_object_template'
|
||||||
elif template == 'style template':
|
elif template == 'style template':
|
||||||
run_cmd += f' --use_style_template'
|
run_cmd += f' --use_style_template'
|
||||||
|
|
||||||
print(run_cmd)
|
print(run_cmd)
|
||||||
# Run the command
|
# Run the command
|
||||||
subprocess.run(run_cmd)
|
subprocess.run(run_cmd)
|
||||||
@ -576,9 +608,7 @@ def ti_tab(
|
|||||||
label='Resume TI training',
|
label='Resume TI training',
|
||||||
placeholder='(Optional) Path to existing TI embeding file to keep training',
|
placeholder='(Optional) Path to existing TI embeding file to keep training',
|
||||||
)
|
)
|
||||||
weights_file_input = gr.Button(
|
weights_file_input = gr.Button('📂', elem_id='open_folder_small')
|
||||||
'📂', elem_id='open_folder_small'
|
|
||||||
)
|
|
||||||
weights_file_input.click(get_file_path, outputs=weights)
|
weights_file_input.click(get_file_path, outputs=weights)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
token_string = gr.Textbox(
|
token_string = gr.Textbox(
|
||||||
@ -676,6 +706,9 @@ def ti_tab(
|
|||||||
max_data_loader_n_workers,
|
max_data_loader_n_workers,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
) = gradio_advanced_training()
|
) = gradio_advanced_training()
|
||||||
color_aug.change(
|
color_aug.change(
|
||||||
color_aug_changed,
|
color_aug_changed,
|
||||||
@ -739,9 +772,17 @@ def ti_tab(
|
|||||||
mem_eff_attn,
|
mem_eff_attn,
|
||||||
gradient_accumulation_steps,
|
gradient_accumulation_steps,
|
||||||
model_list,
|
model_list,
|
||||||
token_string, init_word, num_vectors_per_token, max_train_steps, weights, template,
|
token_string,
|
||||||
|
init_word,
|
||||||
|
num_vectors_per_token,
|
||||||
|
max_train_steps,
|
||||||
|
weights,
|
||||||
|
template,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
persistent_data_loader_workers,
|
persistent_data_loader_workers,
|
||||||
|
bucket_no_upscale,
|
||||||
|
random_crop,
|
||||||
|
bucket_reso_steps,
|
||||||
]
|
]
|
||||||
|
|
||||||
button_open_config.click(
|
button_open_config.click(
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
import os
|
|
||||||
import cv2
|
|
||||||
import argparse
|
|
||||||
import shutil
|
|
||||||
import math
|
|
||||||
|
|
||||||
def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=2):
|
|
||||||
# Calculate max_pixels from max_resolution string
|
|
||||||
max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1])
|
|
||||||
|
|
||||||
# Create destination folder if it does not exist
|
|
||||||
if not os.path.exists(dst_img_folder):
|
|
||||||
os.makedirs(dst_img_folder)
|
|
||||||
|
|
||||||
# Iterate through all files in src_img_folder
|
|
||||||
for filename in os.listdir(src_img_folder):
|
|
||||||
# Check if the image is png, jpg or webp
|
|
||||||
if not filename.endswith(('.png', '.jpg', '.webp')):
|
|
||||||
# Copy the file to the destination folder if not png, jpg or webp
|
|
||||||
shutil.copy(os.path.join(src_img_folder, filename), os.path.join(dst_img_folder, filename))
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Load image
|
|
||||||
img = cv2.imread(os.path.join(src_img_folder, filename))
|
|
||||||
|
|
||||||
# Calculate current number of pixels
|
|
||||||
current_pixels = img.shape[0] * img.shape[1]
|
|
||||||
|
|
||||||
# Check if the image needs resizing
|
|
||||||
if current_pixels > max_pixels:
|
|
||||||
# Calculate scaling factor
|
|
||||||
scale_factor = max_pixels / current_pixels
|
|
||||||
|
|
||||||
# Calculate new dimensions
|
|
||||||
new_height = int(img.shape[0] * math.sqrt(scale_factor))
|
|
||||||
new_width = int(img.shape[1] * math.sqrt(scale_factor))
|
|
||||||
|
|
||||||
# Resize image
|
|
||||||
img = cv2.resize(img, (new_width, new_height))
|
|
||||||
|
|
||||||
# Calculate the new height and width that are divisible by divisible_by
|
|
||||||
new_height = new_height if new_height % divisible_by == 0 else new_height - new_height % divisible_by
|
|
||||||
new_width = new_width if new_width % divisible_by == 0 else new_width - new_width % divisible_by
|
|
||||||
|
|
||||||
# Center crop the image to the calculated dimensions
|
|
||||||
y = int((img.shape[0] - new_height) / 2)
|
|
||||||
x = int((img.shape[1] - new_width) / 2)
|
|
||||||
img = img[y:y + new_height, x:x + new_width]
|
|
||||||
|
|
||||||
# Save resized image in dst_img_folder
|
|
||||||
cv2.imwrite(os.path.join(dst_img_folder, filename), img, [cv2.IMWRITE_JPEG_QUALITY, 100])
|
|
||||||
|
|
||||||
print(f"Resized image: {filename} with size {img.shape[0]}x{img.shape[1]}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Resize images in a folder to a specified max resolution')
|
|
||||||
parser.add_argument('src_img_folder', type=str, help='Source folder containing the images')
|
|
||||||
parser.add_argument('dst_img_folder', type=str, help='Destination folder to save the resized images')
|
|
||||||
parser.add_argument('--max_resolution', type=str, help='Maximum resolution in the format "512x512"', default="512x512")
|
|
||||||
parser.add_argument('--divisible_by', type=int, help='Ensure new dimensions are divisible by this value', default=2)
|
|
||||||
args = parser.parse_args()
|
|
||||||
resize_images(args.src_img_folder, args.dst_img_folder, args.max_resolution)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
76
tools/resize_images_to_resolutions.py
Normal file
76
tools/resize_images_to_resolutions.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import os
|
||||||
|
import cv2
|
||||||
|
import argparse
|
||||||
|
import shutil
|
||||||
|
import math
|
||||||
|
|
||||||
|
def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=2):
|
||||||
|
# Split the max_resolution string by "," and strip any whitespaces
|
||||||
|
max_resolutions = [res.strip() for res in max_resolution.split(',')]
|
||||||
|
|
||||||
|
# # Calculate max_pixels from max_resolution string
|
||||||
|
# max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1])
|
||||||
|
|
||||||
|
# Create destination folder if it does not exist
|
||||||
|
if not os.path.exists(dst_img_folder):
|
||||||
|
os.makedirs(dst_img_folder)
|
||||||
|
|
||||||
|
# Iterate through all files in src_img_folder
|
||||||
|
for filename in os.listdir(src_img_folder):
|
||||||
|
# Check if the image is png, jpg or webp
|
||||||
|
if not filename.endswith(('.png', '.jpg', '.webp')):
|
||||||
|
# Copy the file to the destination folder if not png, jpg or webp
|
||||||
|
shutil.copy(os.path.join(src_img_folder, filename), os.path.join(dst_img_folder, filename))
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Load image
|
||||||
|
img = cv2.imread(os.path.join(src_img_folder, filename))
|
||||||
|
|
||||||
|
for max_resolution in max_resolutions:
|
||||||
|
# Calculate max_pixels from max_resolution string
|
||||||
|
max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1])
|
||||||
|
|
||||||
|
# Calculate current number of pixels
|
||||||
|
current_pixels = img.shape[0] * img.shape[1]
|
||||||
|
|
||||||
|
# Check if the image needs resizing
|
||||||
|
if current_pixels > max_pixels:
|
||||||
|
# Calculate scaling factor
|
||||||
|
scale_factor = max_pixels / current_pixels
|
||||||
|
|
||||||
|
# Calculate new dimensions
|
||||||
|
new_height = int(img.shape[0] * math.sqrt(scale_factor))
|
||||||
|
new_width = int(img.shape[1] * math.sqrt(scale_factor))
|
||||||
|
|
||||||
|
# Resize image
|
||||||
|
img = cv2.resize(img, (new_width, new_height))
|
||||||
|
|
||||||
|
# Calculate the new height and width that are divisible by divisible_by
|
||||||
|
new_height = new_height if new_height % divisible_by == 0 else new_height - new_height % divisible_by
|
||||||
|
new_width = new_width if new_width % divisible_by == 0 else new_width - new_width % divisible_by
|
||||||
|
|
||||||
|
# Center crop the image to the calculated dimensions
|
||||||
|
y = int((img.shape[0] - new_height) / 2)
|
||||||
|
x = int((img.shape[1] - new_width) / 2)
|
||||||
|
img = img[y:y + new_height, x:x + new_width]
|
||||||
|
|
||||||
|
# Split filename into base and extension
|
||||||
|
base, ext = os.path.splitext(filename)
|
||||||
|
new_filename = base + '+' + max_resolution + '.jpg'
|
||||||
|
|
||||||
|
# Save resized image in dst_img_folder
|
||||||
|
cv2.imwrite(os.path.join(dst_img_folder, new_filename), img, [cv2.IMWRITE_JPEG_QUALITY, 100])
|
||||||
|
print(f"Resized image: {filename} with size {img.shape[0]}x{img.shape[1]} as {new_filename}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description='Resize images in a folder to a specified max resolution(s)')
|
||||||
|
parser.add_argument('src_img_folder', type=str, help='Source folder containing the images')
|
||||||
|
parser.add_argument('dst_img_folder', type=str, help='Destination folder to save the resized images')
|
||||||
|
parser.add_argument('--max_resolution', type=str, help='Maximum resolution(s) in the format "512x512,448x448,384x384, etc, etc"', default="512x512,448x448,384x384")
|
||||||
|
parser.add_argument('--divisible_by', type=int, help='Ensure new dimensions are divisible by this value', default=1)
|
||||||
|
args = parser.parse_args()
|
||||||
|
resize_images(args.src_img_folder, args.dst_img_folder, args.max_resolution)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -35,8 +35,9 @@ def train(args):
|
|||||||
|
|
||||||
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
||||||
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso, args.prior_loss_weight,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop, args.debug_dataset)
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
|
args.prior_loss_weight, args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop, args.debug_dataset)
|
||||||
if args.no_token_padding:
|
if args.no_token_padding:
|
||||||
train_dataset.disable_token_padding()
|
train_dataset.disable_token_padding()
|
||||||
train_dataset.make_buckets()
|
train_dataset.make_buckets()
|
||||||
|
@ -120,13 +120,16 @@ def train(args):
|
|||||||
print("Use DreamBooth method.")
|
print("Use DreamBooth method.")
|
||||||
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
||||||
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso, args.prior_loss_weight,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop, args.debug_dataset)
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
|
args.prior_loss_weight, args.flip_aug, args.color_aug, args.face_crop_aug_range,
|
||||||
|
args.random_crop, args.debug_dataset)
|
||||||
else:
|
else:
|
||||||
print("Train with captions.")
|
print("Train with captions.")
|
||||||
train_dataset = FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
train_dataset = FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
||||||
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
||||||
args.dataset_repeats, args.debug_dataset)
|
args.dataset_repeats, args.debug_dataset)
|
||||||
train_dataset.make_buckets()
|
train_dataset.make_buckets()
|
||||||
|
@ -143,13 +143,15 @@ def train(args):
|
|||||||
print("Use DreamBooth method.")
|
print("Use DreamBooth method.")
|
||||||
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
||||||
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso, args.prior_loss_weight,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop, args.debug_dataset)
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
|
args.prior_loss_weight, args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop, args.debug_dataset)
|
||||||
else:
|
else:
|
||||||
print("Train with captions.")
|
print("Train with captions.")
|
||||||
train_dataset = FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
train_dataset = FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
||||||
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
||||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||||
|
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
||||||
args.dataset_repeats, args.debug_dataset)
|
args.dataset_repeats, args.debug_dataset)
|
||||||
|
|
||||||
@ -217,7 +219,7 @@ def train(args):
|
|||||||
# DataLoaderのプロセス数:0はメインプロセスになる
|
# DataLoaderのプロセス数:0はメインプロセスになる
|
||||||
n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで
|
n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで
|
||||||
train_dataloader = torch.utils.data.DataLoader(
|
train_dataloader = torch.utils.data.DataLoader(
|
||||||
train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=n_workers)
|
train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers)
|
||||||
|
|
||||||
# 学習ステップ数を計算する
|
# 学習ステップ数を計算する
|
||||||
if args.max_train_epochs is not None:
|
if args.max_train_epochs is not None:
|
||||||
@ -312,7 +314,8 @@ def train(args):
|
|||||||
|
|
||||||
# Get the text embedding for conditioning
|
# Get the text embedding for conditioning
|
||||||
input_ids = batch["input_ids"].to(accelerator.device)
|
input_ids = batch["input_ids"].to(accelerator.device)
|
||||||
encoder_hidden_states = train_util.get_hidden_states(args, input_ids, tokenizer, text_encoder, torch.float) # weight_dtype) use float instead of fp16/bf16 because text encoder is float
|
# weight_dtype) use float instead of fp16/bf16 because text encoder is float
|
||||||
|
encoder_hidden_states = train_util.get_hidden_states(args, input_ids, tokenizer, text_encoder, torch.float)
|
||||||
|
|
||||||
# Sample noise that we'll add to the latents
|
# Sample noise that we'll add to the latents
|
||||||
noise = torch.randn_like(latents, device=latents.device)
|
noise = torch.randn_like(latents, device=latents.device)
|
||||||
|
Loading…
Reference in New Issue
Block a user