From 90c0d55457bc0bd2ad675609856d3fdaeb475142 Mon Sep 17 00:00:00 2001 From: bmaltais Date: Thu, 9 Feb 2023 19:17:17 -0500 Subject: [PATCH] 2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specifies the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no dropout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Japanese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. Use either `gui.ps1 --inbrowser --server_port 3456` or `gui.cmd -inbrowser -server_port 3456` --- README.md | 8 ++++++++ tools/resize_images_to_resolutions.py | 18 ++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3425f0f..a85f8fe 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,14 @@ Then redo the installation instruction within the kohya_ss venv. ## Change history +* 2023/02/09 (v20.7.1) + - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! 
+ - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). + - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). + - ``--caption_tag_dropout_rate`` option specifies the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no dropout). + - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Japanese). Thanks to bmaltais! + - Typo check is added. Thanks to shirayu! + - Add option to autolaunch the GUI in a browser and set the server_port. Use either `gui.ps1 --inbrowser --server_port 3456` or `gui.cmd -inbrowser -server_port 3456` * 2023/02/06 (v20.7.0) - ``--bucket_reso_steps`` and ``--bucket_no_upscale`` options are added to training scripts (fine tuning, DreamBooth, LoRA and Textual Inversion) and ``prepare_buckets_latents.py``. - ``--bucket_reso_steps`` takes the steps for buckets in aspect ratio bucketing. Default is 64, same as before. 
diff --git a/tools/resize_images_to_resolutions.py b/tools/resize_images_to_resolutions.py index e55b285..3e6f87d 100644 --- a/tools/resize_images_to_resolutions.py +++ b/tools/resize_images_to_resolutions.py @@ -4,13 +4,10 @@ import argparse import shutil import math -def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=1): +def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=1, caption_extension=''): # Split the max_resolution string by "," and strip any whitespaces max_resolutions = [res.strip() for res in max_resolution.split(',')] - # # Calculate max_pixels from max_resolution string - # max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1]) - # Create destination folder if it does not exist if not os.path.exists(dst_img_folder): os.makedirs(dst_img_folder) @@ -20,7 +17,7 @@ def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divi # Check if the image is png, jpg or webp if not filename.endswith(('.png', '.jpg', '.webp')): # Copy the file to the destination folder if not png, jpg or webp - shutil.copy(os.path.join(src_img_folder, filename), os.path.join(dst_img_folder, filename)) + # shutil.copy(os.path.join(src_img_folder, filename), os.path.join(dst_img_folder, filename)) continue # Load image @@ -42,8 +39,8 @@ def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divi new_height = int(img.shape[0] * math.sqrt(scale_factor)) new_width = int(img.shape[1] * math.sqrt(scale_factor)) - # Resize image - img = cv2.resize(img, (new_width, new_height)) + # Resize image using area interpolation (best when downsampling) + img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA) # Calculate the new height and width that are divisible by divisible_by new_height = new_height if new_height % divisible_by == 0 else new_height - new_height % divisible_by @@ -59,8 +56,8 @@ def resize_images(src_img_folder, 
dst_img_folder, max_resolution="512x512", divi new_filename = base + '+' + max_resolution + '.jpg' # copy caption file with right name if one exist - if os.path.exists(os.path.join(src_img_folder, base + '.txt')): - shutil.copy(os.path.join(src_img_folder, base + '.txt'), os.path.join(dst_img_folder, new_filename + '.txt')) + if os.path.exists(os.path.join(src_img_folder, base + caption_extension)): + shutil.copy(os.path.join(src_img_folder, base + caption_extension), os.path.join(dst_img_folder, new_filename + caption_extension)) # Save resized image in dst_img_folder cv2.imwrite(os.path.join(dst_img_folder, new_filename), img, [cv2.IMWRITE_JPEG_QUALITY, 100]) @@ -73,8 +70,9 @@ def main(): parser.add_argument('dst_img_folder', type=str, help='Destination folder to save the resized images') parser.add_argument('--max_resolution', type=str, help='Maximum resolution(s) in the format "512x512,448x448,384x384, etc, etc"', default="512x512,448x448,384x384") parser.add_argument('--divisible_by', type=int, help='Ensure new dimensions are divisible by this value', default=1) + parser.add_argument('--caption_extension', type=str, help='Extension of caption files to copy with resized images"', default=".txt") args = parser.parse_args() - resize_images(args.src_img_folder, args.dst_img_folder, args.max_resolution) + resize_images(args.src_img_folder, args.dst_img_folder, args.max_resolution, args.divisible_by, args.caption_extension) if __name__ == '__main__': main() \ No newline at end of file