KohyaSS/tools/resize_images_to_resolution.py

import glob
import os
import cv2
import argparse
import shutil
import math
from PIL import Image
import numpy as np


def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=2, interpolation=None, save_as_png=False, copy_associated_files=False):
  """Downscale every image in a folder to one or more maximum pixel areas.

  For each image in ``src_img_folder`` and each entry of ``max_resolution``, the
  image is shrunk (aspect ratio preserved) so that its pixel count does not
  exceed the entry's area, center-cropped so both sides are multiples of
  ``divisible_by``, and written to ``dst_img_folder`` as
  ``<basename>+<resolution>.jpg`` (or ``.png``). Images already within the
  limit are only cropped and re-saved. Regular files that are not images are
  copied to the destination unchanged.

  Args:
    src_img_folder: Folder that is scanned (non-recursively) for input files.
    dst_img_folder: Output folder; created if it does not exist.
    max_resolution: Comma separated list of ``AxB`` entries such as
      ``"512x512,384x384"``. Only the product ``A * B`` is used, as the
      maximum number of pixels.
    divisible_by: Positive integer; output width and height are cropped down
      to a multiple of this value.
    interpolation: ``'lanczos4'`` or ``'cubic'``; any other value (including
      ``None``) selects area interpolation.
    save_as_png: Write ``.png`` files instead of ``.jpg``.
    copy_associated_files: Also copy non-image files sharing an image's
      basename (captions etc.), once per resolution with the same
      ``+<resolution>`` suffix.

  Raises:
    ValueError: If ``divisible_by`` is smaller than 1 or an entry of
      ``max_resolution`` is not two positive integers joined by ``x``.
  """
  # Validate everything before touching the filesystem, so that a typo in the
  # arguments cannot leave a half-populated destination folder behind.
  if divisible_by < 1:
    raise ValueError(f"divisible_by must be a positive integer, got {divisible_by}")

  targets = []  # (label exactly as used in output filenames, maximum pixel count)
  for label in (res.strip() for res in max_resolution.split(',')):
    parts = label.split('x')
    try:
      side_a, side_b = int(parts[0]), int(parts[1])
    except (IndexError, ValueError):
      raise ValueError(f"Invalid resolution '{label}': expected two integers joined by 'x', e.g. 512x512") from None
    if side_a <= 0 or side_b <= 0:
      raise ValueError(f"Invalid resolution '{label}': both values must be positive")
    targets.append((label, side_a * side_b))

  # Create destination folder if it does not exist
  os.makedirs(dst_img_folder, exist_ok=True)

  # Select interpolation method (area is the fallback)
  if interpolation == 'lanczos4':
    cv2_interpolation = cv2.INTER_LANCZOS4
  elif interpolation == 'cubic':
    cv2_interpolation = cv2.INTER_CUBIC
  else:
    cv2_interpolation = cv2.INTER_AREA

  img_exts = (".png", ".jpg", ".jpeg", ".webp", ".bmp")                   # copy from train_util.py
  for filename in os.listdir(src_img_folder):
    src_path = os.path.join(src_img_folder, filename)

    # shutil.copy cannot copy directories, so only regular files are handled
    if not os.path.isfile(src_path):
      continue

    # Compare extensions case-insensitively so that e.g. "photo.JPG" is resized too
    if not filename.lower().endswith(img_exts):
      # Not an image (.txt, .caption, ...): copy it to the destination unchanged
      shutil.copy(src_path, os.path.join(dst_img_folder, filename))
      continue

    # Load the image as an RGB uint8 array; the context manager closes the file handle
    with Image.open(src_path) as image:
      if not image.mode == "RGB":
        image = image.convert("RGB")
      source = np.array(image, np.uint8)

    src_height, src_width = source.shape[0:2]
    current_pixels = src_height * src_width

    base, _ = os.path.splitext(filename)
    for label, max_pixels in targets:
      # Always start from the untouched source array: reusing the previous
      # output would resample the image repeatedly and could never produce a
      # resolution larger than an earlier entry in the list.
      if current_pixels > max_pixels:
        # Scale both sides by the same factor so the area shrinks to max_pixels
        side_scale = math.sqrt(max_pixels / current_pixels)
        # Clamp to 1 so extreme aspect ratios do not round down to an empty image
        new_height = max(1, int(src_height * side_scale))
        new_width = max(1, int(src_width * side_scale))
        img = cv2.resize(source, (new_width, new_height), interpolation=cv2_interpolation)
      else:
        img = source
        new_height, new_width = src_height, src_width

      # Round both sides down to a multiple of divisible_by (with/without resizing)
      new_height -= new_height % divisible_by
      new_width -= new_width % divisible_by
      if new_height == 0 or new_width == 0:
        print(f"Skipped image: {filename} is too small to be made divisible by {divisible_by}")
        continue

      # Center crop the image to the calculated dimensions
      y = (img.shape[0] - new_height) // 2
      x = (img.shape[1] - new_width) // 2
      img = img[y:y + new_height, x:x + new_width]

      new_filename = base + '+' + label + ('.png' if save_as_png else '.jpg')

      # Save resized image in dst_img_folder
      Image.fromarray(img).save(os.path.join(dst_img_folder, new_filename), quality=100)

      proc = "Resized" if current_pixels > max_pixels else "Saved"
      print(f"{proc} image: {filename} with size {img.shape[0]}x{img.shape[1]} as {new_filename}")

    # If other files with same basename, copy them with resolution suffix
    if copy_associated_files:
      # Escape glob metacharacters so names such as "img[1]" match literally
      pattern = os.path.join(glob.escape(src_img_folder), glob.escape(base) + ".*")
      for asoc_file in glob.glob(pattern):
        ext = os.path.splitext(asoc_file)[1]
        if ext.lower() in img_exts or not os.path.isfile(asoc_file):
          continue
        for label, _ in targets:
          new_asoc_file = base + '+' + label + ext
          print(f"Copy {asoc_file} as {new_asoc_file}")
          # glob already returns the path including src_img_folder; joining the
          # folder a second time breaks relative source folders
          shutil.copy(asoc_file, os.path.join(dst_img_folder, new_asoc_file))


def setup_parser() -> argparse.ArgumentParser:
  """Build the command-line parser for the image resizing tool.

  Returns:
    A parser with the two positional folders (source, destination) followed by
    the optional resize settings.
  """
  # One (flags, keyword arguments) entry per argument, registered in this order
  argument_specs = [
      (('src_img_folder',),
       dict(type=str, help='Source folder containing the images / 元画像のフォルダ')),
      (('dst_img_folder',),
       dict(type=str, help='Destination folder to save the resized images / リサイズ後の画像を保存するフォルダ')),
      (('--max_resolution',),
       dict(type=str, default="512x512,384x384,256x256,128x128",
            help='Maximum resolution(s) in the format "512x512,384x384, etc, etc" / 最大画像サイズをカンマ区切りで指定 ("512x512,384x384, etc, etc" など)')),
      (('--divisible_by',),
       dict(type=int, default=1,
            help='Ensure new dimensions are divisible by this value / リサイズ後の画像のサイズをこの値で割り切れるようにします')),
      (('--interpolation',),
       dict(type=str, choices=['area', 'cubic', 'lanczos4'], default='area',
            help='Interpolation method for resizing / リサイズ時の補完方法')),
      (('--save_as_png',),
       dict(action='store_true', help='Save as png format / png形式で保存')),
      (('--copy_associated_files',),
       dict(action='store_true',
            help='Copy files with same base name to images (captions etc) / 画像と同じファイル名（拡張子を除く）のファイルもコピーする')),
  ]

  cli = argparse.ArgumentParser(
      description='Resize images in a folder to a specified max resolution(s) / 指定されたフォルダ内の画像を指定した最大画像サイズ（面積）以下にアスペクト比を維持したままリサイズします')
  for flags, options in argument_specs:
    cli.add_argument(*flags, **options)

  return cli


def main():
  """Parse the command line and run the resize with the parsed options."""
  options = setup_parser().parse_args()

  # Forward every CLI option to resize_images under its parameter name
  resize_images(
      src_img_folder=options.src_img_folder,
      dst_img_folder=options.dst_img_folder,
      max_resolution=options.max_resolution,
      divisible_by=options.divisible_by,
      interpolation=options.interpolation,
      save_as_png=options.save_as_png,
      copy_associated_files=options.copy_associated_files,
  )


# Script entry point: run the CLI only when this file is executed directly, not when it is imported
if __name__ == '__main__':
  main()
2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specified the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no droupout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Jpanaese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. USe either `gui.ps1 --inbrowser --server_port 3456`or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00			`import glob`
			`import os`
			`import cv2`
			`import argparse`
			`import shutil`
			`import math`
2023/02/11 (v20.7.2): - ``lora_interrogator.py`` is added in ``networks`` folder. See ``python networks\lora_interrogator.py -h`` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. - Batch size can be large (like 64 or 128). - ``train_textual_inversion.py`` now supports multiple init words. - Following feature is reverted to be the same as before. Sorry for confusion: > Now the number of data in each batch is limited to the number of actual images (not duplicated). Because a certain bucket may contain smaller number of actual images, so the batch may contain same (duplicated) images. - Add new tool to sort, group and average crop image in a dataset 2023-02-11 16:59:38 +00:00			`from PIL import Image`
			`import numpy as np`
2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specifies the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no dropout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Japanese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. Use either `gui.ps1 --inbrowser --server_port 3456` or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00

			`def resize_images(src_img_folder, dst_img_folder, max_resolution="512x512", divisible_by=2, interpolation=None, save_as_png=False, copy_associated_files=False):`
			`# Split the max_resolution string by "," and strip any whitespaces`
			`max_resolutions = [res.strip() for res in max_resolution.split(',')]`

			`# # Calculate max_pixels from max_resolution string`
			`# max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1])`

			`# Create destination folder if it does not exist`
			`if not os.path.exists(dst_img_folder):`
			`os.makedirs(dst_img_folder)`

			`# Select interpolation method`
			`if interpolation == 'lanczos4':`
			`cv2_interpolation = cv2.INTER_LANCZOS4`
			`elif interpolation == 'cubic':`
			`cv2_interpolation = cv2.INTER_CUBIC`
			`else:`
			`cv2_interpolation = cv2.INTER_AREA`

			`# Iterate through all files in src_img_folder`
			`img_exts = (".png", ".jpg", ".jpeg", ".webp", ".bmp") # copy from train_util.py`
			`for filename in os.listdir(src_img_folder):`
			`# Check if the image is png, jpg or webp etc...`
			`if not filename.endswith(img_exts):`
			`# Copy the file to the destination folder if not png, jpg or webp etc (.txt or .caption or etc.)`
			`shutil.copy(os.path.join(src_img_folder, filename), os.path.join(dst_img_folder, filename))`
			`continue`

			`# Load image`
2023/02/11 (v20.7.2): - ``lora_interrogator.py`` is added in ``networks`` folder. See ``python networks\lora_interrogator.py -h`` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. - Batch size can be large (like 64 or 128). - ``train_textual_inversion.py`` now supports multiple init words. - Following feature is reverted to be the same as before. Sorry for confusion: > Now the number of data in each batch is limited to the number of actual images (not duplicated). Because a certain bucket may contain smaller number of actual images, so the batch may contain same (duplicated) images. - Add new tool to sort, group and average crop image in a dataset 2023-02-11 16:59:38 +00:00			`# img = cv2.imread(os.path.join(src_img_folder, filename))`
			`image = Image.open(os.path.join(src_img_folder, filename))`
			`if not image.mode == "RGB":`
			`image = image.convert("RGB")`
			`img = np.array(image, np.uint8)`
2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specifies the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no dropout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Japanese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. Use either `gui.ps1 --inbrowser --server_port 3456` or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00
			`base, _ = os.path.splitext(filename)`
			`for max_resolution in max_resolutions:`
			`# Calculate max_pixels from max_resolution string`
			`max_pixels = int(max_resolution.split("x")[0]) * int(max_resolution.split("x")[1])`

			`# Calculate current number of pixels`
			`current_pixels = img.shape[0] * img.shape[1]`

			`# Check if the image needs resizing`
			`if current_pixels > max_pixels:`
			`# Calculate scaling factor`
			`scale_factor = max_pixels / current_pixels`

			`# Calculate new dimensions`
			`new_height = int(img.shape[0] * math.sqrt(scale_factor))`
			`new_width = int(img.shape[1] * math.sqrt(scale_factor))`

			`# Resize image`
			`img = cv2.resize(img, (new_width, new_height), interpolation=cv2_interpolation)`
			`else:`
			`new_height, new_width = img.shape[0:2]`

			`# Calculate the new height and width that are divisible by divisible_by (with/without resizing)`
			`new_height = new_height if new_height % divisible_by == 0 else new_height - new_height % divisible_by`
			`new_width = new_width if new_width % divisible_by == 0 else new_width - new_width % divisible_by`

			`# Center crop the image to the calculated dimensions`
			`y = int((img.shape[0] - new_height) / 2)`
			`x = int((img.shape[1] - new_width) / 2)`
			`img = img[y:y + new_height, x:x + new_width]`

			`# Split filename into base and extension`
			`new_filename = base + '+' + max_resolution + ('.png' if save_as_png else '.jpg')`

			`# Save resized image in dst_img_folder`
2023/02/11 (v20.7.2): - ``lora_interrogator.py`` is added in ``networks`` folder. See ``python networks\lora_interrogator.py -h`` for usage. - For LoRAs where the activation word is unknown, this script compares the output of Text Encoder after applying LoRA to that of unapplied to find out which token is affected by LoRA. Hopefully you can figure out the activation word. LoRA trained with captions does not seem to be able to interrogate. - Batch size can be large (like 64 or 128). - ``train_textual_inversion.py`` now supports multiple init words. - Following feature is reverted to be the same as before. Sorry for confusion: > Now the number of data in each batch is limited to the number of actual images (not duplicated). Because a certain bucket may contain smaller number of actual images, so the batch may contain same (duplicated) images. - Add new tool to sort, group and average crop image in a dataset 2023-02-11 16:59:38 +00:00			`# cv2.imwrite(os.path.join(dst_img_folder, new_filename), img, [cv2.IMWRITE_JPEG_QUALITY, 100])`
			`image = Image.fromarray(img)`
			`image.save(os.path.join(dst_img_folder, new_filename), quality=100)`

2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specified the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no droupout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Jpanaese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. USe either `gui.ps1 --inbrowser --server_port 3456`or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00			`proc = "Resized" if current_pixels > max_pixels else "Saved"`
			`print(f"{proc} image: {filename} with size {img.shape[0]}x{img.shape[1]} as {new_filename}")`

			`# If other files with same basename, copy them with resolution suffix`
			`if copy_associated_files:`
			`asoc_files = glob.glob(os.path.join(src_img_folder, base + ".*"))`
			`for asoc_file in asoc_files:`
			`ext = os.path.splitext(asoc_file)[1]`
			`if ext in img_exts:`
			`continue`
			`for max_resolution in max_resolutions:`
			`new_asoc_file = base + '+' + max_resolution + ext`
			`print(f"Copy {asoc_file} as {new_asoc_file}")`
			`shutil.copy(os.path.join(src_img_folder, asoc_file), os.path.join(dst_img_folder, new_asoc_file))`


Update to latest sd-scripts updates 2023-03-22 00:20:57 +00:00			`def setup_parser() -> argparse.ArgumentParser:`
2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specified the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no droupout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Jpanaese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. USe either `gui.ps1 --inbrowser --server_port 3456`or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00			`parser = argparse.ArgumentParser(`
			`description='Resize images in a folder to a specified max resolution(s) / 指定されたフォルダ内の画像を指定した最大画像サイズ（面積）以下にアスペクト比を維持したままリサイズします')`
			`parser.add_argument('src_img_folder', type=str, help='Source folder containing the images / 元画像のフォルダ')`
			`parser.add_argument('dst_img_folder', type=str, help='Destination folder to save the resized images / リサイズ後の画像を保存するフォルダ')`
			`parser.add_argument('--max_resolution', type=str,`
			`help='Maximum resolution(s) in the format "512x512,384x384, etc, etc" / 最大画像サイズをカンマ区切りで指定 ("512x512,384x384, etc, etc" など)', default="512x512,384x384,256x256,128x128")`
			`parser.add_argument('--divisible_by', type=int,`
			`help='Ensure new dimensions are divisible by this value / リサイズ後の画像のサイズをこの値で割り切れるようにします', default=1)`
			`parser.add_argument('--interpolation', type=str, choices=['area', 'cubic', 'lanczos4'],`
			`default='area', help='Interpolation method for resizing / リサイズ時の補完方法')`
			`parser.add_argument('--save_as_png', action='store_true', help='Save as png format / png形式で保存')`
			`parser.add_argument('--copy_associated_files', action='store_true',`
			`help='Copy files with same base name to images (captions etc) / 画像と同じファイル名（拡張子を除く）のファイルもコピーする')`

Update to latest sd-scripts updates 2023-03-22 00:20:57 +00:00			`return parser`


			`def main():`
			`parser = setup_parser()`

2023/02/09 (v20.7.1) - Caption dropout is supported in ``train_db.py``, ``fine_tune.py`` and ``train_network.py``. Thanks to forestsource! - ``--caption_dropout_rate`` option specifies the dropout rate for captions (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the image is trained with the empty caption. Default is 0 (no dropout). - ``--caption_dropout_every_n_epochs`` option specifies how many epochs to drop captions. If ``3`` is specified, in epoch 3, 6, 9 ..., images are trained with all captions empty. Default is None (no dropout). - ``--caption_tag_dropout_rate`` option specified the dropout rate for tags (comma separated tokens) (0~1.0, 0.1 means 10% chance for dropout). If dropout occurs, the tag is removed from the caption. If ``--keep_tokens`` option is set, these tokens (tags) are not dropped. Default is 0 (no droupout). - The bulk image downsampling script is added. Documentation is [here](https://github.com/kohya-ss/sd-scripts/blob/main/train_network_README-ja.md#%E7%94%BB%E5%83%8F%E3%83%AA%E3%82%B5%E3%82%A4%E3%82%BA%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%97%E3%83%88) (in Jpanaese). Thanks to bmaltais! - Typo check is added. Thanks to shirayu! - Add option to autolaunch the GUI in a browser and set the server_port. USe either `gui.ps1 --inbrowser --server_port 3456`or `gui.cmd -inbrowser -server_port 3456` 2023-02-10 00:17:24 +00:00			`args = parser.parse_args()`
			`resize_images(args.src_img_folder, args.dst_img_folder, args.max_resolution,`
			`args.divisible_by, args.interpolation, args.save_as_png, args.copy_associated_files)`


			`if __name__ == '__main__':`
			`main()`