# 2023-01-06 00:16:13 +00:00
# このスクリプトのライセンスは、train_dreambooth.pyと同じくApache License 2.0とします
# (c) 2022 Kohya S. @kohya_ss
# 横長の画像から顔検出して正立するように回転し、そこを中心に正方形に切り出す
# v2: extract max face if multiple faces are found
# v3: add crop_ratio option
# v4: add multiple faces extraction and min/max size
import argparse
import math
import cv2
import glob
import os
from anime_face_detector import create_detector
from tqdm import tqdm
import numpy as np
# Row indices into each prediction's 'keypoints' array that are read as the
# right and left eye (presumably the detector's landmark numbering -- confirm
# against the anime_face_detector documentation).
KP_REYE = 11
KP_LEYE = 19

# Detections whose score (last element of 'bbox') is below this are ignored.
SCORE_THRES = 0.90


def detect_faces(detector, image, min_size):
    """Detect faces in ``image`` and return them sorted from largest to smallest.

    Args:
        detector: Callable invoked as ``detector(image)``. It must return an
            iterable of dict-like predictions, each with a ``'bbox'`` entry
            (``left, top, right, bottom`` in the first four positions and the
            score in the last one) and a ``'keypoints'`` entry that supports
            ``[index, 0:2]`` indexing (e.g. a 2-D numpy array).
        image: Image passed to the detector unchanged (BGR order according to
            the original comment).
        min_size: Currently unused; kept so existing callers keep working.

    Returns:
        A list of ``(cx, cy, fw, fh, angle)`` tuples: integer centre of the
        bounding box, its integer width and height, and the angle in degrees
        of the line from the right-eye keypoint to the left-eye keypoint.
        The list is ordered by ``max(fw, fh)``, largest first.
    """
    faces = []
    for pred in detector(image):
        bbox = pred['bbox']
        if bbox[-1] < SCORE_THRES:
            continue  # low-confidence detection

        left, top, right, bottom = bbox[:4]
        cx = int((left + right) / 2)
        cy = int((top + bottom) / 2)
        fw = int(right - left)
        fh = int(bottom - top)

        # Tilt of the face, measured from the line joining the two eyes.
        keypoints = pred['keypoints']
        lex, ley = keypoints[KP_LEYE, 0:2]
        rex, rey = keypoints[KP_REYE, 0:2]
        angle = math.degrees(math.atan2(ley - rey, lex - rex))

        faces.append((cx, cy, fw, fh, angle))

    # Largest face first (by its longer side).
    faces.sort(key=lambda face: max(face[2], face[3]), reverse=True)
    return faces
def rotate_image(image, angle, cx, cy):
    """Rotate ``image`` around the point ``(cx, cy)`` without changing its size.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is read here before it is handed to OpenCV.
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D`` (degrees).
        cx: X coordinate of the rotation centre.
        cy: Y coordinate of the rotation centre.

    Returns:
        ``(rotated, cx, cy)``: the rotated image and the centre coordinates,
        which are returned unchanged because the canvas is not enlarged.
    """
    height, width = image.shape[:2]
    matrix = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)

    # NOTE: growing the canvas a little to make room for the rotated corners
    # was tried and is disabled for now; pixels outside the source are filled
    # by reflecting the border instead.
    rotated = cv2.warpAffine(
        image,
        matrix,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REFLECT,
    )
    return rotated, cx, cy
def _crop_axis(center, crop_len, img_len):
    """Return ``(start, new_center)`` for a 1-D crop window of ``crop_len`` pixels.

    The window is centred on ``center`` and pushed back inside ``[0, img_len]``
    when it would stick out. ``new_center`` is ``center`` expressed relative to
    the window start. Assumes ``crop_len <= img_len``.
    """
    start = center - crop_len // 2
    new_center = crop_len // 2
    if start < 0:
        new_center += start
        start = 0
    elif start + crop_len > img_len:
        new_center += start + crop_len - img_len
        start = img_len - crop_len
    return start, new_center


def process(args):
    """Detect faces in every image of ``args.src_dir`` and write one PNG per face.

    For each detected face the image is optionally rotated so the face is
    upright (``args.rotate``), optionally resized, and optionally cropped
    around the face (``args.crop_size`` or ``args.crop_ratio``). Results are
    written to ``args.dst_dir`` as
    ``{basename}{suffix}_{cx}_{cy}_{fw}_{fh}.png`` where the numbers are the
    face centre and size inside the written image.

    Args:
        args: Parsed command-line namespace. Attributes read: ``src_dir``,
            ``dst_dir``, ``rotate``, ``resize_fit``, ``resize_face_size``,
            ``crop_size``, ``crop_ratio``, ``min_size``, ``max_size``,
            ``multiple_faces`` and ``debug``.

    Raises:
        AssertionError: If mutually exclusive options are combined or
            ``crop_size`` / ``crop_ratio`` is not of the form ``'a,b'``.
    """
    assert (not args.resize_fit) or args.resize_face_size is None, \
        "resize_fit and resize_face_size can't be specified both / resize_fitとresize_face_sizeはどちらか片方しか指定できません"
    assert args.crop_ratio is None or args.resize_face_size is None, \
        "crop_ratio指定時はresize_face_sizeは指定できません"

    # Load the anime face detection model.
    print("loading face detector.")
    detector = create_detector('yolov3')

    # Parse the crop arguments.
    if args.crop_size is None:
        crop_width = crop_height = None
    else:
        tokens = args.crop_size.split(',')
        assert len(tokens) == 2, "crop_size must be 'width,height' / crop_sizeは'幅,高さ'で指定してください"
        crop_width, crop_height = [int(t) for t in tokens]

    if args.crop_ratio is None:
        crop_h_ratio = crop_v_ratio = None
    else:
        tokens = args.crop_ratio.split(',')
        assert len(tokens) == 2, "crop_ratio must be 'horizontal,vertical' / crop_ratioは'幅,高さ'の倍率で指定してください"
        crop_h_ratio, crop_v_ratio = [float(t) for t in tokens]

    # Process the images.
    print("processing.")
    output_extension = ".png"

    os.makedirs(args.dst_dir, exist_ok=True)
    paths = (glob.glob(os.path.join(args.src_dir, "*.png"))
             + glob.glob(os.path.join(args.src_dir, "*.jpg"))
             + glob.glob(os.path.join(args.src_dir, "*.webp")))

    for path in tqdm(paths):
        basename = os.path.splitext(os.path.basename(path))[0]

        # cv2.imread fails on Japanese file names, so read the bytes with
        # numpy and decode them instead.
        image = cv2.imdecode(np.fromfile(path, np.uint8), cv2.IMREAD_UNCHANGED)
        if image is None:
            # Undecodable file: skip it instead of aborting the whole batch.
            print(f"failed to load image, skipped / 画像を読み込めないためスキップします: {path}")
            continue
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.shape[2] == 4:
            print(f"image has alpha. ignore / 画像の透明度が設定されているため無視します: {path}")
            # Without the copy the alpha information reportedly stays attached internally.
            image = image[:, :, :3].copy()

        src_h, src_w = image.shape[:2]

        # NOTE(review): the third parameter of detect_faces is named min_size
        # but receives multiple_faces here; the callee currently ignores it.
        faces = detect_faces(detector, image, args.multiple_faces)
        for i, face in enumerate(faces):
            cx, cy, fw, fh, angle = face
            face_size = max(fw, fh)
            if args.min_size is not None and face_size < args.min_size:
                continue
            if args.max_size is not None and face_size >= args.max_size:
                continue
            face_suffix = f"_{i+1:02d}" if args.multiple_faces else ""

            # Every face starts from the untouched source image, so the
            # working size must be reset here; a resize done for a previous
            # face must not leak into this one.
            w, h = src_w, src_h

            # Rotate if requested.
            face_img = image
            if args.rotate:
                face_img, cx, cy = rotate_image(face_img, angle, cx, cy)

            # Crop around the face if requested.
            if crop_width is not None or crop_h_ratio is not None:
                cur_crop_width, cur_crop_height = crop_width, crop_height
                if crop_h_ratio is not None:
                    cur_crop_width = int(face_size * crop_h_ratio + .5)
                    cur_crop_height = int(face_size * crop_v_ratio + .5)

                # Resize if necessary.
                scale = 1.0
                if args.resize_face_size is not None:
                    # Resize so that the face gets the requested size.
                    scale = args.resize_face_size / face_size
                    if scale < cur_crop_width / w:
                        print(
                            f"image width too small in face size based resizing / 顔を基準にリサイズすると画像の幅がcrop sizeより小さい(顔が相対的に大きすぎる)ので顔サイズが変わります: {path}")
                        scale = cur_crop_width / w
                    if scale < cur_crop_height / h:
                        print(
                            f"image height too small in face size based resizing / 顔を基準にリサイズすると画像の高さがcrop sizeより小さい(顔が相対的に大きすぎる)ので顔サイズが変わります: {path}")
                        scale = cur_crop_height / h
                elif crop_h_ratio is not None:
                    # No resizing when the crop is given as a ratio.
                    pass
                else:
                    # A fixed crop size was given: upscale images that are too small.
                    if w < cur_crop_width:
                        print(f"image width too small/ 画像の幅がcrop sizeより小さいので画質が劣化します: {path}")
                        scale = cur_crop_width / w
                    if h < cur_crop_height:
                        print(f"image height too small/ 画像の高さがcrop sizeより小さいので画質が劣化します: {path}")
                        # Keep the larger factor so both sides reach the crop size.
                        scale = max(scale, cur_crop_height / h)
                    if args.resize_fit:
                        scale = max(cur_crop_width / w, cur_crop_height / h)

                if scale != 1.0:
                    w = int(w * scale + .5)
                    h = int(h * scale + .5)
                    face_img = cv2.resize(face_img, (w, h),
                                          interpolation=cv2.INTER_AREA if scale < 1.0 else cv2.INTER_LANCZOS4)
                    cx = int(cx * scale + .5)
                    cy = int(cy * scale + .5)
                    fw = int(fw * scale + .5)
                    fh = int(fh * scale + .5)

                # The crop can never be larger than the image itself.
                cur_crop_width = min(cur_crop_width, face_img.shape[1])
                cur_crop_height = min(cur_crop_height, face_img.shape[0])

                x, cx = _crop_axis(cx, cur_crop_width, w)
                face_img = face_img[:, x:x + cur_crop_width]

                y, cy = _crop_axis(cy, cur_crop_height, h)
                face_img = face_img[y:y + cur_crop_height]

            # Debug: draw the face rectangle on the output.
            if args.debug:
                # face_img may be the source image itself or a view of it;
                # draw on a copy so the rectangle does not show up in the
                # outputs of the remaining faces.
                face_img = face_img.copy()
                cv2.rectangle(face_img, (cx - fw // 2, cy - fh // 2), (cx + fw // 2, cy + fh // 2),
                              (255, 0, 255), fw // 20)

            # Encode in memory and write through open() so that non-ASCII
            # destination paths work too.
            _, buf = cv2.imencode(output_extension, face_img)
            out_name = f"{basename}{face_suffix}_{cx:04d}_{cy:04d}_{fw:04d}_{fh:04d}{output_extension}"
            with open(os.path.join(args.dst_dir, out_name), "wb") as f:
                buf.tofile(f)
if __name__ == '__main__':
    # Command-line entry point: build the option parser, parse, and run.
    parser = argparse.ArgumentParser()
    # Both directories are mandatory: process() cannot run without them.
    parser.add_argument("--src_dir", type=str, required=True,
                        help="directory to load images / 画像を読み込むディレクトリ")
    parser.add_argument("--dst_dir", type=str, required=True,
                        help="directory to save images / 画像を保存するディレクトリ")
    parser.add_argument("--rotate", action="store_true",
                        help="rotate images to align faces / 顔が正立するように画像を回転する")
    parser.add_argument("--resize_fit", action="store_true",
                        help="resize to fit smaller side after cropping / 切り出し後の画像の短辺がcrop_sizeにあうようにリサイズする")
    parser.add_argument("--resize_face_size", type=int, default=None,
                        help="resize image before cropping by face size / 切り出し前に顔がこのサイズになるようにリサイズする")
    parser.add_argument("--crop_size", type=str, default=None,
                        help="crop images with 'width,height' pixels, face centered / 顔を中心として'幅,高さ'のサイズで切り出す")
    parser.add_argument("--crop_ratio", type=str, default=None,
                        help="crop images with 'horizontal,vertical' ratio to face, face centered / 顔を中心として顔サイズの'幅倍率,高さ倍率'のサイズで切り出す")
    parser.add_argument("--min_size", type=int, default=None,
                        help="minimum face size to output (included) / 処理対象とする顔の最小サイズ(この値以上)")
    parser.add_argument("--max_size", type=int, default=None,
                        help="maximum face size to output (excluded) / 処理対象とする顔の最大サイズ(この値未満)")
    parser.add_argument("--multiple_faces", action="store_true",
                        help="output each faces / 複数の顔が見つかった場合、それぞれを切り出す")
    parser.add_argument("--debug", action="store_true",
                        help="render rect for face / 処理後画像の顔位置に矩形を描画します")
    args = parser.parse_args()

    process(args)