commit bb57f30c2d
parent fdecb63685

    init

README.md
@@ -70,6 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
 - No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
 - DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
 - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
+- Aesthetic, a way to generate images with a specific aesthetic by using clip images embds (implementation of https://github.com/vicgalle/stable-diffusion-aesthetic-gradients)
 
 ## Installation and Running
 
 Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
aesthetic_embeddings/insert_embs_here.txt (new empty file)
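Note: insert_embs_here.txt is only a placeholder so the new aesthetic_embeddings/ directory exists in the checkout; the directory is meant to hold .pt files, each containing a CLIP image embedding that describes an aesthetic (as in the vicgalle repository linked above). A hedged sketch of producing such a file yourself, assuming the embedding is simply the normalized mean of CLIP image features over a set of reference images (file names and the CLIP checkpoint are illustrative, not part of this commit):

```python
# illustrative helper, not part of this commit
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")      # CLIP variant used by SD 1.x
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

paths = ["ref1.png", "ref2.png", "ref3.png"]          # images that share the aesthetic you want
with torch.no_grad():
    inputs = processor(images=[Image.open(p).convert("RGB") for p in paths], return_tensors="pt")
    feats = model.get_image_features(**inputs)        # (N, 768) image embeddings
    feats = feats / feats.norm(dim=-1, keepdim=True)
    embedding = feats.mean(dim=0, keepdim=True)       # (1, 768); the webui re-normalizes it on load

torch.save(embedding, "aesthetic_embeddings/my_style.pt")
```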

modules/processing.py
@@ -316,11 +316,16 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
     return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
 
 
-def process_images(p: StableDiffusionProcessing) -> Processed:
+def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0,
+                   aesthetic_imgs=None, aesthetic_slerp=False) -> Processed:
     """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
 
+    aesthetic_lr = float(aesthetic_lr)
+    aesthetic_weight = float(aesthetic_weight)
+    aesthetic_steps = int(aesthetic_steps)
+
     if type(p.prompt) == list:
-        assert(len(p.prompt) > 0)
+        assert (len(p.prompt) > 0)
     else:
         assert p.prompt is not None
 
@@ -394,7 +399,13 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
             #uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt])
             #c = p.sd_model.get_learned_conditioning(prompts)
             with devices.autocast():
-                uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps)
+                if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"):
+                    shared.sd_model.cond_stage_model.set_aesthetic_params(0, 0, 0)
+                uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt],
+                                                            p.steps)
+                if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"):
+                    shared.sd_model.cond_stage_model.set_aesthetic_params(aesthetic_lr, aesthetic_weight,
+                                                                          aesthetic_steps, aesthetic_imgs, aesthetic_slerp)
                 c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps)
 
             if len(model_hijack.comments) > 0:

Note: the aesthetic parameters are zeroed with set_aesthetic_params(0, 0, 0) before the negative-prompt conditioning (uc) is computed, so only the positive-prompt conditioning (c) is personalized toward the aesthetic embedding.

modules/sd_hijack.py
@@ -9,11 +9,14 @@ from torch.nn.functional import silu
 
 import modules.textual_inversion.textual_inversion
 from modules import prompt_parser, devices, sd_hijack_optimizations, shared
-from modules.shared import opts, device, cmd_opts
+from modules.shared import opts, device, cmd_opts, aesthetic_embeddings
 from modules.sd_hijack_optimizations import invokeAI_mps_available
 
 import ldm.modules.attention
 import ldm.modules.diffusionmodules.model
+from transformers import CLIPVisionModel, CLIPModel
+import torch.optim as optim
+import copy
 
 attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward
 diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity
@@ -109,13 +112,29 @@ class StableDiffusionModelHijack:
         _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text])
         return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count)
 
 
+def slerp(low, high, val):
+    low_norm = low / torch.norm(low, dim=1, keepdim=True)
+    high_norm = high / torch.norm(high, dim=1, keepdim=True)
+    omega = torch.acos((low_norm * high_norm).sum(1))
+    so = torch.sin(omega)
+    res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high
+    return res
+
+
 class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
     def __init__(self, wrapped, hijack):
         super().__init__()
         self.wrapped = wrapped
+        self.clipModel = CLIPModel.from_pretrained(
+            self.wrapped.transformer.name_or_path
+        )
+        del self.clipModel.vision_model
         self.hijack: StableDiffusionModelHijack = hijack
         self.tokenizer = wrapped.tokenizer
+        # self.vision = CLIPVisionModel.from_pretrained(self.wrapped.transformer.name_or_path).eval()
+        self.image_embs_name = None
+        self.image_embs = None
+        self.load_image_embs(None)
+
         self.token_mults = {}
 
         self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ',</w>'][0]
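For reference, a quick standalone check of the slerp (spherical interpolation) helper added above, assuming only torch (not part of the commit): at val=0 it returns low, at val=1 it returns high, and intermediate values stay on the arc between the normalized row vectors instead of cutting straight across.

```python
import torch

def slerp(low, high, val):
    # same formula as the slerp() added in the hunk above
    low_norm = low / torch.norm(low, dim=1, keepdim=True)
    high_norm = high / torch.norm(high, dim=1, keepdim=True)
    omega = torch.acos((low_norm * high_norm).sum(1))
    so = torch.sin(omega)
    return (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + \
           (torch.sin(val * omega) / so).unsqueeze(1) * high

low = torch.tensor([[1.0, 0.0]])
high = torch.tensor([[0.0, 1.0]])
print(slerp(low, high, 0.0))   # ~[[1., 0.]]
print(slerp(low, high, 0.5))   # ~[[0.7071, 0.7071]], stays on the unit arc
print(slerp(low, high, 1.0))   # ~[[0., 1.]]
```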
@@ -136,6 +155,23 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
             if mult != 1.0:
                 self.token_mults[ident] = mult
 
+    def set_aesthetic_params(self, aesthetic_lr, aesthetic_weight, aesthetic_steps, image_embs_name=None,
+                             aesthetic_slerp=True):
+        self.slerp = aesthetic_slerp
+        self.aesthetic_lr = aesthetic_lr
+        self.aesthetic_weight = aesthetic_weight
+        self.aesthetic_steps = aesthetic_steps
+        self.load_image_embs(image_embs_name)
+
+    def load_image_embs(self, image_embs_name):
+        if image_embs_name is None or len(image_embs_name) == 0:
+            image_embs_name = None
+        if image_embs_name is not None and self.image_embs_name != image_embs_name:
+            self.image_embs_name = image_embs_name
+            self.image_embs = torch.load(aesthetic_embeddings[self.image_embs_name], map_location=device)
+            self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True)
+            self.image_embs.requires_grad_(False)
+
     def tokenize_line(self, line, used_custom_terms, hijack_comments):
         id_end = self.wrapped.tokenizer.eos_token_id
 
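A minimal sketch of how these setters get used (not part of the diff; `clip` stands for the hijacked cond_stage_model): passing zeros effectively disables personalization, because the guard in the forward pass below requires non-zero lr, steps and weight plus a loaded image embedding. The concrete values here are hypothetical.

```python
# hypothetical usage, assuming `clip` is the hijacked FrozenCLIPEmbedderWithCustomWords
clip.set_aesthetic_params(0, 0, 0)                       # disable: the guard in the forward pass stays False
clip.set_aesthetic_params(aesthetic_lr=0.0001,           # learning rate for the per-prompt optimization
                          aesthetic_weight=0.9,          # blend strength of the personalized conditioning
                          aesthetic_steps=5,             # number of Adam steps
                          image_embs_name="my_style",    # key into shared.aesthetic_embeddings
                          aesthetic_slerp=True)          # blend with slerp instead of plain lerp
```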
@@ -333,7 +369,47 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
             z1 = self.process_tokens(tokens, multipliers)
             z = z1 if z is None else torch.cat((z, z1), axis=-2)
 
+            if len(text[0]) != 0 and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None:
+                if not opts.use_old_emphasis_implementation:
+                    remade_batch_tokens = [
+                        [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in
+                        remade_batch_tokens]
+
+                tokens = torch.asarray(remade_batch_tokens).to(device)
+                with torch.enable_grad():
+                    model = copy.deepcopy(self.clipModel).to(device)
+                    model.requires_grad_(True)
+
+                    # We optimize the model to maximize the similarity
+                    optimizer = optim.Adam(
+                        model.text_model.parameters(), lr=self.aesthetic_lr
+                    )
+
+                    for i in range(self.aesthetic_steps):
+                        text_embs = model.get_text_features(input_ids=tokens)
+                        text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True)
+                        sim = text_embs @ self.image_embs.T
+                        loss = -sim
+                        optimizer.zero_grad()
+                        loss.mean().backward()
+                        optimizer.step()
+
+                    zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
+                    if opts.CLIP_stop_at_last_layers > 1:
+                        zn = zn.hidden_states[-opts.CLIP_stop_at_last_layers]
+                        zn = model.text_model.final_layer_norm(zn)
+                    else:
+                        zn = zn.last_hidden_state
+                    model.cpu()
+                    del model
+
+                    if self.slerp:
+                        z = slerp(z, zn, self.aesthetic_weight)
+                    else:
+                        z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight
+
             remade_batch_tokens = rem_tokens
             batch_multipliers = rem_multipliers
             i += 1
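In plain terms, the block above implements the aesthetic-gradients idea: copy the CLIP text encoder, run a few Adam steps that push the prompt's text embedding toward the stored image embedding (maximizing cosine similarity), re-encode the prompt with the nudged weights, and blend the result into the original conditioning by lerp or slerp. The toy sketch below is not from the commit; a tiny linear "encoder" stands in for CLIP just to show the same mechanics end to end.

```python
import torch
import torch.optim as optim

torch.manual_seed(0)
dim = 8
encoder = torch.nn.Linear(dim, dim)                  # stand-in for the CLIP text encoder
prompt_feat = torch.randn(1, dim)                    # stand-in for the tokenized prompt
image_embs = torch.randn(1, dim)
image_embs = image_embs / image_embs.norm(dim=-1, keepdim=True)   # stored aesthetic embedding

z = encoder(prompt_feat).detach()                    # original conditioning

personalized = torch.nn.Linear(dim, dim)
personalized.load_state_dict(encoder.state_dict())   # work on a copy, like copy.deepcopy(self.clipModel)
opt = optim.Adam(personalized.parameters(), lr=1e-2)

for _ in range(5):                                    # aesthetic_steps
    text_embs = personalized(prompt_feat)
    text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True)
    loss = -(text_embs @ image_embs.T).mean()         # maximize cosine similarity
    opt.zero_grad()
    loss.backward()
    opt.step()

zn = personalized(prompt_feat).detach()               # re-encode with the nudged weights
weight = 0.7                                          # aesthetic_weight
z_blended = z * (1 - weight) + zn * weight            # lerp branch; the slerp branch uses slerp(z, zn, weight)
print(z_blended.shape)
```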

modules/shared.py
@@ -30,6 +30,8 @@ parser.add_argument("--no-half-vae", action='store_true', help="do not switch th
 parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)")
 parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI")
 parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)")
+parser.add_argument("--aesthetic_embeddings-dir", type=str, default=os.path.join(script_path, 'aesthetic_embeddings'),
+                    help="aesthetic_embeddings directory (default: aesthetic_embeddings)")
 parser.add_argument("--hypernetwork-dir", type=str, default=os.path.join(models_path, 'hypernetworks'), help="hypernetwork directory")
 parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui")
 parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage")
@@ -90,6 +92,9 @@ os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True)
 hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir)
 loaded_hypernetwork = None
 
+aesthetic_embeddings = {f.replace(".pt", ""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in
+                        os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")}
+
 
 def reload_hypernetworks():
     global hypernetworks
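The comprehension above just maps embedding names (file name minus the .pt extension) to their full paths. With two hypothetical files on disk it would produce something like:

```python
# given aesthetic_embeddings/laion_7plus.pt and aesthetic_embeddings/my_style.pt (hypothetical files)
aesthetic_embeddings = {
    "laion_7plus": "aesthetic_embeddings/laion_7plus.pt",
    "my_style": "aesthetic_embeddings/my_style.pt",
}
```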

modules/textual_inversion/dataset.py
@@ -48,7 +48,7 @@ class PersonalizedBase(Dataset):
         print("Preparing dataset...")
         for path in tqdm.tqdm(self.image_paths):
             try:
-                image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)
+                image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.Resampling.BICUBIC)
             except Exception:
                 continue
 
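PIL.Image.Resampling.BICUBIC is the enum introduced in Pillow 9.1, which deprecated the bare PIL.Image.BICUBIC constant used before. If older Pillow versions still need to be supported, a version-tolerant lookup like the following would work (my illustration, not part of the commit):

```python
import PIL.Image

# Resampling exists on Pillow >= 9.1; fall back to the module-level constant otherwise
BICUBIC = getattr(PIL.Image, "Resampling", PIL.Image).BICUBIC
image = PIL.Image.new("RGB", (768, 768)).resize((512, 512), BICUBIC)
```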

modules/textual_inversion/textual_inversion.py
@@ -172,7 +172,15 @@ def create_embedding(name, num_vectors_per_token, init_text='*'):
     return fn
 
 
-def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt):
+def batched(dataset, total, n=1):
+    for ndx in range(0, total, n):
+        yield [dataset.__getitem__(i) for i in range(ndx, min(ndx + n, total))]
+
+
+def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps,
+                    create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding,
+                    preview_image_prompt, batch_size=1,
+                    gradient_accumulation=1):
     assert embedding_name, 'embedding not selected'
 
     shared.state.textinfo = "Initializing textual inversion training..."
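batched() simply walks the dataset by index and yields lists of up to n items; it works with anything indexable, as the quick check below shows (a plain list stands in for the dataset; not part of the commit):

```python
def batched(dataset, total, n=1):
    # same helper as added in the hunk above
    for ndx in range(0, total, n):
        yield [dataset.__getitem__(i) for i in range(ndx, min(ndx + n, total))]

data = ["a", "b", "c", "d", "e"]
print(list(batched(data, total=len(data), n=2)))
# [['a', 'b'], ['c', 'd'], ['e']]
```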
@@ -204,7 +212,11 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
 
     shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
     with torch.autocast("cuda"):
-        ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file)
+        ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width,
+                                                                height=training_height,
+                                                                repeats=shared.opts.training_image_repeats_per_epoch,
+                                                                placeholder_token=embedding_name, model=shared.sd_model,
+                                                                device=devices.device, template_file=template_file)
 
     hijack = sd_hijack.model_hijack
 
@@ -223,7 +235,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
     scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
     optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate)
 
-    pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step)
+    pbar = tqdm.tqdm(enumerate(batched(ds, steps - ititial_step, batch_size)), total=steps - ititial_step)
     for i, entry in pbar:
         embedding.step = i + ititial_step
 
@@ -235,17 +247,20 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
             break
 
         with torch.autocast("cuda"):
-            c = cond_model([entry.cond_text])
-            x = entry.latent.to(devices.device)
-            loss = shared.sd_model(x.unsqueeze(0), c)[0]
+            c = cond_model([e.cond_text for e in entry])
+
+            x = torch.stack([e.latent for e in entry]).to(devices.device)
+            loss = shared.sd_model(x, c)[0]
             del x
 
             losses[embedding.step % losses.shape[0]] = loss.item()
 
-            optimizer.zero_grad()
             loss.backward()
-            optimizer.step()
+            if ((i + 1) % gradient_accumulation == 0) or (i + 1 == steps - ititial_step):
+                optimizer.step()
+                optimizer.zero_grad()
 
 
         epoch_num = embedding.step // len(ds)
         epoch_step = embedding.step - (epoch_num * len(ds)) + 1
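With this change each entry is a list of batch_size dataset items, and the optimizer only steps every gradient_accumulation iterations, so gradients from several batches sum before a weight update (effective batch roughly batch_size x gradient_accumulation). A minimal standalone illustration of that stepping pattern (toy model, not from the commit):

```python
import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
gradient_accumulation = 4
total_steps = 12

for i in range(total_steps):
    x = torch.randn(2, 4)                      # one mini-batch
    loss = model(x).pow(2).mean()
    loss.backward()                            # gradients accumulate across iterations
    if ((i + 1) % gradient_accumulation == 0) or (i + 1 == total_steps):
        optimizer.step()                       # update using the summed gradients
        optimizer.zero_grad()
```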
@@ -259,7 +274,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
         if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0:
             last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png')
 
-            preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt
+            preview_text = entry[0].cond_text if preview_image_prompt == "" else preview_image_prompt
 
             p = processing.StableDiffusionProcessingTxt2Img(
                 sd_model=shared.sd_model,
@@ -305,7 +320,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
 <p>
 Loss: {losses.mean():.7f}<br/>
 Step: {embedding.step}<br/>
-Last prompt: {html.escape(entry.cond_text)}<br/>
+Last prompt: {html.escape(entry[-1].cond_text)}<br/>
 Last saved embedding: {html.escape(last_saved_file)}<br/>
 Last saved image: {html.escape(last_saved_image)}<br/>
 </p>

modules/txt2img.py
@@ -6,7 +6,14 @@ import modules.processing as processing
 from modules.ui import plaintext_to_html
 
 
-def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, scale_latent: bool, denoising_strength: float, *args):
+def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int,
+            restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int,
+            subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool,
+            height: int, width: int, enable_hr: bool, scale_latent: bool, denoising_strength: float,
+            aesthetic_lr=0,
+            aesthetic_weight=0, aesthetic_steps=0,
+            aesthetic_imgs=None,
+            aesthetic_slerp=False, *args):
     p = StableDiffusionProcessingTxt2Img(
         sd_model=shared.sd_model,
         outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples,
@@ -40,7 +47,7 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
     processed = modules.scripts.scripts_txt2img.run(p, *args)
 
     if processed is None:
-        processed = process_images(p)
+        processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp)
 
     shared.total_tqdm.clear()
 
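Putting the pieces together: the new Gradio controls added to ui.py below feed these extra txt2img arguments, which are passed straight through to process_images and, from there, to the hijacked CLIP embedder. A hedged sketch of driving the extended API directly from a script (argument values, prompt and embedding name are illustrative; it also assumes the webui modules are importable and a model is already loaded):

```python
# illustrative only - assumes the webui environment is set up and shared.sd_model is loaded
from modules.processing import StableDiffusionProcessingTxt2Img, process_images
import modules.shared as shared

p = StableDiffusionProcessingTxt2Img(
    sd_model=shared.sd_model,
    prompt="a painting of a lighthouse at dusk",   # hypothetical prompt
    steps=20,
    width=512,
    height=512,
)
processed = process_images(
    p,
    aesthetic_lr=0.0001,        # learning rate for the per-prompt CLIP personalization
    aesthetic_weight=0.9,       # how strongly the personalized conditioning is blended in
    aesthetic_steps=5,          # number of Adam steps
    aesthetic_imgs="my_style",  # name of a .pt file in aesthetic_embeddings/ (hypothetical)
    aesthetic_slerp=True,       # blend with slerp instead of plain lerp
)
```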

modules/ui.py
@@ -24,7 +24,8 @@ import gradio.routes
 
 from modules import sd_hijack
 from modules.paths import script_path
-from modules.shared import opts, cmd_opts
+from modules.shared import opts, cmd_opts, aesthetic_embeddings
 
 if cmd_opts.deepdanbooru:
     from modules.deepbooru import get_deepbooru_tags
 import modules.shared as shared
@@ -534,6 +535,14 @@ def create_ui(wrap_gradio_gpu_call):
                         width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
                         height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
 
+                with gr.Group():
+                    aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005")
+                    aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.7)
+                    aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=50)
+
+                    aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None)
+                    aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False)
+
                 with gr.Row():
                     restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
                     tiling = gr.Checkbox(label='Tiling', value=False)
@@ -586,25 +595,30 @@ def create_ui(wrap_gradio_gpu_call):
                 fn=wrap_gradio_gpu_call(modules.txt2img.txt2img),
                 _js="submit",
                 inputs=[
                     txt2img_prompt,
                     txt2img_negative_prompt,
                     txt2img_prompt_style,
                     txt2img_prompt_style2,
                     steps,
                     sampler_index,
                     restore_faces,
                     tiling,
                     batch_count,
                     batch_size,
                     cfg_scale,
                     seed,
                     subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox,
                     height,
                     width,
                     enable_hr,
                     scale_latent,
                     denoising_strength,
+                    aesthetic_lr,
+                    aesthetic_weight,
+                    aesthetic_steps,
+                    aesthetic_imgs,
+                    aesthetic_slerp
                 ] + custom_inputs,
                 outputs=[
                     txt2img_gallery,
                     generation_info,
@@ -1097,6 +1111,9 @@ def create_ui(wrap_gradio_gpu_call):
                 template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt"))
                 training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
                 training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+                batch_size = gr.Slider(minimum=1, maximum=64, step=1, label="Batch Size", value=4)
+                gradient_accumulation = gr.Slider(minimum=1, maximum=256, step=1, label="Gradient accumulation",
+                                                  value=1)
                 steps = gr.Number(label='Max steps', value=100000, precision=0)
                 create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0)
                 save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0)
@@ -1180,6 +1197,8 @@ def create_ui(wrap_gradio_gpu_call):
                     template_file,
                     save_image_with_stored_embedding,
                     preview_image_prompt,
+                    batch_size,
+                    gradient_accumulation
                 ],
                 outputs=[
                     ti_output,