diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index eaff12f0..372555ff 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -372,7 +372,7 @@ def scaled_dot_product_attention_forward(self, x, context=None, mask=None):
 
     dtype = q.dtype
     if shared.opts.upcast_attn:
-        q, k = q.float(), k.float()
+        q, k, v = q.float(), k.float(), v.float()
 
     # the output of sdp = (batch, num_heads, seq_len, head_dim)
     hidden_states = torch.nn.functional.scaled_dot_product_attention(
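
Note (not part of the patch): torch.nn.functional.scaled_dot_product_attention expects q, k and v to share a dtype, so upcasting only q and k under upcast_attn leaves v in half precision and the call is rejected with a dtype mismatch. A minimal standalone sketch of the pattern the patched code follows, with made-up tensor shapes for illustration:

import torch
import torch.nn.functional as F

# Hypothetical half-precision attention inputs: (batch, num_heads, seq_len, head_dim)
q = torch.randn(1, 8, 77, 64, dtype=torch.float16)
k = torch.randn(1, 8, 77, 64, dtype=torch.float16)
v = torch.randn(1, 8, 77, 64, dtype=torch.float16)

dtype = q.dtype
# Upcast all three tensors, as in the patch; upcasting only q and k
# would hand SDPA mixed float32/float16 inputs.
q, k, v = q.float(), k.float(), v.float()

hidden_states = F.scaled_dot_product_attention(q, k, v)
hidden_states = hidden_states.to(dtype)  # cast the result back to the original dtype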