Add InvokeAI and lstein to credits, add back CUDA support
This commit is contained in:
parent
98fd5cde72
commit
574c8e554a
@ -123,6 +123,7 @@ The documentation was moved from this README over to the project's [wiki](https:
|
|||||||
- LDSR - https://github.com/Hafiidz/latent-diffusion
|
- LDSR - https://github.com/Hafiidz/latent-diffusion
|
||||||
- Ideas for optimizations - https://github.com/basujindal/stable-diffusion
|
- Ideas for optimizations - https://github.com/basujindal/stable-diffusion
|
||||||
- Doggettx - Cross Attention layer optimization - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing.
|
- Doggettx - Cross Attention layer optimization - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing.
|
||||||
|
- InvokeAI, lstein - Cross Attention layer optimization - https://github.com/invoke-ai/InvokeAI (originally https://github.com/lstein/stable-diffusion)
|
||||||
- Rinon Gal - Textual Inversion - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas).
|
- Rinon Gal - Textual Inversion - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas).
|
||||||
- Idea for SD upscale - https://github.com/jquesnelle/txt2imghd
|
- Idea for SD upscale - https://github.com/jquesnelle/txt2imghd
|
||||||
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
|
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
|
||||||
|
@ -173,7 +173,20 @@ def einsum_op_tensor_mem(q, k, v, max_tensor_mb):
|
|||||||
return einsum_op_slice_0(q, k, v, q.shape[0] // div)
|
return einsum_op_slice_0(q, k, v, q.shape[0] // div)
|
||||||
return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
|
return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
|
||||||
|
|
||||||
|
def einsum_op_cuda(q, k, v):
    """Run the attention einsum on a CUDA device within the currently free memory.

    Estimates how much memory is available on ``q.device`` and forwards that
    figure, in MiB, as the ``max_tensor_mb`` budget of ``einsum_op_tensor_mem``.

    Args:
        q, k, v: tensors passed through unchanged to ``einsum_op_tensor_mem``;
            ``q.device`` selects the CUDA device that is queried.

    Returns:
        Whatever ``einsum_op_tensor_mem`` returns for the computed budget.
    """
    stats = torch.cuda.memory_stats(q.device)
    mem_active = stats['active_bytes.all.current']
    mem_reserved = stats['reserved_bytes.all.current']
    # Free bytes as reported by the CUDA driver (memory torch has not reserved).
    mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
    # Bytes torch's allocator has reserved but is not actively using.
    mem_free_torch = mem_reserved - mem_active
    mem_free_total = mem_free_cuda + mem_free_torch
    # Divide by a safety factor, as there's copying and fragmentation;
    # (1 << 20) converts bytes to MiB.
    # This is a module-level function, so the helper is called directly
    # (there is no `self` in scope here).
    return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
|
||||||
|
|
||||||
def einsum_op(q, k, v):
|
def einsum_op(q, k, v):
|
||||||
|
if q.device.type == 'cuda':
|
||||||
|
return einsum_op_cuda(q, k, v)
|
||||||
|
|
||||||
if q.device.type == 'mps':
|
if q.device.type == 'mps':
|
||||||
if mem_total_gb >= 32:
|
if mem_total_gb >= 32:
|
||||||
return einsum_op_mps_v1(q, k, v)
|
return einsum_op_mps_v1(q, k, v)
|
||||||
|
Loading…
Reference in New Issue
Block a user