EasyAnimate

Running

EasyAnimate / easyanimate /vae /ldm /modules /vaemodules /attention_processors.py

bubbliiiing

Create Code

19fe404 over 1 year ago

5.07 kB

	from typing import TYPE_CHECKING

	import torch
	import torch.nn.functional as F

	if TYPE_CHECKING:
	from .attention import Attention

	class AttnProcessor:
	    r"""
	    Default processor for performing attention-related computations.

	    Attention is computed explicitly: query/key/value are projected by the
	    ``attn`` module, attention probabilities come from
	    ``attn.get_attention_scores`` and are applied to the values with
	    ``torch.bmm``.
	    """

	    def __call__(
	        self,
	        attn: "Attention",
	        hidden_states: torch.FloatTensor,
	        encoder_hidden_states=None,
	        attention_mask=None,
	        temb=None,
	    ) -> torch.Tensor:
	        r"""
	        Run attention over ``hidden_states``.

	        Args:
	            attn: Attention module supplying the projections (``to_q``,
	                ``to_k``, ``to_v``, ``to_out``), the optional norms and the
	                helper methods used below.
	            hidden_states: 3-dimensional input (laid out as ``B, L, C``).
	            encoder_hidden_states: Optional source for keys and values.
	                When ``None``, the (possibly normalized) ``hidden_states``
	                are used instead, i.e. self-attention.
	            attention_mask: Optional mask; it is passed through
	                ``attn.prepare_attention_mask`` before use.
	            temb: Optional embedding forwarded to ``attn.spatial_norm``
	                when that norm is configured.

	        Returns:
	            The attention output, with the input added back when
	            ``attn.residual_connection`` is set, divided by
	            ``attn.rescale_output_factor``.
	        """
	        # Keep the untouched input for the optional residual connection.
	        residual = hidden_states

	        if attn.spatial_norm is not None:
	            # Forward the caller's ``temb`` instead of discarding it.
	            hidden_states = attn.spatial_norm(hidden_states, temb=temb)

	        # B, L, C
	        assert hidden_states.ndim == 3, f"Hidden states must be 3-dimensional, got {hidden_states.ndim}"

	        # The mask is sized against the key/value sequence, which is the
	        # encoder sequence when cross-attending.
	        batch_size, sequence_length, _ = (
	            hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
	        )
	        attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)

	        if attn.group_norm is not None:
	            # The group norm is applied with channels on dim 1, so swap the
	            # last two dims around the call.
	            hidden_states = attn.group_norm(hidden_states.transpose(1, 2))
	            hidden_states = hidden_states.transpose(1, 2)

	        query = attn.to_q(hidden_states)

	        if encoder_hidden_states is None:
	            encoder_hidden_states = hidden_states
	        elif attn.norm_cross:
	            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

	        key = attn.to_k(encoder_hidden_states)
	        value = attn.to_v(encoder_hidden_states)

	        query = attn.head_to_batch_dim(query)
	        key = attn.head_to_batch_dim(key)
	        value = attn.head_to_batch_dim(value)

	        attention_probs = attn.get_attention_scores(query, key, attention_mask)
	        hidden_states = torch.bmm(attention_probs, value)
	        hidden_states = attn.batch_to_head_dim(hidden_states)

	        # Output projection followed by dropout.
	        hidden_states = attn.to_out(hidden_states)
	        hidden_states = attn.dropout(hidden_states)

	        if attn.residual_connection:
	            hidden_states = hidden_states + residual

	        hidden_states = hidden_states / attn.rescale_output_factor

	        return hidden_states

	class AttnProcessor2_0:
	r"""
	Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
	"""

	def __init__(self):
	if not hasattr(F, "scaled_dot_product_attention"):
	raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")

	def __call__(
	self,
	attn: "Attention",
	hidden_states: torch.FloatTensor,
	encoder_hidden_states,
	attention_mask,
	temb = None,
	) -> torch.FloatTensor:
	residual = hidden_states
	if attn.spatial_norm is not None:
	hidden_states = attn.spatial_norm(hidden_states, temb = None)

	# B, L, C
	assert hidden_states.ndim == 3, f"Hidden states must be 3-dimensional, got {hidden_states.ndim}"

	batch_size, sequence_length, _ = (
	hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
	)

	if attention_mask is not None:
	attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
	# scaled_dot_product_attention expects attention_mask shape to be
	# (batch, heads, source_length, target_length)
	attention_mask = attention_mask.view(batch_size, attn.nheads, -1, attention_mask.shape[-1])

	if attn.group_norm is not None:
	hidden_states = attn.group_norm(hidden_states.transpose(1, 2))
	hidden_states = hidden_states.transpose(1, 2)

	query: torch.Tensor = attn.to_q(hidden_states)

	if encoder_hidden_states is None:
	encoder_hidden_states = hidden_states
	elif attn.norm_cross:
	encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

	key: torch.Tensor = attn.to_k(encoder_hidden_states)
	value: torch.Tensor = attn.to_v(encoder_hidden_states)

	inner_dim = key.shape[-1]
	head_dim = inner_dim // attn.nheads

	query = query.view(batch_size, -1, attn.nheads, head_dim).transpose(1, 2)

	key = key.view(batch_size, -1, attn.nheads, head_dim).transpose(1, 2)
	value = value.view(batch_size, -1, attn.nheads, head_dim).transpose(1, 2)

	# the output of sdp = (batch, num_heads, seq_len, head_dim)
	hidden_states = F.scaled_dot_product_attention(
	query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False, scale=attn.scale
	)

	hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.nheads * head_dim)
	hidden_states = hidden_states.to(query.dtype)

	hidden_states = attn.to_out(hidden_states)
	hidden_states = attn.dropout(hidden_states)

	if attn.residual_connection:
	hidden_states = hidden_states + residual

	hidden_states = hidden_states / attn.rescale_output_factor

	return hidden_states