import torch
import torch.nn as nn
from einops import rearrange
# Transformer Attention模块定义
class TAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    The input is expected to be a 4-D tensor laid out as
    ``(batch, patch, tokens, dim)``; attention is computed along the
    ``tokens`` axis independently for every ``(batch, patch, head)``.

    Args:
        dim: Size of the last (feature) axis of the input and output.
        heads: Number of attention heads.
        dim_head: Feature size of each head.
        dropout: Dropout probability applied after the output projection.
    """

    def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads
        # A single head whose width already equals `dim` needs no output
        # projection, so an identity mapping is used in that case.
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5
        self.attend = nn.Softmax(dim=-1)
        # One fused projection producing q, k and v side by side.
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
        # NOTE(review): the Dropout layer was restored from the otherwise
        # unused `dropout` argument -- confirm against the reference model.
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout),
        ) if project_out else nn.Identity()

    def forward(self, x):
        """Return the attended features, same shape as ``x``."""
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        # Split the fused feature axis into (heads, dim_head) and move the
        # head axis in front of the token axis.
        q, k, v = map(
            lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads),
            qkv,
        )
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        # Merge the heads back into a single feature axis.
        out = rearrange(out, 'b p h n d -> b p n (h d)')
        return self.to_out(out)
# Transformer encoder stack used inside the MobileViT block
class MoblieTrans(nn.Module):
    """Stack of pre-norm transformer layers (attention + feed-forward).

    Each of the ``depth`` layers applies a ``PreNorm``-wrapped
    ``TAttention`` followed by a ``PreNorm``-wrapped ``FeedForward``, each
    with a residual connection.

    Args:
        dim: Feature size of the tokens.
        depth: Number of (attention, feed-forward) layer pairs.
        heads: Number of attention heads per layer.
        dim_head: Feature size of each attention head.
        mlp_dim: Hidden size of the feed-forward sub-layer.
        dropout: Dropout probability forwarded to both sub-layers.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()
        # NOTE(review): the attribute name `layers` was restored during
        # cleanup; it determines state_dict keys, so confirm it against any
        # existing checkpoints.
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, TAttention(dim, heads, dim_head, dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout)),
            ]))

    def forward(self, x):
        """Run ``x`` through every layer and return the result."""
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x
# MV2 block (MobileNetV2-style inverted residual) definition
class MV2B(nn.Module):
    """Convolutional block: optional 1x1 expansion, 3x3 depthwise, 1x1 projection.

    A residual connection is added when ``stride == 1`` and the input and
    output channel counts match.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.
        stride: Stride of the depthwise convolution; must be 1 or 2.
        expansion: Multiplier giving the hidden channel count
            (``int(ch_in * expansion)``). When it is 1 the initial 1x1
            expansion convolution is skipped.

    NOTE(review): the BatchNorm2d/SiLU layers and the attribute name
    ``conv`` were restored during cleanup -- confirm against the reference
    model and any existing checkpoints.
    """

    def __init__(self, ch_in, ch_out, stride=1, expansion=4):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(ch_in * expansion)
        self.use_res_connect = self.stride == 1 and ch_in == ch_out

        if expansion == 1:
            self.conv = nn.Sequential(
                # depthwise 3x3 (one filter per channel via groups=hidden_dim)
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # pointwise 1x1 projection, no activation
                nn.Conv2d(hidden_dim, ch_out, 1, 1, 0, bias=False),
                nn.BatchNorm2d(ch_out),
            )
        else:
            self.conv = nn.Sequential(
                # pointwise 1x1 expansion
                nn.Conv2d(ch_in, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # depthwise 3x3 (one filter per channel via groups=hidden_dim)
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # pointwise 1x1 projection, no activation
                nn.Conv2d(hidden_dim, ch_out, 1, 1, 0, bias=False),
                nn.BatchNorm2d(ch_out),
            )

    def forward(self, x):
        """Apply the block, adding the input back when shapes allow it."""
        if self.use_res_connect:
            return x + self.conv(x)
        return self.conv(x)
# MobileViT Block定义
class MobileViT_Block(nn.Module):
    """MobileViT block: local convolutions, patch-wise transformer, fusion.

    The input feature map is encoded by an n x n and a 1x1 convolution,
    unfolded into patches, processed by a ``MoblieTrans`` stack (fixed at
    4 heads of width 8), folded back, projected to ``ch_in`` channels and
    finally fused with the untouched input by concatenation and an n x n
    convolution.

    Args:
        ch_in: Number of input (and output) channels.
        dim: Token feature size used inside the transformer.
        depth: Number of transformer layers.
        kernel_size: Kernel size of the first and last convolutions.
        patch_size: ``(ph, pw)`` patch height and width; the feature-map
            height and width must be divisible by them.
        mlp_dim: Hidden size of the transformer feed-forward layers.
        dropout: Dropout probability forwarded to the transformer.
    """

    def __init__(self, ch_in, dim=64, depth=2, kernel_size=3, patch_size=(2, 2), mlp_dim=int(64 * 2), dropout=0.):
        super().__init__()
        self.ph, self.pw = patch_size

        self.conv1 = conv_nxn_bn(ch_in, ch_in, kernel_size)
        self.conv2 = conv_1x1_bn(ch_in, dim)

        # NOTE(review): the attribute name `transformer` was restored
        # during cleanup; confirm it against any existing checkpoints.
        self.transformer = MoblieTrans(dim, depth, 4, 8, mlp_dim, dropout)

        self.conv3 = conv_1x1_bn(dim, ch_in)
        # Takes 2 * ch_in channels because the input is concatenated back in.
        self.conv4 = conv_nxn_bn(2 * ch_in, ch_in, kernel_size)

    def forward(self, x):
        """Return a feature map with ``ch_in`` channels."""
        # Keep a copy of the input for the fusion step.
        y = x.clone()

        # Local representations
        x = self.conv1(x)
        x = self.conv2(x)

        # Global representations: unfold into patches, attend, fold back.
        _, _, h, w = x.shape
        x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
        x = self.transformer(x)
        x = rearrange(
            x,
            'b (ph pw) (h w) d -> b d (h ph) (w pw)',
            h=h // self.ph,
            w=w // self.pw,
            ph=self.ph,
            pw=self.pw,
        )

        # Fusion with the original input along the channel axis.
        x = self.conv3(x)
        x = torch.cat((x, y), 1)
        x = self.conv4(x)
        return x
# 1x1卷积层+BatchNorm+SiLU激活函数
def conv_1x1_bn(ch_in, ch_out):
    """Build a 1x1 convolution followed by BatchNorm2d and SiLU.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.

    Returns:
        An ``nn.Sequential`` of Conv2d (kernel 1, stride 1, no padding,
        no bias), BatchNorm2d and SiLU.
    """
    return nn.Sequential(
        nn.Conv2d(ch_in, ch_out, 1, 1, 0, bias=False),
        nn.BatchNorm2d(ch_out),
        nn.SiLU(),
    )
# nxn卷积层+BatchNorm+SiLU激活函数
def conv_nxn_bn(ch_in, ch_out, kernal_size=3, stride=1):
    """Build an n x n convolution followed by BatchNorm2d and SiLU.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.
        kernal_size: Convolution kernel size (parameter name kept as-is
            for backward compatibility).
        stride: Convolution stride.

    Returns:
        An ``nn.Sequential`` of Conv2d (no bias), BatchNorm2d and SiLU.
    """
    return nn.Sequential(
        # Padding is fixed at 1, so the spatial size is preserved only for
        # kernal_size == 3 with stride 1.
        nn.Conv2d(ch_in, ch_out, kernal_size, stride, 1, bias=False),
        nn.BatchNorm2d(ch_out),
        nn.SiLU(),
    )
# LayerNormalization + Function模块
class PreNorm(nn.Module):
    """Apply LayerNorm over the last axis, then call a wrapped function.

    Args:
        dim: Size of the last axis, passed to ``nn.LayerNorm``.
        fn: Callable (typically a module) invoked on the normalized input.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(norm(x), **kwargs)``; extra kwargs go to ``fn`` only."""
        return self.fn(self.norm(x), **kwargs)
# Feed Forward模块
class FeedForward(nn.Module):
    """Two-layer MLP applied to the last axis: dim -> hidden_dim -> dim.

    Args:
        dim: Input and output feature size.
        hidden_dim: Feature size of the intermediate layer.
        dropout: Dropout probability applied after each linear layer.

    NOTE(review): the SiLU activation, the Dropout layers and the
    attribute name ``net`` were restored during cleanup -- confirm against
    the reference model and any existing checkpoints.
    """

    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Return the MLP output, same shape as ``x``."""
        return self.net(x)