import torch
import torch.nn as nn
from einops import rearrange
def conv_1x1_bn(inp, oup):
    """Build a pointwise (1x1) convolution followed by batch norm and an activation.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.

    Returns:
        An ``nn.Sequential`` of ``Conv2d`` (kernel 1, stride 1, padding 0,
        no bias), ``BatchNorm2d`` and an activation.
    """
    # NOTE(review): the container and activation names were missing from the
    # damaged source text. nn.Sequential / nn.SiLU are restored here on the
    # assumption that this mirrors the reference MobileViT layers -- confirm
    # the activation against the upstream file or a trained checkpoint.
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.SiLU(),
    )
def conv_nxn_bn(inp, oup, kernal_size=3, stride=1):
    """Build an n x n convolution followed by batch norm and an activation.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        kernal_size: Side length of the square kernel (spelling kept for
            backward compatibility with keyword callers).
        stride: Convolution stride.

    Returns:
        An ``nn.Sequential`` of ``Conv2d`` (no bias), ``BatchNorm2d`` and an
        activation.
    """
    # The padding used to be hard-coded to 1, which only keeps the spatial
    # size for a 3x3 kernel. Deriving it from the kernel size gives the same
    # value for the default and keeps "same" padding for other odd kernels.
    padding = kernal_size // 2
    # NOTE(review): the container and activation names were missing from the
    # damaged source text; nn.Sequential / nn.SiLU are assumed -- confirm.
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernal_size, stride, padding, bias=False),
        nn.BatchNorm2d(oup),
        nn.SiLU(),
    )
class PreNorm(nn.Module):
    """Apply a normalisation over the last dimension, then a wrapped callable.

    NOTE(review): the base class, attribute names and the normalisation layer
    were missing from the damaged source text. ``nn.Module``, ``self.norm``,
    ``self.fn`` and ``nn.LayerNorm`` are reconstructions -- confirm against
    the upstream file (attribute names affect ``state_dict`` keys).
    """

    def __init__(self, dim, fn):
        """
        Args:
            dim: Size of the last dimension that is normalised.
            fn: Callable (typically a module) applied to the normalised input.
        """
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(norm(x), **kwargs)``; extra keywords go to ``fn`` untouched."""
        return self.fn(self.norm(x), **kwargs)
class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention over per-patch token sets.

    The input is split into heads with the einops pattern
    ``'b p n (h d) -> b p h n d'``, so it must be 4-D with the last dimension
    equal to ``dim``.

    NOTE(review): the base class, several attribute names and the layer /
    function names (Linear, Softmax, Dropout, Identity, matmul, transpose)
    were missing from the damaged source text and are reconstructed here --
    confirm against the upstream file.
    """

    def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
        """
        Args:
            dim: Size of the input (and output) feature dimension.
            heads: Number of attention heads.
            dim_head: Feature size of each head.
            dropout: Dropout probability applied after the output projection.
        """
        super().__init__()
        inner_dim = dim_head * heads
        # A single head whose width already equals `dim` needs no projection.
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5
        self.attend = nn.Softmax(dim=-1)
        # One linear layer produces queries, keys and values in a single pass.
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout),
        ) if project_out else nn.Identity()

    def forward(self, x):
        """Return the attention output, with the same shape as ``x``."""
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = (
            rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads) for t in qkv
        )
        # Scaled similarity between every pair of tokens inside a patch group.
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        out = rearrange(out, 'b p h n d -> b p n (h d)')
        return self.to_out(out)
class FeedForward(nn.Module):
    """Two-layer position-wise MLP: expand, activate, drop out, project back.

    NOTE(review): the base class, the ``net`` attribute name and every layer
    name were missing from the damaged source text. Linear / SiLU / Dropout
    inside an ``nn.Sequential`` are reconstructions based on the argument
    lists that survived -- confirm the activation against the upstream file.
    """

    def __init__(self, dim, hidden_dim, dropout=0.):
        """
        Args:
            dim: Size of the input and output feature dimension.
            hidden_dim: Size of the intermediate feature dimension.
            dropout: Dropout probability used after each linear layer.
        """
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        return self.net(x)
class MBTransformer(nn.Module):
    """Stack of ``depth`` pre-norm attention + feed-forward layers with residuals.

    NOTE(review): the base class, the ``layers`` attribute name and the
    container type were missing from the damaged source text;
    ``nn.ModuleList`` is a reconstruction -- confirm against the upstream file.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        """
        Args:
            dim: Feature dimension of the tokens.
            depth: Number of (attention, feed-forward) layer pairs.
            heads: Number of attention heads per layer.
            dim_head: Feature size of each attention head.
            mlp_dim: Hidden size of each feed-forward block.
            dropout: Dropout probability passed to both sub-blocks.
        """
        super().__init__()
        # ModuleList (not a plain list) so the sub-modules are registered.
        self.layers = nn.ModuleList(
            nn.ModuleList([
                PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout)),
            ])
            for _ in range(depth)
        )

    def forward(self, x):
        """Run every layer pair in order, adding a residual around each sub-block."""
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x
class MV2Block(nn.Module):
    """Inverted-residual block: optional 1x1 expand, 3x3 depthwise, 1x1 linear project.

    The input is added back to the output only when ``stride == 1`` and the
    input and output channel counts match.

    NOTE(review): the base class, the ``stride`` / ``conv`` attribute names,
    the container and the activations were missing from the damaged source
    text; ``nn.Sequential`` / ``nn.SiLU`` are reconstructions -- confirm
    against the upstream file.
    """

    def __init__(self, inp, oup, stride=1, expansion=4):
        """
        Args:
            inp: Number of input channels.
            oup: Number of output channels.
            stride: Stride of the depthwise convolution; must be 1 or 2.
            expansion: Channel multiplier for the hidden (depthwise) stage.
        """
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(inp * expansion)
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expansion != 1:
            # pw: expand to the hidden width (skipped when there is nothing
            # to expand, i.e. hidden_dim == inp).
            layers += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
            ]
        layers += [
            # dw: one 3x3 filter per channel
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1,
                      groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.SiLU(),
            # pw-linear: project to the output width, no activation
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the residual is enabled."""
        out = self.conv(x)
        return x + out if self.use_res_connect else out
class MobileViTv3_block(nn.Module):
    """Local convolution + patch-wise transformer block with feature fusion.

    The forward pass applies an n x n and a 1x1 convolution, runs a
    transformer over unfolded patches, projects back with a 1x1 convolution,
    concatenates with the pre-transformer features, fuses with an n x n
    convolution, adds the block input and finishes with an ``MV2Block``.

    NOTE(review): the base class, the ``ph`` / ``pw`` / ``transformer``
    attribute names and the clone / shape / concatenation calls were missing
    from the damaged source text and are reconstructed here -- confirm
    against the upstream file.
    """

    def __init__(self, channel, dim, depth=2, kernel_size=3, patch_size=(2, 2), mlp_dim=int(64*2), dropout=0.):
        """
        Args:
            channel: Number of input and output channels of the block.
            dim: Token dimension used inside the transformer.
            depth: Number of transformer layers.
            kernel_size: Kernel size of the two n x n convolutions.
            patch_size: ``(height, width)`` of each patch; the feature map
                height and width must be divisible by these.
            mlp_dim: Hidden size of the transformer feed-forward blocks.
            dropout: Dropout probability inside the transformer.
        """
        super().__init__()
        self.ph, self.pw = patch_size
        self.mv01 = MV2Block(channel, channel)
        self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
        self.conv3 = conv_1x1_bn(dim, channel)
        self.conv2 = conv_1x1_bn(channel, dim)
        self.transformer = MBTransformer(dim, depth, 4, 8, mlp_dim, dropout)
        # The fusion conv sees conv3's output (`channel` maps) concatenated
        # with the conv2 features (`dim` maps). This was `2 * channel`, which
        # is only correct when dim == channel; the sum is identical in that
        # case and also works when the two differ.
        self.conv4 = conv_nxn_bn(channel + dim, channel, kernel_size)

    def forward(self, x):
        """Return a tensor with the same shape as the 4-D input ``x``."""
        y = x.clone()  # block input, added back before the final MV2Block

        # Local representation
        x = self.conv1(x)
        x = self.conv2(x)
        z = x.clone()  # pre-transformer features, reused in the fusion step

        # Global representation: unfold into patches, attend, fold back
        _, _, h, w = x.shape
        x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
        x = self.transformer(x)
        x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)',
                      h=h // self.ph, w=w // self.pw, ph=self.ph, pw=self.pw)

        # Fusion
        x = self.conv3(x)
        x = torch.cat((x, z), 1)
        x = self.conv4(x)
        x = x + y
        x = self.mv01(x)
        return x