#DualViT
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
import math
# ========== 1. DWConv class: depthwise separable convolution ==========
class DWConv(nn.Module):
    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        x = x.transpose(1, 2).view(B, C, H, W)
        x = self.dwconv(x)
        x = x.flatten(2).transpose(1, 2)
        return x
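# Usage sketch (illustrative comment, not part of the original file; shapes assume H = W = 16):
#   dw = DWConv(dim=64)
#   tokens = torch.randn(2, 16 * 16, 64)   # (B, N, C) token sequence with N = H * W
#   out = dw(tokens, 16, 16)               # tokens are reshaped to (B, C, H, W), convolved
#                                          # depthwise (groups=dim), then flattened back to (2, 256, 64)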
# ========== 2. PVT2FFN class: combines linear layers, a depthwise separable convolution and an activation function; it processes the input features and returns the result of a sequence of linear and non-linear transformations. ==========
class PVT2FFN(nn.Module):
    def __init__(self, in_features, hidden_features):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, in_features)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x = self.fc1(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.fc2(x)
        return x
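# Usage sketch (illustrative only): the FFN operates on a token sequence but needs H and W
# so that DWConv can temporarily restore the spatial layout.
#   ffn = PVT2FFN(in_features=64, hidden_features=512)
#   tokens = torch.randn(2, 16 * 16, 64)   # (B, H*W, C)
#   out = ffn(tokens, 16, 16)              # (2, 256, 64): fc1 -> DWConv -> GELU -> fc2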
# ========== 3. MergeFFN class: feature fusion and transformation ==========
class MergeFFN(nn.Module):
    def __init__(self, in_features, hidden_features):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, in_features)
        self.fc_proxy = nn.Sequential(
            nn.Linear(in_features, 2 * in_features),
            nn.GELU(),
            nn.Linear(2 * in_features, in_features),
        )
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x, semantics = torch.split(x, [H * W, x.shape[1] - H * W], dim=1)
        semantics = self.fc_proxy(semantics)
        x = self.fc1(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.fc2(x)
        x = torch.cat([x, semantics], dim=1)
        return x
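# Usage sketch (illustrative only; the proxy-token count of 8 is an arbitrary choice). The input
# holds H*W spatial tokens followed by a few semantic "proxy" tokens along dim 1; the spatial
# tokens go through fc1 -> DWConv -> GELU -> fc2, the proxies through fc_proxy, and the two
# groups are concatenated again:
#   ffn = MergeFFN(in_features=64, hidden_features=512)
#   tokens = torch.randn(2, 16 * 16 + 8, 64)
#   out = ffn(tokens, 16, 16)              # (2, 264, 64)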
# ========== 4. Attention class: multi-head self-attention ==========
class Attention(nn.Module):
    def __init__(self, dim, num_heads):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        self.q = nn.Linear(dim, dim)
        self.kv = nn.Linear(dim, dim * 2)
        self.proj = nn.Linear(dim, dim)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        # x = x.permute(3, 0, 1, 2)
        B, H, W, C = x.shape
        N = H * W
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
        kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        x = (attn @ v).transpose(1, 2).reshape(B, H, W, C)
        x = self.proj(x)
        return x
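# Shape walk-through (illustrative comment, not part of the original code):
#   input  x: (B, H, W, C)   channels-last layout, as produced by MergeBlockattention.forward
#   q:        (B, num_heads, H*W, C // num_heads)
#   k, v:     (B, num_heads, H*W, C // num_heads)
#   attn:     (B, num_heads, H*W, H*W) after scaled dot-product and softmax
#   output x: (B, H, W, C)   after merging heads and the final projection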
# ========== 5. MergeBlockattention class: the core module of this post ==========
class MergeBlockattention(nn.Module):
    def __init__(self, input, dim, num_heads=2, mlp_ratio=8, drop_path=0., norm_layer=nn.LayerNorm, is_last=False):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.norm2 = norm_layer(dim)
        self.attn = Attention(dim, num_heads)
        '''
        self.norm1: first normalization layer (LayerNorm); normalizes the input.
        self.norm2: second normalization layer, also used for normalization.
        self.attn: attention module (Attention) that computes the attention representation of the input.
        self.mlp: the MLP chosen according to is_last. If is_last is True, PVT2FFN is used;
                  otherwise the MergeFFN defined above is used.
        self.is_last: flag indicating whether this is the last block.
        self.drop_path: if drop_path > 0, DropPath is applied with that probability; otherwise an
                  identity mapping (nn.Identity()) is used.
        self.gamma1, self.gamma2: learnable gamma parameters (layer scale) that rescale the normalized features.
        '''
        if is_last:
            self.mlp = PVT2FFN(in_features=dim, hidden_features=int(dim * mlp_ratio))
        else:
            self.mlp = MergeFFN(in_features=dim, hidden_features=int(dim * mlp_ratio))
        self.is_last = is_last
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        layer_scale_init_value = 1e-6
        self.gamma1 = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                   requires_grad=True) if layer_scale_init_value > 0 else None
        self.gamma2 = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                   requires_grad=True) if layer_scale_init_value > 0 else None
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        B, C, H, W = x.shape
        x = x.permute(0, 2, 3, 1)
        # x = x + self.drop_path(self.gamma1 * self.attn(self.norm1(x)))
        x = self.attn(self.norm1(x))
        x = x.permute(0, 3, 1, 2)  # back to channels-first (B, C, H, W)
        return x
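# Minimal sanity check (an added sketch, not part of the original file): builds one
# MergeBlockattention block and runs a forward pass on a random channels-first feature map.
# The `input` argument is unused by the constructor; the value passed here is arbitrary.
if __name__ == "__main__":
    block = MergeBlockattention(input=64, dim=64, num_heads=2, is_last=True)
    feat = torch.randn(2, 64, 20, 20)   # (B, C, H, W)
    out = block(feat)
    print(out.shape)                    # expected: torch.Size([2, 64, 20, 20])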