YOLOv5改进系列(21)——替换主干网络之RepViT(清华 ICCV 2023|最新开源移动端ViT)

时间:2025-04-02 09:17:00
  • class Conv2d_BN():
  • def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1,
  • groups=1, bn_weight_init=1, resolution=-10000):
  • super().__init__()
  • self.add_module('c', .Conv2d(
  • a, b, ks, stride, pad, dilation, groups, bias=False))
  • self.add_module('bn', .BatchNorm2d(b))
  • .constant_(, bn_weight_init)
  • .constant_(, 0)
  • @torch.no_grad()
  • def fuse(self):
  • c, bn = self._modules.values()
  • w = / (bn.running_var + )**0.5
  • w = * w[:, None, None, None]
  • b = - bn.running_mean * / \
  • (bn.running_var + )**0.5
  • m = .Conv2d((1) * , (
  • 0), [2:], stride=, padding=, dilation=, groups=,
  • device=)
  • .copy_(w)
  • .copy_(b)
  • return m
  • class Residual():
  • def __init__(self, m, drop=0.):
  • super().__init__()
  • = m
  • = drop
  • def forward(self, x):
  • if and > 0:
  • return x + (x) * ((0), 1, 1, 1,
  • device=).ge_().div(1 - ).detach()
  • else:
  • return x + (x)
  • @torch.no_grad()
  • def fuse(self):
  • if isinstance(, Conv2d_BN):
  • m = ()
  • assert( == m.in_channels)
  • identity = ([0], [1], 1, 1)
  • identity = (identity, [1,1,1,1])
  • += ()
  • return m
  • elif isinstance(, .Conv2d):
  • m =
  • assert( != m.in_channels)
  • identity = ([0], [1], 1, 1)
  • identity = (identity, [1,1,1,1])
  • += ()
  • return m
  • else:
  • return self
  • class RepVGGDW():
  • def __init__(self, ed) -> None:
  • super().__init__()
  • = Conv2d_BN(ed, ed, 3, 1, 1, groups=ed)
  • self.conv1 = Conv2d_BN(ed, ed, 1, 1, 0, groups=ed)
  • = ed
  • def forward(self, x):
  • return (x) + self.conv1(x) + x
  • @torch.no_grad()
  • def fuse(self):
  • conv = ()
  • conv1 = self.()
  • conv_w =
  • conv_b =
  • conv1_w =
  • conv1_b =
  • conv1_w = (conv1_w, [1,1,1,1])
  • identity = ((conv1_w.shape[0], conv1_w.shape[1], 1, 1, device=conv1_w.device), [1,1,1,1])
  • final_conv_w = conv_w + conv1_w + identity
  • final_conv_b = conv_b + conv1_b
  • .copy_(final_conv_w)
  • .copy_(final_conv_b)
  • return conv
  • class RepViTBlock():
  • def __init__(self,in1, inp, hidden_dim, oup, kernel_size=3, stride=2, use_se=0, use_hs=0):
  • super(RepViTBlock, self).__init__()
  • assert stride in [1, 2]
  • = stride == 1 and inp == oup
  • print(inp)
  • print(hidden_dim)
  • print(oup)
  • assert(hidden_dim == 2 * inp)
  • if stride == 2:
  • self.token_mixer = (
  • Conv2d_BN(inp, inp, kernel_size, stride, (kernel_size - 1) // 2, groups=inp),
  • SqueezeExcite(inp, 0.25) if use_se else (),
  • Conv2d_BN(inp, oup, ks=1, stride=1, pad=0)
  • )
  • self.channel_mixer = Residual((
  • # pw
  • Conv2d_BN(oup, 2 * oup, 1, 1, 0),
  • () if use_hs else (),
  • # pw-linear
  • Conv2d_BN(2 * oup, oup, 1, 1, 0, bn_weight_init=0),
  • ))
  • else:
  • assert()
  • self.token_mixer = (
  • RepVGGDW(inp),
  • SqueezeExcite(inp, 0.25) if use_se else (),
  • )
  • self.channel_mixer = Residual((
  • # pw
  • Conv2d_BN(inp, hidden_dim, 1, 1, 0),
  • () if use_hs else (),
  • # pw-linear
  • Conv2d_BN(hidden_dim, oup, 1, 1, 0, bn_weight_init=0),
  • ))
  • def forward(self, x):
  • return self.channel_mixer(self.token_mixer(x))