PyTorch Parameter Update Experiment

Time: 2025-03-29 09:47:03
# In the code below, comment or uncomment the marked lines to run the different tests.
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict


class ConvBlock(nn.Module):
    def __init__(self):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(20, 20, 3, 1, 1)
        self.bn = nn.BatchNorm2d(20)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, 3, 1, 1)
        self.conv4 = nn.Sequential(OrderedDict([
            ('conv4_1', nn.Conv2d(20, 20, 3, 1, 1)),
            ('conv4_2', nn.Conv2d(20, 20, 3, 1, 1))
        ]))
        # test whether different ways of building a block make a difference
        self.conv5 = ConvBlock()
        self.conv6 = nn.Sequential(
            ConvBlock(),
            ConvBlock()
        )  # test whether different ways of building a block make a difference
        self.drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.m = torch.distributions.bernoulli.Bernoulli(torch.Tensor([0.5]))
        self.run = 0

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.drop(self.conv2(x)), 2))
        # Optional test: run conv3 stochastically (Bernoulli(0.5)) or only on the first run
        # if torch.equal(self.m.sample(), torch.ones(1)):
        #     print('run conv3')
        #     x = self.conv3(x)
        #     # self.run = 0   # use when conv3 should run only the first time
        # else:
        #     print('skip conv3')
        #     # self.run += 1  # use on runs after the first
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # self.conv6._modules['0']._modules['conv'].weight.detach_()
        #   after this the detached weight becomes is_leaf=True, requires_grad=False; running conv6(x)
        #   does not affect the conv inside conv6's first block (which stays is_leaf=False, requires_grad=True)
        x = self.conv6(x)
        # requires_grad can also be set directly inside forward:
        # self.conv3.weight.requires_grad = False
        # self.conv4._modules['conv4_1'].weight.requires_grad = False
        # self.conv5._modules['conv'].weight.requires_grad = False
        # self.conv6._modules['0']._modules['conv'].weight.requires_grad = False
        # for item in self.modules():
        #     if isinstance(item, nn.Conv2d):
        #         item_no_grad = item.weight.detach()
        #         item.weight.requires_grad = False
        # self.conv6._modules['0']._modules['conv'].weight.detach_()
        # weight_no_grad = self.conv6._modules['0']._modules['conv'].weight.detach()
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x  # F.log_softmax(x, dim=1)


gpu_id = [3]
device = torch.device("cuda:%d" % gpu_id[0] if torch.cuda.is_available() else "cpu")

model = Net()
if len(gpu_id) > 1:  # multi-GPU setting
    model = nn.DataParallel(model, device_ids=gpu_id)
    model = model.to(device)
elif len(gpu_id) == 1:
    model = model.to(device)

# Alternative device/model setup:
# os.environ['CUDA_VISIBLE_DEVICES'] = '3'
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = polynet_stodepth()
# model = model.cuda()  # .to(device)

LOSS = nn.CrossEntropyLoss()
OPTIMIZER = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# del OPTIMIZER.param_groups[0]['params'][4]  # remove a parameter from the optimizer
# OPTIMIZER.param_groups[0]['params'].append(model._modules['conv3'].weight)  # add a parameter back into the optimizer's param list

epoch = 10
batch = 50
bs = 2
model.train()
# model.conv3.weight.requires_grad = False  # requires_grad can also be set before training

for i in range(epoch):
    print('===== epoch %d' % i)
    for j in range(batch):
        print('epoch-%d, batch-%d' % (i, j))
        inputs = torch.randn(bs, 1, 28, 28).to(device)
        labels = torch.randint(low=0, high=9, size=(bs,)).to(device)
        # inputs = torch.randn(bs, 3, 235, 235).cuda()  # .to(device)
        # labels = torch.randint(low=0, high=9, size=(bs,)).cuda()  # .to(device)
        pred = model(inputs)
        loss = LOSS(pred, labels)
        # for m in model.modules():
        #     print(str(m.__class__))  # prints the type of each op, e.g. Conv2d; types repeat
        # for pname, p in model.named_parameters():
        #     print(pname)  # prints each parameter's name; names are unique identifiers
        OPTIMIZER.zero_grad()
        loss.backward()
        # model.conv3.weight.requires_grad = False  # requires_grad can also be toggled between backward() and step()
        OPTIMIZER.step()

print('done')
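
A quick way to confirm the outcome of any of the freezing settings above is to snapshot the weight of interest before and after one update step and compare. The following sketch is an addition for illustration, not part of the original experiment; it reuses the model, LOSS, OPTIMIZER, bs and device defined above, and conv3.weight is only an example target.

# Minimal check (illustrative addition): did conv3.weight change after one optimizer step?
w_before = model.conv3.weight.detach().clone()
inputs = torch.randn(bs, 1, 28, 28).to(device)
labels = torch.randint(low=0, high=9, size=(bs,)).to(device)
loss = LOSS(model(inputs), labels)
OPTIMIZER.zero_grad()
loss.backward()
OPTIMIZER.step()
w_after = model.conv3.weight.detach().clone()
print('conv3.weight changed after step:', not torch.equal(w_before, w_after))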