PyTorch parameter update experiments
# In the code below, comment or uncomment the corresponding lines to run the different tests.
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBlock(nn.Module):
    def __init__(self):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(20, 20, 3, 1, 1)
        self.bn = nn.BatchNorm2d(20)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
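# Optional sanity check, a minimal sketch (the name `block` is illustrative): the parameter
# names printed here are the ones behind the _modules['...'] accesses used later in Net.forward().
block = ConvBlock()
for name, p in block.named_parameters():
    print(name, tuple(p.shape), p.requires_grad)  # conv.weight, conv.bias, bn.weight, bn.bias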
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, 3, 1, 1)
        self.conv4 = nn.Sequential(OrderedDict([
            ('conv4_1', nn.Conv2d(20, 20, 3, 1, 1)),
            ('conv4_2', nn.Conv2d(20, 20, 3, 1, 1))
        ]))  # test whether the way the block is built makes a difference
        self.conv5 = ConvBlock()
        self.conv6 = nn.Sequential(
            ConvBlock(),
            ConvBlock()
        )  # test whether the way the block is built makes a difference
        self.drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.m = torch.distributions.bernoulli.Bernoulli(torch.Tensor([0.5]))
        self.run = 0
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.drop(self.conv2(x)), 2))
        # if torch.equal(self.m.sample(), torch.ones(1)):  # or: if self.run == 0:
        #     print('run conv3')
        #     x = self.conv3(x)
        #     # self.run = 0   # set this for the variant where conv3 runs only on the first pass
        # else:
        #     print('skip conv3')
        #     self.run += 1    # set this for the variant used on passes after the first
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # self.conv6._modules['0']._modules['conv'].weight.detach_()  # after this the weight becomes is_leaf=True, requires_grad=False; running conv6(x) afterwards has no effect on the conv inside the first block of conv6 (the latter remains is_leaf=False, requires_grad=True)
        x = self.conv6(x)
        # self.conv3.weight.requires_grad = False                                 # requires_grad can be set directly inside forward()
        # self.conv4._modules['conv4_1'].weight.requires_grad = False             # requires_grad can be set directly inside forward()
        # self.conv5._modules['conv'].weight.requires_grad = False                # requires_grad can be set directly inside forward()
        # self.conv6._modules['0']._modules['conv'].weight.requires_grad = False  # requires_grad can be set directly inside forward()
        # for item in self.modules():                                             # requires_grad can be set directly inside forward()
        #     if isinstance(item, nn.Conv2d):
        #         item_no_grad = item.weight.detach()
        #         item.weight.requires_grad = False
        # self.conv6._modules['0']._modules['conv'].weight.detach_()
        # weight_no_grad = self.conv6._modules['0']._modules['conv'].weight.detach()
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x  # F.log_softmax(x, dim=1)
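# Minimal sketch of the two freezing mechanisms referenced in the comments above, on standalone
# Conv2d layers (`probe` and `frozen` are illustrative names): toggling requires_grad keeps the
# weight a leaf but stops grad accumulation, while detach_() detaches the weight in place and
# ends up in the same state (is_leaf=True, requires_grad=False).
probe = nn.Conv2d(20, 20, 3, 1, 1)
frozen = nn.Conv2d(20, 20, 3, 1, 1)
frozen.weight.requires_grad = False  # option 1: toggle requires_grad
probe.weight.detach_()               # option 2: in-place detach
x_demo = torch.randn(1, 20, 8, 8)
y_demo = (probe(x_demo) + frozen(x_demo)).sum()
y_demo.backward()  # the biases still require grad, so backward() is valid
print(probe.weight.is_leaf, probe.weight.requires_grad, probe.weight.grad)     # True False None
print(frozen.weight.is_leaf, frozen.weight.requires_grad, frozen.weight.grad)  # True False None
print(probe.bias.grad is not None, frozen.bias.grad is not None)               # True True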
gpu_id = [3]
device = torch.device("cuda:%d" % gpu_id[0] if torch.cuda.is_available() else "cpu")
model = Net()
if len(gpu_id) > 1:  # multi-GPU setting
    model = nn.DataParallel(model, device_ids=gpu_id)
    model = model.to(device)
elif len(gpu_id) == 1:
    model = model.to(device)
# os.environ['CUDA_VISIBLE_DEVICES'] = '3'
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = polynet_stodepth()
# model = model.cuda()  # .to(device)
LOSS = nn.CrossEntropyLoss()
OPTIMIZER = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# del OPTIMIZER.param_groups[0]['params'][4]  # remove a parameter from the optimizer
# OPTIMIZER.param_groups[0]['params'].append(model._modules['conv3'].weight)  # append a parameter to the optimizer's parameter list
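# Sketch (assumes the single-GPU path above, i.e. no DataParallel wrapping): inspect which
# tensors the optimizer currently holds and map them back to parameter names by identity.
id_to_name = {id(p): name for name, p in model.named_parameters()}
for group_idx, group in enumerate(OPTIMIZER.param_groups):
    print('param group %d holds %d tensors' % (group_idx, len(group['params'])))
    for p in group['params']:
        print('  ', id_to_name.get(id(p), '<not a named model parameter>'))
# Common alternative to editing param_groups by index: freeze first, then rebuild the
# optimizer from the parameters that still require grad (`trainable` is an illustrative name).
# model.conv3.weight.requires_grad = False
# trainable = [p for p in model.parameters() if p.requires_grad]
# OPTIMIZER = torch.optim.SGD(trainable, lr=0.1, momentum=0.9)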
epoch = 10
batch = 50
bs = 2
model.train()
# model.conv3.weight.requires_grad = False  # requires_grad can also be set here, outside forward()
for i in range(epoch):
    print('===== epoch %d' % i)
    for j in range(batch):
        print('epoch-%d, batch-%d' % (i, j))
        inputs = torch.randn(bs, 1, 28, 28).to(device)
        labels = torch.randint(low=0, high=10, size=(bs,)).to(device)  # high is exclusive: labels in [0, 9]
        # inputs = torch.randn(bs, 3, 235, 235).cuda()  # .to(device)
        # labels = torch.randint(low=0, high=9, size=(bs,)).cuda()  # .to(device)
        pred = model(inputs)
        loss = LOSS(pred, labels)
        # for m in model.modules():
        #     print(str(m.__class__))  # prints the class of each module, e.g. Conv2d; classes repeat
        # for pname, p in model.named_parameters():
        #     print(pname)  # prints each parameter's name; names are unique identifiers
        OPTIMIZER.zero_grad()
        loss.backward()
        # model.conv3.weight.requires_grad = False
        OPTIMIZER.step()
print('done')
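# Sketch of how to verify which weights the optimizer actually changed (assumes the single-GPU
# path, so model.conv3 / model.conv6 are reachable directly; `before` is an illustrative name):
# clone two weights, run one extra update step, and compare.
before = {
    'conv3.weight': model.conv3.weight.detach().clone(),
    'conv6.0.conv.weight': model.conv6[0].conv.weight.detach().clone(),
}
inputs = torch.randn(bs, 1, 28, 28).to(device)
labels = torch.randint(low=0, high=10, size=(bs,)).to(device)
OPTIMIZER.zero_grad()
LOSS(model(inputs), labels).backward()
OPTIMIZER.step()
named = dict(model.named_parameters())
for name, old in before.items():
    changed = not torch.equal(old, named[name].detach())
    print('%s changed after step: %s' % (name, changed))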