Deep Learning: Generating MNIST Handwritten Digit Images with a Variational Autoencoder (VAE)

Date: 2025-04-06 07:33:46
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import save_image

# Use the GPU if available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Create a directory to save the generated images
sample_dir = 'samples'
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)
# Hyperparameters
image_size = 784       # flattened image size (28 * 28)
h_dim = 400            # hidden layer dimension
z_dim = 20             # latent variable dimension
num_epochs = 15        # 15 training epochs
batch_size = 128       # number of samples per batch
learning_rate = 1e-3   # learning rate
# Get the MNIST training set
dataset = torchvision.datasets.MNIST(root='./data',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=True)

# Data loader: load in batches of batch_size and shuffle the data
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
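# Quick shape check (illustrative addition, not part of the original script):
# each batch comes out as [batch_size, 1, 28, 28] and is flattened to
# [batch_size, 784] before entering the fully connected encoder.
example_x, _ = next(iter(data_loader))
print(example_x.shape, example_x.view(-1, image_size).shape)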
# VAE model
class VAE(nn.Module):
    def __init__(self, image_size=784, h_dim=400, z_dim=20):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(image_size, h_dim)   # encoder hidden layer
        self.fc2 = nn.Linear(h_dim, z_dim)        # mean of q(z|x)
        self.fc3 = nn.Linear(h_dim, z_dim)        # log-variance of q(z|x)
        self.fc4 = nn.Linear(z_dim, h_dim)        # decoder hidden layer
        self.fc5 = nn.Linear(h_dim, image_size)   # decoder output layer

    # Encode: learn the mean and log-variance of the Gaussian posterior q(z|x)
    def encode(self, x):
        h = F.relu(self.fc1(x))
        return self.fc2(h), self.fc3(h)

    # Reparameterize the Gaussian mean and variance to produce the latent variable z:
    # if x ~ N(mu, sigma^2), then (x - mu) / sigma = z ~ N(0, 1)
    def reparameterize(self, mu, log_var):
        std = torch.exp(log_var / 2)
        eps = torch.randn_like(std)
        return mu + eps * std
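    # Note (added): writing the sample as z = mu + std * eps with eps ~ N(0, I)
    # moves the randomness into eps, so gradients can still flow back through
    # mu and log_var when the loss is backpropagated.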
    # Decode the latent variable z back into image space
    def decode(self, z):
        h = F.relu(self.fc4(z))
        return torch.sigmoid(self.fc5(h))

    # Forward pass: compute the reconstruction and the latent distribution parameters
    def forward(self, x):
        mu, log_var = self.encode(x)          # learn the mean and log-variance of q(z|x) from the input x
        z = self.reparameterize(mu, log_var)  # reparameterize to sample the latent variable z
        x_reconst = self.decode(z)            # decode z into the reconstruction x'
        return x_reconst, mu, log_var         # return the reconstruction and the latent distribution parameters
# Instantiate the VAE model and move it to the device
model = VAE().to(device)
print(model)
  • """VAE(
  • (fc1): Linear(in_features=784, out_features=400, bias=True)
  • (fc2): Linear(in_features=400, out_features=20, bias=True)
  • (fc3): Linear(in_features=400, out_features=20, bias=True)
  • (fc4): Linear(in_features=20, out_features=400, bias=True)
  • (fc5): Linear(in_features=400, out_features=784, bias=True)
  • )"""
# Optimizer: Adam over the VAE's parameters with the chosen learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Training: num_epochs epochs in total
for epoch in range(num_epochs):
    for i, (x, _) in enumerate(data_loader):
        # Forward pass
        # Flatten batch_size*1*28*28 ----> batch_size*image_size, where image_size = 1*28*28 = 784
        x = x.to(device).view(-1, image_size)
        # Run the batch_size*784 input through the model to get the reconstruction and the
        # parameters (mean and log-variance) of the Gaussian latent distribution
        x_reconst, mu, log_var = model(x)
        # Compute the reconstruction loss and the KL divergence
        # Reconstruction loss (binary cross-entropy, summed over the batch)
        reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')
        # KL divergence between q(z|x) = N(mu, sigma^2) and the prior N(0, I)
        kl_div = - 0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
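        # Added note: in closed form, KL( N(mu, sigma^2) || N(0, 1) )
        #   = 0.5 * sum( mu^2 + sigma^2 - log(sigma^2) - 1 ),
        # which is exactly the expression above with log_var = log(sigma^2).
        # The total loss below (reconstruction + KL) is the negative ELBO.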
        # Backward pass and optimization
        # Total loss = reconstruction loss + KL divergence
        loss = reconst_loss + kl_div
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()
        # Backpropagate to compute the parameter gradients
        loss.backward()
        # Apply the update to the VAE model's parameters
        optimizer.step()

        # Print progress every 10 steps
        if (i + 1) % 10 == 0:
            print("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}"
                  .format(epoch + 1, num_epochs, i + 1, len(data_loader),
                          reconst_loss.item(), kl_div.item()))
    # At the end of each epoch, save sampled and reconstructed images (no gradients needed)
    with torch.no_grad():
        # Save sampled images
        # Draw random latent codes z from the prior; shape batch_size * z_dim = 128 * 20
        z = torch.randn(batch_size, z_dim).to(device)
        # Decode the random codes z into images
        out = model.decode(z).view(-1, 1, 28, 28)
        # Save the results
        save_image(out, os.path.join(sample_dir, 'sampled-{}.png'.format(epoch + 1)))

        # Save reconstructed images
        # Forward the last batch x (batch_size*784) through the model to get the reconstruction out
        out, _, _ = model(x)
        # Concatenate input and reconstruction side by side: batch_size*1*28*(28+28) = batch_size*1*28*56
        x_concat = torch.cat([x.view(-1, 1, 28, 28), out.view(-1, 1, 28, 28)], dim=3)
        save_image(x_concat, os.path.join(sample_dir, 'reconst-{}.png'.format(epoch + 1)))
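Once training finishes, new digits can be drawn directly from the prior without any input image. The snippet below is a minimal sketch, not part of the original script: it assumes the trained model, z_dim, device and sample_dir from above are still in scope, and the grid size of 64 and the file name 'final-samples.png' are arbitrary choices.

# Sample new digits from the prior N(0, I) with the trained decoder
with torch.no_grad():
    z = torch.randn(64, z_dim).to(device)          # 64 random latent codes
    samples = model.decode(z).view(-1, 1, 28, 28)  # decode into 28x28 images
    save_image(samples, os.path.join(sample_dir, 'final-samples.png'), nrow=8)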