GAN代码解析(tensorflow实现)文章结尾有GAN资料合集

时间:2021-11-18 13:50:33

虽然暂时还没用到对抗生成网络,但是看过GAN以及WGAN、IRGAN后觉得非常有意思,将博弈论的思路放到了神经网络里面来。主要的思路就是一个生成器与一个分类器,分类器的作用是区分这个数据来源是真实的还是生成器产生的,生成器的作用就是产生新的数据尽可能蒙混分类器不让区分开,在训练过程中交替训练分类器与生成器让两者在竞争中不断进化提高自身的性能,训练完成后分类器和生成器都可以使用,下面边上代码边讲。

img_height = 28
img_width = 28
img_size = img_height * img_width

to_train = True
to_restore = False
output_path = "output"

max_epoch = 500

h1_size = 150
h2_size = 300
z_size = 100
batch_size = 256
这个实验中使用的是mnist图像数据,先定义一下图像的长宽、是否训练、是否保存、保存模型地址、最大批次以及隐藏层的参数和生成器输入的维度和batch_size

def build_generator(z_prior):
w1 = tf.Variable(tf.truncated_normal([z_size, h1_size], stddev=0.1), name="g_w1", dtype=tf.float32)
b1 = tf.Variable(tf.zeros([h1_size]), name="g_b1", dtype=tf.float32)
h1 = tf.nn.relu(tf.matmul(z_prior, w1) + b1)
w2 = tf.Variable(tf.truncated_normal([h1_size, h2_size], stddev=0.1), name="g_w2", dtype=tf.float32)
b2 = tf.Variable(tf.zeros([h2_size]), name="g_b2", dtype=tf.float32)
h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
w3 = tf.Variable(tf.truncated_normal([h2_size, img_size], stddev=0.1), name="g_w3", dtype=tf.float32)
b3 = tf.Variable(tf.zeros([img_size]), name="g_b3", dtype=tf.float32)
h3 = tf.matmul(h2, w3) + b3
x_generate = tf.nn.tanh(h3)
g_params = [w1, b1, w2, b2, w3, b3]
return x_generate, g_params

build_generator定义了生成器,输入参数为一个长度为100的先验向量,然后通过三个全连接层(其实这里可以使用任何形式不一定是全连接层,变种中有CNN和LSTM的)映射到长度为784的向量也就是图像扁平化后的长度,返回了映射中用到的参数

def build_discriminator(x_data, x_generated, keep_prob):
x_in = tf.concat([x_data, x_generated], 0)
w1 = tf.Variable(tf.truncated_normal([img_size, h2_size], stddev=0.1), name="d_w1", dtype=tf.float32)
b1 = tf.Variable(tf.zeros([h2_size]), name="d_b1", dtype=tf.float32)
h1 = tf.nn.dropout(tf.nn.relu(tf.matmul(x_in, w1) + b1), keep_prob)
w2 = tf.Variable(tf.truncated_normal([h2_size, h1_size], stddev=0.1), name="d_w2", dtype=tf.float32)
b2 = tf.Variable(tf.zeros([h1_size]), name="d_b2", dtype=tf.float32)
h2 = tf.nn.dropout(tf.nn.relu(tf.matmul(h1, w2) + b2), keep_prob)
w3 = tf.Variable(tf.truncated_normal([h1_size, 1], stddev=0.1), name="d_w3", dtype=tf.float32)
b3 = tf.Variable(tf.zeros([1]), name="d_b3", dtype=tf.float32)
h3 = tf.matmul(h2, w3) + b3
y_data = tf.nn.sigmoid(tf.slice(h3, [0, 0], [batch_size, -1], name=None))
y_generated = tf.nn.sigmoid(tf.slice(h3, [batch_size, 0], [-1, -1], name=None))
d_params = [w1, b1, w2, b2, w3, b3]
return y_data, y_generated, d_params
build_discriminator定义了区分器,每次接受一个批次的真实数据与生成数据,在全连接层后使用sigmoid计算每个数据是真实数据的概率
def train():    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)    x_data = tf.placeholder(tf.float32, [batch_size, img_size], name="x_data")    z_prior = tf.placeholder(tf.float32, [batch_size, z_size], name="z_prior")    keep_prob = tf.placeholder(tf.float32, name="keep_prob")    global_step = tf.Variable(0, name="global_step", trainable=False)    x_generated, g_params = build_generator(z_prior)    y_data, y_generated, d_params = build_discriminator(x_data, x_generated, keep_prob)    d_loss = - (tf.log(y_data) + tf.log(1 - y_generated))    g_loss = - tf.log(y_generated)    optimizer = tf.train.AdamOptimizer(0.0001)    d_trainer = optimizer.minimize(d_loss, var_list=d_params)    g_trainer = optimizer.minimize(g_loss, var_list=g_params)    init = tf.initialize_all_variables()    saver = tf.train.Saver()    sess = tf.Session()    sess.run(init)    if to_restore:        chkpt_fname = tf.train.latest_checkpoint(output_path)        saver.restore(sess, chkpt_fname)    else:        if os.path.exists(output_path):            shutil.rmtree(output_path)        os.mkdir(output_path)    z_sample_val = np.random.normal(0, 1, size=(batch_size, z_size)).astype(np.float32)    for i in range(sess.run(global_step), max_epoch):        for j in range(60000 / batch_size):            print "epoch:%s, iter:%s" % (i, j)            x_value, _ = mnist.train.next_batch(batch_size)            x_value = 2 * x_value.astype(np.float32) - 1            z_value = np.random.normal(0, 1, size=(batch_size, z_size)).astype(np.float32)            sess.run(d_trainer,                     feed_dict={x_data: x_value, z_prior: z_value, keep_prob: np.sum(0.7).astype(np.float32)})            if j % 1 == 0:                sess.run(g_trainer,                         feed_dict={x_data: x_value, z_prior: z_value, keep_prob: np.sum(0.7).astype(np.float32)})        x_gen_val = sess.run(x_generated, feed_dict={z_prior: z_sample_val})        show_result(x_gen_val, os.path.join(output_path, "sample%s.jpg" % i))        z_random_sample_val = np.random.normal(0, 1, size=(batch_size, z_size)).astype(np.float32)        x_gen_val = sess.run(x_generated, feed_dict={z_prior: z_random_sample_val})        show_result(x_gen_val, os.path.join(output_path, "random_sample%s.jpg" % i))        sess.run(tf.assign(global_step, i + 1))        saver.save(sess, os.path.join(output_path, "model"), global_step=global_step)

train函数中首先载入数据,然后定义了占位符,接下来定义了交替训练的损失函数,g_loss是生成器的损失函数,计算的就是生产数据的交叉熵,d_loss是整个数据的交叉熵,因为分类器要保证在所有数据上都能很好的区分,所以损失函数中包含了所有数据。之后做了一些保存的文件夹操作,接着从0-1均匀分布中抽取了z(至于为什么用这个分布,可以去查看一个概率论,几乎所有重要的概率分布都可以从均匀分布Uniform(0,1)中生成出来),接着就是交替训练以及生产一个训练好的生成器生成的图片了。

def show_result(batch_res, fname, grid_size=(8, 8), grid_pad=5):
batch_res = 0.5 * batch_res.reshape((batch_res.shape[0], img_height, img_width)) + 0.5
img_h, img_w = batch_res.shape[1], batch_res.shape[2]
grid_h = img_h * grid_size[0] + grid_pad * (grid_size[0] - 1)
grid_w = img_w * grid_size[1] + grid_pad * (grid_size[1] - 1)
img_grid = np.zeros((grid_h, grid_w), dtype=np.uint8)
for i, res in enumerate(batch_res):
if i >= grid_size[0] * grid_size[1]:
break
img = (res) * 255
img = img.astype(np.uint8)
row = (i // grid_size[0]) * (img_h + grid_pad)
col = (i % grid_size[1]) * (img_w + grid_pad)
img_grid[row:row + img_h, col:col + img_w] = img
imsave(fname, img_grid)

GAN代码解析(tensorflow实现)文章结尾有GAN资料合集

GAN合集