『cs231n』通过代码理解风格迁移

文件目录

『cs231n』通过代码理解风格迁移

vgg16.py

import os

import numpy as np

import tensorflow as tf

from download import exist_or_download

model_url = 'https://s3.amazonaws.com/cadl/models/vgg16.tfmodel'

model_dir = 'vgg16/'

model_name = 'vgg16.tfmodel'

def model_download():
    """Make sure the pretrained VGG16 model file is present locally.

    Prints a status line, then hands ``model_url`` and ``model_dir`` to
    ``exist_or_download``, which performs the actual existence check and
    download.
    """
    print('Downloading Model ... ')
    exist_or_download(dir=model_dir, url=model_url)

class VGG16:
    """Wrapper around a frozen VGG16 graph loaded from a ``.tfmodel`` file.

    The serialized graph is imported into a private ``tf.Graph``; the input
    tensor and the outputs of the 13 convolution operations listed in
    ``layer_operation_names`` are exposed for feeding and fetching.
    """

    # Name of the tensor that images are fed into.
    input_tensor_name = 'images:0'

    # Operation names of the convolution layers, in network order.
    # Appending ':0' to an operation name gives its first output tensor.
    layer_operation_names = [
        'conv1_1/conv1_1', 'conv1_2/conv1_2',
        'conv2_1/conv2_1', 'conv2_2/conv2_2',
        'conv3_1/conv3_1', 'conv3_2/conv3_2', 'conv3_3/conv3_3',
        'conv4_1/conv4_1', 'conv4_2/conv4_2', 'conv4_3/conv4_3',
        'conv5_1/conv5_1', 'conv5_2/conv5_2', 'conv5_3/conv5_3',
    ]

    # Tensor names of the dropout random sources; not referenced by any
    # method of this class.
    dropout_tensor_names = [
        'dropout/random_uniform:0',
        'dropout_1/random_uniform:0',
    ]

    def __init__(self):
        """Load the serialized model into a fresh graph and look up tensors."""
        graph = tf.Graph()
        self.graph = graph
        model_path = os.path.join(model_dir, model_name)

        with graph.as_default():
            # Read the binary protobuf, then import it with an empty name
            # prefix so tensor names match the ones stored in the file.
            with tf.gfile.FastGFile(model_path, 'rb') as model_file:
                serialized = model_file.read()
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(serialized)
            tf.import_graph_def(graph_def, name='')

            # Resolve the tensors used for feeding and for the losses.
            self.input_tensor = graph.get_tensor_by_name(self.input_tensor_name)
            self.layer_tensors = [
                graph.get_tensor_by_name(op_name + ':0')
                for op_name in self.layer_operation_names
            ]

    def get_layer_tensors(self, layer_ids):
        """Return the layer output tensors at the given indices."""
        tensors = self.layer_tensors
        return [tensors[position] for position in layer_ids]

    def get_layer_names(self, layer_ids):
        """Return the layer operation names at the given indices."""
        op_names = self.layer_operation_names
        return [op_names[position] for position in layer_ids]

    def get_all_layer_names(self, startswith=None):
        """Return the names of all graph operations.

        :param startswith: optional prefix; when given, only names that
            begin with it are returned.
        """
        all_names = [operation.name for operation in self.graph.get_operations()]
        if startswith is None:
            return all_names
        return [op_name for op_name in all_names if op_name.startswith(startswith)]

    def create_feed_dict(self, input_image):
        """Build a feed dict for one image.

        A leading axis of length 1 is added to ``input_image`` and the result
        is keyed by the input tensor's name.
        """
        return {self.input_tensor_name: np.expand_dims(input_image, axis=0)}

二进制模型文件载入流程：

# Create a new, empty graph
self.graph = tf.Graph()
# Make the empty graph the default graph
with self.graph.as_default():
    # Open the model file for binary reading
    with tf.gfile.FastGFile(os.path.join(model_dir,model_name),'rb') as f:
        # Create a GraphDef that temporarily holds the graph stored in the model file
        graph_def = tf.GraphDef()
        # Parse the serialized graph from the file into the GraphDef
        graph_def.ParseFromString(f.read())
        # Import the graph held by the GraphDef into the empty graph
        tf.import_graph_def(graph_def,name='')
        # Tensors are fetched from the graph with graph.get_tensor_by_name plus the tensor name
        # The tensors obtained here can be passed directly to a session's run method
        # Background: a name such as 'conv1' is a node name, whereas 'conv1:0' is a tensor name meaning the node's first output tensor
        self.input_tensor = self.graph.get_tensor_by_name(self.input_tensor_name)
        self.layer_tensors = [self.graph.get_tensor_by_name(name + ':0') for name   in self.layer_operation_names]

『TensorFlow』迁移学习_他山之石，可以攻玉

『TensorFlow』模型载入方法汇总

download.py

import os

import sys

import tarfile

import zipfile

import urllib.request

def _print_download_progress(count, block_size, total_size):
    """Report hook for ``urllib.request.urlretrieve``: print progress in place.

    :param count: number of blocks transferred so far
    :param block_size: size of one block in bytes
    :param total_size: total size of the file in bytes; ``urlretrieve``
        passes a non-positive value when the server does not report a size
    """
    downloaded = count * block_size

    if total_size > 0:
        # The last block is usually only partly filled, so the raw ratio can
        # exceed 1; clamp it so the display never goes past 100%.
        pct_complete = min(1.0, downloaded / total_size)
        msg = '\r- Download progress: {0:.1%}'.format(pct_complete)
    else:
        # Unknown total size: a percentage cannot be computed (and dividing
        # by zero would crash), so show the byte count instead.
        msg = '\r- Download progress: {0} bytes'.format(downloaded)

    # '\r' is a carriage return: it moves the cursor back to the start of the
    # current line so that each update overwrites the previous one.
    sys.stdout.write(msg)
    sys.stdout.flush()

def exist_or_download(url,dir):
    """Download ``url`` into ``dir`` unless the file is already there.

    The file name is taken from the last path component of ``url``. After a
    download, ``.zip``, ``.tar.gz`` and ``.tgz`` archives are extracted into
    ``dir``. Nothing is downloaded or extracted when the target file already
    exists.

    :param url: address of the file to fetch
    :param dir: directory the file is stored in (created if missing)
    """
    # The last URL component is the file name.
    file_name = url.split('/')[-1]
    file_path = os.path.join(dir, file_name)

    if os.path.exists(file_path):
        print('Data has apparently alreadly been downloaded and unpacked.')
        return

    # An empty ``dir`` means the current directory, which needs no creation.
    if dir:
        os.makedirs(dir, exist_ok=True)

    # urlretrieve saves to ``filename`` and calls ``reporthook`` with
    # (blocks transferred, block size, total size) as data arrives. It
    # returns a (filename, headers) tuple.
    file_path, _ = urllib.request.urlretrieve(url=url,
                                              filename=file_path,
                                              reporthook=_print_download_progress)
    print('\r- Download finish.')

    # Unpack archives automatically. ``str.endswith`` takes a tuple when
    # several suffixes are acceptable.
    if file_path.endswith('.zip'):
        with zipfile.ZipFile(file=file_path, mode='r') as archive:
            archive.extractall(dir)
    elif file_path.endswith(('.tar.gz', '.tgz')):
        with tarfile.open(name=file_path, mode='r:gz') as archive:
            # NOTE: tar archives from the network can contain members that
            # escape ``dir``. Use the safe 'data' extraction filter when this
            # Python version provides it.
            if hasattr(tarfile, 'data_filter'):
                archive.extractall(dir, filter='data')
            else:
                archive.extractall(dir)

    print('Done')

sys.stdout.write()

由于 Python 3 的 print 支持 end 关键字参数，这个方法的意义不大；但在 2.x 中若想实现输出不换行，只能直接调用 stdout 对象的 write 方法。write 不会自动在末尾追加换行符，因此如果想在同一行上多次输出（例如刷新下载进度），只需在要输出的字符串开头加上 "\r"，光标就会回到行首，新内容会覆盖旧内容。

urllib.request.urlretrieve()：请求url的常用方法

urllib.request.urlretrieve(url=url,

                          filename=file_path,

                          reporthook=_print_download_progress)

# 参数 filename 指定了保存本地路径（如果参数未指定，urllib会生成一个临时文件保存数据。）

# 参数 reporthook 是一个回调函数需要自己实现，当连接上服务器、以及相应的数据块传输完毕时会触发该回调，会传入（已接受数据块数量，数据块大小，文件总大小）三个参数。

# 参数 data 指 post 到服务器的数据

# 该方法返回一个包含两个元素的(filename, headers)元组，filename 表示保存到本地的路径，header 表示服务器的响应头。

解压函数：先读取，后创建

# Choose the extractor from the file extension. The string method is
# ``endswith`` and it takes a tuple when several suffixes are acceptable;
# ``tarfile.open`` takes its open mode through the ``mode`` keyword.
if file_path.endswith('.zip'):
    zipfile.ZipFile(file=file_path, mode='r').extractall(dir)
elif file_path.endswith(('.tar.gz', '.tgz')):
    tarfile.open(name=file_path, mode='r:gz').extractall(dir)

Style_transfer.py

import os

import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt

from PIL import Image

import vgg16

# Ask TensorFlow's native logging to suppress INFO and WARNING messages.
# NOTE(review): this is set after `import tensorflow` above; confirm that the
# installed TensorFlow version still honours it at this point.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Print the TensorFlow version in use.
print('\nTensotFlow version:', tf.__version__, '\n')
# Ensure the pretrained VGG16 model file is available before a model is built.
vgg16.model_download()

"""图像处理辅助函数"""

def load_image(file_name, max_size=None):
    """Load an image file as a float32 array, optionally rescaled.

    :param file_name: path of the image file
    :param max_size: when given, the image is rescaled (up or down, keeping
        the aspect ratio) so that its longest side is ``max_size`` pixels
    :return: the pixel data converted with ``np.float32``
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(file_name) as image:
        if max_size is not None:
            factor = max_size / max(image.size)
            # Pillow documents the target size as a 2-tuple of ints, so build
            # one instead of passing a NumPy array. Keep every side at least
            # one pixel so extreme aspect ratios do not produce a zero size.
            size = tuple(max(1, int(side * factor)) for side in image.size)
            # LANCZOS is a high-quality resampling filter.
            image = image.resize(size, Image.LANCZOS)
        # The conversion reads the pixel data, so it must happen before the
        # file is closed.
        return np.float32(image)

def save_image(image, file_name):
    """Write a pixel array to ``file_name`` as a JPEG image.

    Values are limited to the range 0..255 and converted to unsigned 8-bit
    integers before encoding.
    """
    pixels = np.clip(image, 0.0, 255.0)
    pixels = pixels.astype(np.uint8)

    with open(file_name, 'wb') as target:
        picture = Image.fromarray(pixels)
        picture.save(target, 'jpeg')

def plot_image(image):
    """Display a pixel array as an image in a new matplotlib figure.

    Values are limited to the range 0..255 and converted to unsigned 8-bit
    integers before display.
    """
    pixels = np.clip(image, 0.0, 255.0).astype(np.uint8)
    picture = Image.fromarray(pixels)

    plt.figure()
    plt.imshow(picture)
    plt.show()

def plot_images(content_image, style_image, mixed_image):
    """Show the content, mixed and style images side by side.

    Each array is divided by 255 before being handed to ``imshow``; axis
    ticks are removed from all three panels.
    """
    figure, axes = plt.subplots(1,3,figsize=(10,10))
    figure.subplots_adjust(hspace=0.1, wspace=0.1)

    # Left-to-right panel order: content, mixed, style.
    panels = (
        ('Content', content_image),
        ('Mixed', mixed_image),
        ('Style', style_image),
    )
    for axis, (caption, picture) in zip(axes.flat, panels):
        axis.imshow(picture / 255.0)
        axis.set_xlabel(caption)
        # Hide the tick marks.
        axis.set_xticks([])
        axis.set_yticks([])

    plt.show()

"""损失函数"""

def mean_squared_error(a, b):
    """Return the mean of the element-wise squared difference of two tensors.

    :param a: first tensor (TensorFlow tensor)
    :param b: second tensor (TensorFlow tensor)
    :return: tensor holding the mean squared difference
    """
    difference = a - b
    squared = tf.square(difference)
    return tf.reduce_mean(squared)

def create_content_loss(session, model, content_image, layer_ids):
    """Build the content loss tensor.

    The selected layers are evaluated once for ``content_image``; those
    values become constants, and the loss is the mean over layers of the mean
    squared error between each layer tensor and its constant.

    :param session: TensorFlow session used to evaluate the layers
    :param model: model object providing the graph and layer tensors
    :param content_image: content image array
    :param layer_ids: indices of the layers to use
    :return: scalar loss tensor
    """
    layer_tensors = model.get_layer_tensors(layer_ids)
    # Feature values of the content image, computed once up front.
    target_values = session.run(
        layer_tensors,
        feed_dict=model.create_feed_dict(input_image=content_image))

    with model.graph.as_default():
        per_layer = [
            mean_squared_error(tensor, tf.constant(target))
            for target, tensor in zip(target_values, layer_tensors)
        ]
        return tf.reduce_mean(per_layer)

def gram_matrix(tensor):
    """Return the Gram matrix of a feature tensor.

    The tensor is flattened to two dimensions, keeping its axis-3 size as the
    column count, and the result is the product of the transposed flattened
    matrix with itself, i.e. the dot products between feature channels.
    """
    channel_count = int(tensor.get_shape()[3])
    flattened = tf.reshape(tensor, shape=[-1, channel_count])
    return tf.matmul(flattened, flattened, transpose_a=True)

def create_style_loss(session, model, style_image, layer_ids):
    """Build the style loss tensor.

    The Gram matrices of the selected layers are evaluated once for
    ``style_image``; those values become constants, and the loss is the mean
    over layers of the mean squared error between each Gram-matrix tensor and
    its constant.

    :param session: TensorFlow session used to evaluate the Gram matrices
    :param model: model object providing the graph and layer tensors
    :param style_image: style image array
    :param layer_ids: indices of the layers to use
    :return: scalar loss tensor
    """
    style_feed = model.create_feed_dict(input_image=style_image)
    gram_tensors = [gram_matrix(tensor)
                    for tensor in model.get_layer_tensors(layer_ids)]

    with model.graph.as_default():
        # Gram matrices of the style image, computed once up front.
        target_values = session.run(gram_tensors, feed_dict=style_feed)
        per_layer = [
            mean_squared_error(gram_tensor, tf.constant(target))
            for target, gram_tensor in zip(target_values, gram_tensors)
        ]
        combined = tf.reduce_mean(per_layer)

    return combined

def create_denoise_loss(model):
    """Build the denoising loss on the model's input tensor.

    The loss is the sum of absolute differences between neighbouring entries
    along axis 1 plus the same sum along axis 2 of ``model.input_tensor``.
    """
    image = model.input_tensor
    along_axis_1 = tf.reduce_sum(tf.abs(image[:,1:,:,:] - image[:,:-1,:,:]))
    along_axis_2 = tf.reduce_sum(tf.abs(image[:,:,1:,:] - image[:,:,:-1,:]))
    return along_axis_1 + along_axis_2

"""风格迁移"""

def style_transfer(content_image, style_image,
                   content_layer_ids, style_layer_ids,
                   weight_content=1.5, weight_style=10.0, weight_denoise=0.3,
                   num_iterations=120, learning_rate=10.0):
    """Run gradient descent on a noise image to mix content and style.

    :param content_image: image whose content is reproduced
    :param style_image: image whose style is reproduced
    :param content_layer_ids: indices of the layers used for the content loss
    :param style_layer_ids: indices of the layers used for the style loss
    :param weight_content: weight of the content loss
    :param weight_style: weight of the style loss
    :param weight_denoise: weight of the denoising loss
    :param num_iterations: number of gradient steps
    :param learning_rate: step size, applied after dividing by the standard
        deviation of the gradient
    :return: list of snapshots of the mixed image, taken every 10th iteration
        and at the last iteration; the final result is the last element
    """
    model = vgg16.VGG16()
    session = tf.Session(graph=model.graph)

    # Close the session even when graph construction or a step fails.
    try:
        print('Content layers:\n',model.get_layer_names(content_layer_ids),'\n')
        print('Style layers:\n',model.get_layer_names(style_layer_ids),'\n')

        loss_content = create_content_loss(session, model, content_image, content_layer_ids)
        loss_style = create_style_loss(session, model, style_image, style_layer_ids)
        loss_denoise = create_denoise_loss(model)

        # The model's graph is not the default graph, so everything that is
        # added to it is created inside its context.
        with model.graph.as_default():
            # Per-loss scaling variables; each is updated on every step to
            # the reciprocal of its loss so the three terms stay comparable.
            adj_content = tf.Variable(1e-10, name='adj_content')
            adj_style = tf.Variable(1e-10, name='adj_style')
            adj_denoise = tf.Variable(1e-10, name='adj_denoise')
            init = tf.global_variables_initializer()

            update_adj_content = adj_content.assign(1.0 / (loss_content+1e-10))
            update_adj_style = adj_style.assign(1.0 / (loss_style+1e-10))
            update_adj_denoise = adj_denoise.assign(1.0 / (loss_denoise+1e-10))

            loss = weight_content * adj_content * loss_content + \
                   weight_style * adj_style * loss_style + \
                   weight_denoise * adj_denoise * loss_denoise

            # Gradient of the loss with respect to the input image, computed
            # directly rather than through an optimizer class. tf.gradients
            # returns one gradient per input, hence the [0].
            gradient = tf.gradients(loss, model.input_tensor)[0]

        session.run(init)

        run_list = [gradient, update_adj_content, update_adj_style, update_adj_denoise]

        # Start from noise around mid-grey with the content image's shape.
        mixed_image = np.random.rand(*content_image.shape) + 128
        mixed_images = []

        for i in range(num_iterations):
            feed_dict = model.create_feed_dict(mixed_image)
            grad, adj_content_val, adj_style_val, adj_denoise_val = session.run(run_list, feed_dict=feed_dict)

            # Scale the step by the gradient's spread.
            lr_step = learning_rate / (np.std(grad)+1e-8)

            # Drop the leading batch axis added by create_feed_dict. A new
            # array is built on every step (no in-place update) so snapshots
            # already stored in mixed_images are never modified afterwards.
            mixed_image = np.clip(mixed_image - np.squeeze(grad, axis=0) * lr_step,
                                  0.0, 255.0)

            print('.',end='')  # one dot per iteration as a simple progress bar

            if (i%10 == 0) or (i == num_iterations-1):
                print('\n', 'Iteration:', i)
                print('Weight Adj. for Content: {0:2e}, Style: {1:2e}, Denoise: {2:2e}'.format(
                    adj_content_val, adj_style_val, adj_denoise_val))
                plot_images(content_image, style_image, mixed_image)
                mixed_images.append(mixed_image)

        print('\n', 'Final images:')
        plot_image(mixed_image)
    finally:
        session.close()

    return mixed_images

if __name__=='__main__':
    # Content image is used at its original size; the style image is
    # rescaled so that its longest side is 300 pixels.
    content_filename = 'images/image_r.jpg'
    content_image = load_image(content_filename,max_size=None)

    style_filename = 'images/mosaic.jpg'
    style_image = load_image(style_filename,max_size=300)

    # One line per image. A carriage return ('\r') here would move the cursor
    # back to the line start and overwrite the text just printed.
    print('\nContent image size:', content_image.shape)
    print('Style image size:', style_image.shape, '\n')

    # Index 4 is 'conv3_1/conv3_1'; all 13 convolution layers feed the style loss.
    content_layer_ids = [4]
    style_layer_ids = list(range(13))

    img = style_transfer(content_image=content_image,
                         style_image=style_image,
                         content_layer_ids=content_layer_ids,
                         style_layer_ids=style_layer_ids,
                         weight_content=1.5,
                         weight_style=10.0,
                         weight_denoise=0.3,
                         num_iterations=300,
                         learning_rate=10.0)

"""

细节1：

这应该是个全卷积型网络，由于没有用到FC层所以不能肯定，但是至少feed部分没有限制尺寸，

格拉姆矩阵利用[固定深度C,不定长L].dot([不定长L,固定深度C])=[C,C]，消除了风格图与混合图feature尺寸的差距，所以风格图的大小可以任意；而内容损失是对feature逐元素比较，所以理所当然的混合图

大小和内容图大小必须相同。

细节2：

在loss函数建立的过程中，在静态的graph中动态的构建了新的op，之所以这么说是在静态loss函数op构建中使用了session得到了

中间的参量（也就是content和style的feature），而整个loss函数所在graph的目标输入是mixed。

细节3：

在def style_transfer()中有一处标注了坑，这里面有一点突发状况，新建的Variable节点被报错不存在于默认图中，经排查，这

是因为这几个节点和model类中的节点不存在依赖关系，而model中的graph不是默认graph，所以要添加上下文环境使之进入model的

graph中。

细节4：

loss函数整合时有这样的计算公式：权重*本次loss/上次loss，调整更新速度，感觉意义不大。

"""

"""

# 保存输出图像

from PIL import Image

for i in range(len(img)):

    image = Image.fromarray(np.uint8(img[i]))

    plt.imshow(image)

    plt.savefig('{0}.png'.format([i]),format='png')

"""

计算梯度函数注意一下，意外的方便，

tf.gradients(loss, model.input_tensor)  # 计算梯度，并非使用optimizer类实现

『cs231n』通过代码理解风格迁移

效果一般，我认为是风格层选的太多了，应该更多的注重高层的特征，减少底层的特征学习，否则学不出来漩涡的结构。

使用

"""

# 保存输出图像

from PIL import Image

for i in range(len(img)):

    image = Image.fromarray(np.uint8(img[i]))

    plt.imshow(image)

    plt.savefig('{0}.png'.format([i]),format='png')

"""

最近（17.9.19）使用这个内容做了课堂展示，所以进行了一点探究，几点总结如下：

『cs231n』通过代码理解风格迁移

实验室的电脑配的差不多了，尝试看看能不能得到更好的结果。

秒客网

『cs231n』通过代码理解风格迁移

文件目录

vgg16.py

二进制模型文件载入流程：

download.py

sys.stdout.write()

urllib.request.urlretrieve()：请求url的常用方法

解压函数：先读取，后创建

Style_transfer.py

相关文章