caffe添加python数据层

caffe添加python数据层（ImageData）

在caffe中添加自定义层时，必须要实现这四个函数，在C++中是（LayerSetUp，Reshape，Forward_cpu，Backward_cpu），在python 中是（setup，reshape，forward_cpu，backword_cpu）。

prototxt

layer {

  name: "data"

  type: "Python"

  top: "data"

  top: "label"

  include {

    phase: TRAIN

  }

  python_param {

    module: "src.data_layer.rank_layer_live" # 不能代目录形式

    layer: "DataLayer"

    param_str: " {\'pascal_root\': \'data\' ,\'split\': \'live_train\', \'im_shape\': [224, 224],\'batch_size\': 32}"

  }

}

layer {

  name: "data"

  type: "Python"

  top: "data"

  top: "label"

  include {

    phase: TEST

  }

  python_param {

    module: "src.data_layer.rank_layer_live"

    layer: "DataLayer"

    #batch_size: 160

    param_str: " {\'pascal_root\': \'data\' ,\'split\': \'live_test\', \'im_shape\': [224, 224],\'batch_size\': 32}"

  }

}

- 数据定义层：

import cv2

import sys

sys.path.append("/home/rjw/caffe/python")

import caffe

import numpy as np

import multiprocessing as mtp

import pdb

import os.path as osp

## 理解参考：https://blog.csdn.net/auto1993/article/details/78951849

class DataLayer(caffe.Layer):

    def setup(self, bottom, top):

        self._name_to_top_map = {}

        self._name_to_top_map['data'] = 0

        self._name_to_top_map['label'] = 1

        # === Read input parameters ===

        self.workers= mtp.Pool(10)

        # params is a python dictionary with layer parameters.

        params = eval(self.param_str)

        # Check the paramameters for validity.

        check_params(params)

        # store input as class variables

        self.batch_size = params['batch_size']

        self.pascal_root = params['pascal_root']

        self.im_shape = params['im_shape']

        # get list of image indexes.

        list_file = params['split'] + '.txt'

        filename = [line.rstrip('\n') for line in open(

            osp.join(self.pascal_root, list_file))]

        self._roidb = []

        self.scores =[]

        for i in filename:

            self._roidb.append(i.split()[0])

            self.scores.append(float(i.split()[1]))

        self._perm = None

        self._cur = 0

        self.num =0

        top[0].reshape(

            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])

        top[1].reshape(self.batch_size, 1)    

    def _get_next_minibatch_inds(self):

        """Return the roidb indices for the next minibatch."""

        db_inds = []

        dis = 4      # total number of distortions in live dataset

        batch = 2    # number of images for each distortion level

        level = 4    # distortion levels for each   mini_batch = level * dis_mini*batch

        #shuff = np.random.permutation(range(dis))

        Num = len(self.scores)/dis/level

        for k in range(dis):

            for i in range(level):

                temp = self.num

                for j in range(batch):

                    db_inds.append(len(self.scores)/dis*k+i*Num+temp)

                    temp = temp +1

        self.num = self.num+batch

        if Num-self.num<batch:

            self.num=0

        db_inds = np.asarray(db_inds)

        return db_inds

    def get_minibatch(self,minibatch_db):

        """Given a roidb, construct a minibatch sampled from it."""

        # Get the input image blob, formatted for caffe  

        jobs =self.workers.map(preprocess,minibatch_db)

        #print len(jobs)

        index = 0

        images_train = np.zeros([self.batch_size,3,224,224],np.float32)

        #pdb.set_trace()

        for index_job in range(len(jobs)):

            images_train[index,:,:,:] = jobs[index_job]

            index += 1 

        blobs = {'data': images_train}

        return blobs

    def forward(self, bottom, top):

        """Get blobs and copy them into this layer's top blob vector."""

        db_inds = self._get_next_minibatch_inds()

        minibatch_db = []

        for i in range(len(db_inds)):

            minibatch_db.append(self._roidb[int(db_inds[i])])

        #minibatch_db = [self._roidb[i] for i in db_inds]

        #print minibatch_db

        scores = []

        for i in range(len(db_inds)):

            scores.append(self.scores[int(db_inds[i])])

        blobs = self.get_minibatch(minibatch_db)

        blobs ['label'] =np.asarray(scores)

        for blob_name, blob in blobs.iteritems():

            top_ind = self._name_to_top_map[blob_name]

            # Reshape net's input blobs

            top[top_ind].reshape(*(blob.shape))

            # Copy data into net's input blobs

            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):

        """This layer does not propagate gradients."""

        pass

    def reshape(self, bottom, top):

        """Reshaping happens during the call to forward."""

        pass

def preprocess(data):

    sp = 224

    im = np.asarray(cv2.imread(data))

    x =  im.shape[0]

    y = im.shape[1]

    x_p = np.random.randint(x-sp,size=1)[0]

    y_p = np.random.randint(y-sp,size=1)[0]

    #print x_p,y_p

    images = im[x_p:x_p+sp,y_p:y_p+sp,:].transpose([2,0,1])

    #print images.shape

    return images

def check_params(params):

    """

    A utility function to check the parameters for the data layers.

    """

    assert 'split' in params.keys(

    ), 'Params must include split (train, val, or test).'

    required = ['batch_size', 'pascal_root', 'im_shape']

    for r in required:

        assert r in params.keys(), 'Params must include {}'.format(r)

秒客网

caffe添加python数据层

相关文章