首页 > 代码库 > caffe 中 python 数据层

caffe 中 python 数据层

Caffe 中大多数层用 C++ 写成。但是对于自己的数据,有时需要编写对应的输入层:比如你要取图像中的一部分、不能使用 LMDB,或者你的 label 需要特殊的标记。这时候就需要用 Python 写一个输入层。


例如在 FCN 的 voc_layers.py 中有两个类:VOCSegDataLayer 和 SBDDSegDataLayer。



它们分别包含以下方法:setup、reshape、forward、backward、load_image、load_label。数据层没有需要更新的参数,因此 backward 不需要做任何事情。

import caffe

import numpy as np
from PIL import Image

import random

# NOTE(review): in both classes below, the tail of `reshape`, the whole of
# `forward` / `backward`, and the `def load_image` header were missing from
# the extracted text; they are restored to match the upstream FCN
# `voc_layers.py` -- confirm against upstream before relying on them.


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises an Exception if the layer is not wired with exactly two tops
        and zero bottoms.
        """
        # config
        # NOTE(review): eval() executes whatever Python expression is in
        # param_str (the docstring example uses dict(...) syntax, which
        # ast.literal_eval cannot parse). Only load trusted net definitions.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # close the split file deterministically instead of leaking the handle
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        """
        Load the current (image, label) pair and resize both tops to fit it.
        """
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """
        Copy the loaded pair into the tops, then advance to the next index.
        """
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            # sequential order, wrapping around at the end of the split
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """
        No-op: a data layer has no bottoms and nothing to back-propagate.
        """
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises an Exception if the layer is not wired with exactly two tops
        and zero bottoms.
        """
        # config
        # NOTE(review): eval() executes whatever Python expression is in
        # param_str (the docstring example uses dict(...) syntax, which
        # ast.literal_eval cannot parse). Only load trusted net definitions.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        # close the split file deterministically instead of leaking the handle
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        """
        Load the current (image, label) pair and resize both tops to fit it.
        """
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """
        Copy the loaded pair into the tops, then advance to the next index.
        """
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            # sequential order, wrapping around at the end of the split
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """
        No-op: a data layer has no bottoms and nothing to back-propagate.
        """
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        # local import, as in the original: scipy is only needed for SBDD .mat labels
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label


对于 最终的loss 层:

在prototxt 中定义的layer:

layer {
  type: 'Python'  # implemented as a Python layer
  name: 'loss'    # the loss layer
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    module: 'pyloss'             # the module (file pyloss.py) that holds the layer
    layer: 'EuclideanLossLayer'  # the name of the layer class inside that module
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}


loss 层的实现 :

import caffe
import numpy as np


class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
    to demonstrate the class interface for developing layers in Python.
    """

    def setup(self, bottom, top):
        """
        Validate the wiring: the layer needs exactly two bottoms.

        top is the final loss; bottom holds two blobs, the network output
        and the label.
        """
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        """
        Check that both inputs have the same element count, allocate the
        difference buffer, and make the loss output a single value.
        """
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        """
        Store bottom[0] - bottom[1] and write
        sum(diff ** 2) / bottom[0].num / 2 into the top.
        """
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        """
        Write the gradient into each bottom that requests it:
        +diff / num for bottom[0] and -diff / num for bottom[1].
        """
        for i in range(2):
            if not propagate_down[i]:
                continue
            # the loss depends on (bottom[0] - bottom[1]), so the second
            # input receives the negated gradient
            if i == 0:
                sign = 1
            else:
                sign = -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num



caffe 中 python 数据层