Caffe SigmoidCrossEntropyLossLayer: Theory and Code Notes

Date: 2021-07-17 19:40:18

The Cross-Entropy Loss Function

Link to a brief introduction to the cross-entropy loss function.
The following walks through how Caffe implements the cross-entropy loss.
First, the expression below is the cross-entropy loss:
$$E = -\frac{1}{N}\sum_{n=1}^{N}\Big[p_n \log \hat{p}_n + (1 - p_n)\log(1 - \hat{p}_n)\Big]$$

The inputs to SigmoidCrossEntropyLossLayer are bottom[0] and bottom[1]: bottom[0] holds the predicted scores and bottom[1] holds the target values. Both bottoms have shape $(N \times C \times H \times W)$. Writing the scores in bottom[0] as $x$, with $x_n \in (-\infty, +\infty)$, the predicted probabilities are $\hat{p}_n = \sigma(x_n) \in [0, 1]$, while bottom[1] holds the targets $p_n \in [0, 1]$. The output loss has shape $(1 \times 1 \times 1 \times 1)$.

$$\sigma(x_n) = \frac{1}{1 + e^{-x_n}}$$

The derivative used in backpropagation is:

$$\frac{\partial E}{\partial x_n}
  = \frac{\partial E}{\partial \hat{p}_n}\,\frac{\partial \hat{p}_n}{\partial x_n}
  = -\frac{1}{N}\left(\frac{p_n}{\hat{p}_n} - \frac{1 - p_n}{1 - \hat{p}_n}\right)\hat{p}_n(1 - \hat{p}_n)
  = \frac{1}{N}\left(\hat{p}_n - p_n\right)$$
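As a quick sanity check of this result (a worked example, not from the original post), take a single element with score $x_n = 0$ and label $p_n = 1$:

$$\hat{p}_n = \sigma(0) = 0.5, \qquad \frac{\partial E}{\partial x_n} = \frac{1}{N}(\hat{p}_n - p_n) = \frac{0.5 - 1}{N} = -\frac{0.5}{N}$$

The gradient is negative, so a gradient-descent step increases $x_n$ and pushes the predicted probability toward the positive label; this is exactly what Backward_cpu below implements via caffe_sub followed by scaling with loss_weight / num.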
Note that the loss-computation code in Caffe looks somewhat different from the expression above; the snippet below shows what it actually evaluates.
Code:

Dtype loss = 0;
for (int i = 0; i < count; ++i) {
  loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
      log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
}

top[0]->mutable_cpu_data()[0] = loss / num;
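Why this is the same quantity: below is a sketch of the algebra, writing $x$ for input_data[i], $p$ for target[i], and $s = \mathbf{1}[x \ge 0]$ for the indicator (input_data[i] >= 0). Starting from the per-element cross-entropy with $\hat{p} = \sigma(x)$, and using $\log\sigma(x) = -\log(1+e^{-x})$ and $\log(1-\sigma(x)) = -x - \log(1+e^{-x})$:

$$\begin{aligned}
\ell &= -\big[p \log \sigma(x) + (1 - p)\log(1 - \sigma(x))\big] \\
     &= p\log(1 + e^{-x}) + (1 - p)\big(x + \log(1 + e^{-x})\big) \\
     &= (1 - p)\,x + \log(1 + e^{-x}).
\end{aligned}$$

For $x < 0$ the term $e^{-x}$ can overflow, so the identity $\log(1 + e^{-x}) = -x + \log(1 + e^{x})$ rewrites it as $\ell = -p\,x + \log(1 + e^{x})$. Both cases collapse into the single expression the loop accumulates:

$$\ell = -x\,(p - s) + \log\!\big(1 + e^{\,x - 2xs}\big), \qquad s = \mathbf{1}[x \ge 0],$$

which keeps the argument of $\exp$ non-positive and therefore numerically safe.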

(Figure from the original post, "Understanding the Caffe loss", illustrating this rearrangement; it is cited from an external link and not reproduced here.)

With the theory and the key caveats covered, here is the code with some annotations:

Definition of SigmoidCrossEntropyLossLayer
template <typename Dtype>
class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
 public:
  explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param),
        sigmoid_layer_(new SigmoidLayer<Dtype>(param)),
        sigmoid_output_(new Blob<Dtype>()) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "SigmoidCrossEntropyLoss"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// The internal SigmoidLayer used to map predictions to probabilities.
  shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;  // produces the sigmoid outputs
  /// sigmoid_output stores the output of the SigmoidLayer.
  shared_ptr<Blob<Dtype> > sigmoid_output_;  // holds the sigmoid layer's output
  /// bottom vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_bottom_vec_;  // input vector for the internal sigmoid layer
  /// top vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_top_vec_;  // output vector for the internal sigmoid layer
};
Implementation of the SigmoidCrossEntropyLossLayer member functions
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // Wire bottom[0] (the raw scores) as the input of the internal SigmoidLayer
  // and sigmoid_output_ as its output, then set up that layer.
  sigmoid_bottom_vec_.clear();
  sigmoid_bottom_vec_.push_back(bottom[0]);
  sigmoid_top_vec_.clear();
  sigmoid_top_vec_.push_back(sigmoid_output_.get());
  sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
      "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
  sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  const int count = bottom[0]->count();
  const int num = bottom[0]->num();
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}
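To convince yourself that the stable expression in Forward_cpu really is the textbook cross-entropy, here is a minimal standalone sketch (not Caffe code; naive_loss and stable_loss are helper names made up for this comparison) that evaluates both forms for a few (x, p) pairs:

// Standalone check: compare the textbook cross-entropy with the stable
// rearrangement used in Forward_cpu. Not part of Caffe.
#include <cmath>
#include <cstdio>

double naive_loss(double x, double p) {
  double phat = 1.0 / (1.0 + std::exp(-x));  // sigmoid(x)
  return -(p * std::log(phat) + (1.0 - p) * std::log(1.0 - phat));
}

double stable_loss(double x, double p) {
  double s = (x >= 0) ? 1.0 : 0.0;           // the (input_data[i] >= 0) indicator
  return -(x * (p - s) - std::log(1.0 + std::exp(x - 2.0 * x * s)));
}

int main() {
  const double xs[] = {-30.0, -2.0, 0.0, 2.0, 30.0};
  const double ps[] = {  0.0,  1.0, 0.3, 0.7,  1.0};
  for (int i = 0; i < 5; ++i) {
    std::printf("x=%6.1f p=%.1f  naive=%.6f  stable=%.6f\n",
                xs[i], ps[i], naive_loss(xs[i], ps[i]), stable_loss(xs[i], ps[i]));
  }
  return 0;
}

The two columns should agree, while the stable form never feeds a large positive argument to exp.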

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff: bottom_diff = sigmoid(x) - target
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
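Finally, a minimal standalone sketch (again not Caffe code; stable_loss is the same helper as in the previous sketch) that checks the analytic gradient used in Backward_cpu, (sigmoid(x) - p) / num, against a central finite difference of the stable per-element loss:

// Standalone gradient check for the backward pass. Not part of Caffe.
#include <cmath>
#include <cstdio>

double stable_loss(double x, double p) {
  double s = (x >= 0) ? 1.0 : 0.0;
  return -(x * (p - s) - std::log(1.0 + std::exp(x - 2.0 * x * s)));
}

int main() {
  const double x = 1.3, p = 0.4, eps = 1e-6;
  const int num = 1;  // a batch of one element, so the 1/num factor is trivial
  double analytic = (1.0 / (1.0 + std::exp(-x)) - p) / num;
  double numeric = (stable_loss(x + eps, p) - stable_loss(x - eps, p)) / (2.0 * eps) / num;
  std::printf("analytic = %.8f  numeric = %.8f\n", analytic, numeric);
  return 0;
}

The two values should agree to many decimal places, confirming that caffe_sub(sigmoid_output, target) followed by scaling with loss_weight / num implements exactly the derivative derived earlier.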