在caffe中定义新层时如何获得学习率或迭代次数

how to get learning rate or iteration times when define new layer in caffe

我想在迭代次数达到一定次数后更改损失层中的损失计算方法
为了实现它,我想我需要获得当前的学习率或迭代次数,然后使用 if 语句来选择是否改变损失计算方法。

据我所知,无法从 python 层内直接访问求解器的迭代次数和学习率。
但是,您可以保留自己的计数器

import caffe

class IterCounterLossLayer(caffe.Layer):
    """Python loss layer that keeps its own iteration counter.

    The solver's iteration count is not directly visible from a Python
    layer, so the layer counts its own forward passes and switches the
    loss computation after a fixed number of iterations (1000 here).
    """

    def setup(self, bottom, top):
        # do your setup here...
        self.iter_counter = 0  # setup a counter

    def reshape(self, bottom, top):
        # reshape code here...
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        if self.iter_counter < 1000:
            # some way of computing the loss
            pass  # ...
        else:
            # another way
            pass  # ...
        # Increment; you may consider incrementing by bottom[0].shape[0],
        # the batch size, to count samples instead of batches.
        self.iter_counter += 1

    def backward(self, top, propagate_down, bottom):
        # Gradients must match whichever loss branch forward() used.
        if self.iter_counter < 1000:
            # gradients need to fit the loss
            pass  # ...
        else:
            # another way
            pass  # ...

你可以在Caffe中添加一个成员变量class来保存当前的学习率或迭代次数,并在你需要的层中访问。

例如,要获得所需的当前迭代次数,您需要进行 3 个关键修改(为了简化):

  1. common.hpp:

      // Sketch of the addition to caffe/common.hpp: the global Caffe
      // singleton gains a member that stores the solver's current
      // iteration, so any layer can read it via Caffe::current_iter().
      class Caffe {
        public:
          static Caffe& Get();

          ...//Some other public members

          //Returns the current iteration count
          inline static int current_iter() { return Get().cur_iter_; }
          //Sets the current iteration count (called from Solver::Step)
          inline static void set_cur_iter(int iter) { Get().cur_iter_ = iter; }

        protected:

          //The variable that stores the current iteration count
          int cur_iter_;

          ...//Some other protected members
      };  // BUGFIX: class definitions require a trailing semicolon
    
  2. solver.cpp:

      // In caffe/solver.cpp: publish the solver's iteration count into the
      // Caffe singleton at the top of every training step, before the
      // forward/backward passes run, so layers see an up-to-date value.
      template <typename Dtype>
      void Solver<Dtype>::Step(int iters) {
    
        ...
    
        while (iter_ < stop_iter) {
          // Record the current iteration for layers to read.
          Caffe::set_cur_iter(iter_ );
          ...//Left Operations
        }
      }
    
  3. 要访问当前迭代次数的地方:

      // Example consumer: any layer (or other code) can now query the
      // iteration count through the static accessor added in common.hpp.
      template <typename Dtype>
      void SomeLayer<Dtype>::some_func() {
        int current_iter = Caffe::current_iter();
        ...//Operations you want
      }
    

要获得迭代次数,您可以使用我的 count_layer 作为自定义层的底层 (bottom),您可以从以下方面受益:

  1. 当您使用权重进行微调时,迭代次数将从您保存的权重继续。
  2. 采用模块化实现。
  3. 无需更改现有的 caffe 代码。

train_val.prototxt

# Bottom-less layer whose single top blob holds the number of forward
# passes performed so far (see CountLayer); wire it as a bottom of any
# layer that needs the iteration count.
layer {
  name: "iteration"
  top: "iteration"
  type: "Count"
}

count_layer.hpp

#ifndef CAFFE_COUNT_LAYER_HPP_
#define CAFFE_COUNT_LAYER_HPP_

#include <vector>

#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
class CountLayer : public Layer<Dtype> {
 public:
  // Layer with no bottoms whose single top outputs the number of forward
  // passes performed so far, incremented by `delta` each pass. The count
  // lives in a parameter blob, so it is written to snapshots and restored
  // when finetuning — the count continues from the saved weights.
  explicit CountLayer(const LayerParameter& param)
     : Layer<Dtype>(param), delta_(1) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

    if (this->blobs_.size() > 0) {
      // Blob already loaded from a snapshot: keep the restored count.
      LOG(INFO) << "Skipping parameter initialization";
    } else {
      this->blobs_.resize(1);
      this->blobs_[0].reset(new Blob<Dtype>());
      if (this->layer_param_.count_param().has_shape()){
        this->blobs_[0]->Reshape(this->layer_param_.count_param().shape());
      } else{
        // Default to a 1x1 blob holding a single scalar count.
        this->blobs_[0]->Reshape(vector<int>{1, 1});
      }
      // base_filler sets the starting value of the count (the "base").
      shared_ptr<Filler<Dtype> > base_filler(GetFiller<Dtype>(
        this->layer_param_.count_param().base_filler()));
      base_filler->Fill(this->blobs_[0].get());
    }
    top[0]->Reshape(this->blobs_[0]->shape());

    string name = this->layer_param().name();
    if (name == ""){
      name = "Count";
    }
    // Force decay_mult to 0 so weight decay never perturbs the counter.
    // NOTE(review): lr_mult is left untouched; verify the solver cannot
    // apply a gradient update to this blob (Backward is a no-op, but the
    // top's diff is shared with the blob below).
    if (this->layer_param_.param_size() <= 0){
      LOG(INFO) << "Layer " << name << "'s decay_mult has been set to 0";
      this->layer_param_.add_param()->set_decay_mult(Dtype(0));
    } else if (!this->layer_param_.param(0).has_decay_mult()){
      LOG(INFO) << "Layer " << name << "'s decay_mult has been set to 0";
      this->layer_param_.mutable_param(0)->set_decay_mult(0);
    } 

    delta_ = Dtype(this->layer_param_.count_param().delta());
    // Pre-subtract one delta so the first Forward outputs exactly the base
    // value; this also keeps the sequence correct when finetuning.
    caffe_add_scalar(this->blobs_[0]->count(), -delta_, this->blobs_[0]->mutable_cpu_data()); 
  }
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) { }
  // BUGFIX: was "Parameter" (copy-pasted from ParameterLayer); must match
  // the name registered via REGISTER_LAYER_CLASS(Count) and used in the
  // prototxt (type: "Count").
  virtual inline const char* type() const { return "Count"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  // Each forward pass bumps the stored count by delta_ and exposes the
  // blob's data/diff directly through the top (no copy).
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    caffe_add_scalar(this->blobs_[0]->count(), delta_, this->blobs_[0]->mutable_cpu_data());
    top[0]->ShareData(*(this->blobs_[0]));
    top[0]->ShareDiff(*(this->blobs_[0]));
  }
  // The count is not a function of any input, so there is no gradient.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
  { 
  }

  private:
    Dtype delta_;  // increment applied per forward pass (default 1)
};

}  // namespace caffe

#endif

caffe_layer.cpp

#include "caffe/layers/count_layer.hpp"

namespace caffe {

// Explicitly instantiate CountLayer<float> and CountLayer<double>.
INSTANTIATE_CLASS(CountLayer);
// Register the layer so that `type: "Count"` in a prototxt resolves to
// CountLayer via the layer factory.
REGISTER_LAYER_CLASS(Count);

}  // namespace caffe

caffe.proto

// NOTE(review): this field must be added inside message LayerParameter;
// 666 must be a field number not already used there — check the
// "next available layer-specific ID" comment in caffe.proto.
optional CountParameter count_param = 666;
...
message CountParameter {
  optional BlobShape shape = 1; // Shape of the count blob (default: 1x1)
  optional FillerParameter base_filler = 2; // The filler for the base
  optional float delta = 3 [default = 1]; // Increment per forward pass
}