在caffe中定义新层时如何获得学习率或迭代次数

how to get learning rate or iteration times when define new layer in caffe

我想在迭代次数达到一定次数后更改损失层中的损失计算方法
为了实现它,我想我需要获得当前的学习率或迭代次数,然后使用 if 语句来选择是否改变损失计算方法。

据我所知,无法从 python 层内直接访问求解器的迭代次数和学习率。
但是,您可以保留自己的计数器

import caffe

class IterCounterLossLayer(caffe.Layer):
    """Python loss layer that keeps its own iteration counter.

    The solver's iteration count is not directly visible from a Python
    layer, so the layer counts its own forward passes and switches the
    loss computation after a fixed number of iterations (1000 here).
    """

    def setup(self, bottom, top):
        # do your setup here...
        self.iter_counter = 0  # setup a counter

    def reshape(self, bottom, top):
        # reshape code here...
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        if self.iter_counter < 1000:
            # some way of computing the loss
            pass  # ...
        else:
            # another way
            pass  # ...
        # Increment; you may consider incrementing by bottom[0].shape[0],
        # the batch size, to count samples instead of batches.
        self.iter_counter += 1

    def backward(self, top, propagate_down, bottom):
        # Gradients must match whichever loss branch forward() used.
        if self.iter_counter < 1000:
            # gradients need to fit the loss
            pass  # ...
        else:
            # another way
            pass  # ...

你可以在Caffe中添加一个成员变量class来保存当前的学习率或迭代次数,并在你需要的层中访问。

例如,要获得所需的当前迭代次数,您需要进行 3 个关键修改(为了简化):

  1. common.hpp:

      // Sketch of the addition to caffe/common.hpp: the global Caffe
      // singleton gains a member that stores the solver's current
      // iteration, so any layer can read it via Caffe::current_iter().
      class Caffe {
        public:
          static Caffe& Get();

          ...//Some other public members

          //Returns the current iteration count
          inline static int current_iter() { return Get().cur_iter_; }
          //Sets the current iteration count (called from Solver::Step)
          inline static void set_cur_iter(int iter) { Get().cur_iter_ = iter; }

        protected:

          //The variable that stores the current iteration count
          int cur_iter_;

          ...//Some other protected members
      };  // BUGFIX: class definitions require a trailing semicolon
    
  2. solver.cpp:

      // In caffe/solver.cpp: publish the solver's iteration count into the
      // Caffe singleton at the top of every training step, before the
      // forward/backward passes run, so layers see an up-to-date value.
      template <typename Dtype>
      void Solver<Dtype>::Step(int iters) {
    
        ...
    
        while (iter_ < stop_iter) {
          // Record the current iteration for layers to read.
          Caffe::set_cur_iter(iter_ );
          ...//Left Operations
        }
      }
    
  3. 要访问当前迭代次数的地方:

      // Example consumer: any layer (or other code) can now query the
      // iteration count through the static accessor added in common.hpp.
      template <typename Dtype>
      void SomeLayer<Dtype>::some_func() {
        int current_iter = Caffe::current_iter();
        ...//Operations you want
      }
    

要获得迭代次数,您可以使用我的 count_layer 作为自定义层的底层 (bottom),您可以从以下方面受益:

  1. 当您使用权重进行微调时,迭代次数将从您保存的权重继续。
  2. 采用模块化实现。
  3. 无需更改现有的 caffe 代码。

train_val.prototxt

# Bottom-less layer whose single top blob holds the number of forward
# passes performed so far (see CountLayer); wire it as a bottom of any
# layer that needs the iteration count.
layer {
  name: "iteration"
  top: "iteration"
  type: "Count"
}

count_layer.hpp

#ifndef CAFFE_COUNT_LAYER_HPP_
#define CAFFE_COUNT_LAYER_HPP_

#include <vector>

#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
class CountLayer : public Layer<Dtype> {
 public:
  // Layer with no bottoms whose single top outputs the number of forward
  // passes performed so far, incremented by `delta` each pass. The count
  // lives in a parameter blob, so it is written to snapshots and restored
  // when finetuning — the count continues from the saved weights.
  explicit CountLayer(const LayerParameter& param)
     : Layer<Dtype>(param), delta_(1) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

    if (this->blobs_.size() > 0) {
      // Blob already loaded from a snapshot: keep the restored count.
      LOG(INFO) << "Skipping parameter initialization";
    } else {
      this->blobs_.resize(1);
      this->blobs_[0].reset(new Blob<Dtype>());
      if (this->layer_param_.count_param().has_shape()){
        this->blobs_[0]->Reshape(this->layer_param_.count_param().shape());
      } else{
        // Default to a 1x1 blob holding a single scalar count.
        this->blobs_[0]->Reshape(vector<int>{1, 1});
      }
      // base_filler sets the starting value of the count (the "base").
      shared_ptr<Filler<Dtype> > base_filler(GetFiller<Dtype>(
        this->layer_param_.count_param().base_filler()));
      base_filler->Fill(this->blobs_[0].get());
    }
    top[0]->Reshape(this->blobs_[0]->shape());

    string name = this->layer_param().name();
    if (name == ""){
      name = "Count";
    }
    // Force decay_mult to 0 so weight decay never perturbs the counter.
    // NOTE(review): lr_mult is left untouched; verify the solver cannot
    // apply a gradient update to this blob (Backward is a no-op, but the
    // top's diff is shared with the blob below).
    if (this->layer_param_.param_size() <= 0){
      LOG(INFO) << "Layer " << name << "'s decay_mult has been set to 0";
      this->layer_param_.add_param()->set_decay_mult(Dtype(0));
    } else if (!this->layer_param_.param(0).has_decay_mult()){
      LOG(INFO) << "Layer " << name << "'s decay_mult has been set to 0";
      this->layer_param_.mutable_param(0)->set_decay_mult(0);
    } 

    delta_ = Dtype(this->layer_param_.count_param().delta());
    // Pre-subtract one delta so the first Forward outputs exactly the base
    // value; this also keeps the sequence correct when finetuning.
    caffe_add_scalar(this->blobs_[0]->count(), -delta_, this->blobs_[0]->mutable_cpu_data()); 
  }
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) { }
  // BUGFIX: was "Parameter" (copy-pasted from ParameterLayer); must match
  // the name registered via REGISTER_LAYER_CLASS(Count) and used in the
  // prototxt (type: "Count").
  virtual inline const char* type() const { return "Count"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  // Each forward pass bumps the stored count by delta_ and exposes the
  // blob's data/diff directly through the top (no copy).
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    caffe_add_scalar(this->blobs_[0]->count(), delta_, this->blobs_[0]->mutable_cpu_data());
    top[0]->ShareData(*(this->blobs_[0]));
    top[0]->ShareDiff(*(this->blobs_[0]));
  }
  // The count is not a function of any input, so there is no gradient.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
  { 
  }

  private:
    Dtype delta_;  // increment applied per forward pass (default 1)
};

}  // namespace caffe

#endif

caffe_layer.cpp

#include "caffe/layers/count_layer.hpp"

namespace caffe {

// Explicitly instantiate CountLayer<float> and CountLayer<double>.
INSTANTIATE_CLASS(CountLayer);
// Register the layer so that `type: "Count"` in a prototxt resolves to
// CountLayer via the layer factory.
REGISTER_LAYER_CLASS(Count);

}  // namespace caffe

caffe.proto

// NOTE(review): this field must be added inside message LayerParameter;
// 666 must be a field number not already used there — check the
// "next available layer-specific ID" comment in caffe.proto.
optional CountParameter count_param = 666;
...
message CountParameter {
  optional BlobShape shape = 1; // Shape of the count blob (default: 1x1)
  optional FillerParameter base_filler = 2; // The filler for the base
  optional float delta = 3 [default = 1]; // Increment per forward pass
}