Merge pull request #144 from lukeyeager/nvidia/detectnet-layers
DetectNet layers and example
lukeyeager committed May 27, 2016
2 parents 2f0fbed + c6fbdfd commit 959dee8
Showing 15 changed files with 4,712 additions and 0 deletions.
2,546 changes: 2,546 additions & 0 deletions examples/kitti/detectnet_network.prototxt

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions examples/kitti/detectnet_solver.prototxt
@@ -0,0 +1,21 @@
# DetectNet solver
net: "examples/kitti/detectnet_network.prototxt"
test_state { stage: "val" }

max_iter: 38000 # ~60 train epochs
test_interval: 1280 # ~2 train epochs
test_iter: 185 # 1 test epoch
snapshot: 1280 # ~2 train epochs
snapshot_prefix: "examples/kitti/detectnet_snapshot"
display: 160

solver_type: ADAM
base_lr: 0.0002
lr_policy: "step"
stepsize: 12000
gamma: 0.1
momentum: 0.9
momentum2: 0.999
weight_decay: 1e-08

solver_mode: GPU
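
A quick sanity check of the schedule, taking the comments above at face value: 38,000 iterations over ~60 epochs is roughly 633 iterations per training epoch, so the 1,280-iteration test_interval and snapshot each correspond to about two epochs, and test_iter: 185 is one full pass over the validation set at the network's test-time batch size.
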
1 change: 1 addition & 0 deletions examples/kitti/detectnet_time.sh
@@ -0,0 +1 @@
build/tools/caffe time -model examples/kitti/detectnet_network.prototxt $@
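
This wraps the stock caffe time benchmarking tool, which reports per-layer forward and backward timings for the network; any extra arguments (for example -gpu 0 or -iterations 50) are forwarded unchanged through $@.
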
4 changes: 4 additions & 0 deletions examples/kitti/detectnet_train.sh
@@ -0,0 +1,4 @@
build/tools/caffe train \
-solver examples/kitti/detectnet_solver.prototxt \
-weights models/bvlc_googlenet/bvlc_googlenet.caffemodel \
$@
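
The script fine-tunes from the BVLC GoogLeNet ImageNet weights, which are not bundled with the repository; in stock Caffe they can be fetched with scripts/download_model_binary.py models/bvlc_googlenet. As with the timing script, extra arguments (for example -gpu 0) are forwarded to caffe train through $@.
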
155 changes: 155 additions & 0 deletions include/caffe/layers/detectnet_transform_layer.hpp
@@ -0,0 +1,155 @@
#ifndef DETECTNET_TRANSFORMATION_HPP
#define DETECTNET_TRANSFORMATION_HPP

#include <boost/array.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

template<typename Dtype>
class CoverageGenerator;

template<typename Dtype>
struct BboxLabel_;

struct AugmentSelection;

/**
* @brief Applies common transformations to the input data, such as
* scaling, mirroring, subtracting the image mean...
*/
template <typename Dtype>
class DetectNetTransformationLayer : public Layer<Dtype> {
public:
typedef cv::Size2i Size2i;
typedef cv::Size_<Dtype> Size2v;
typedef cv::Point2i Point2i;
typedef cv::Point_<Dtype> Point2v;
typedef cv::Rect Rect;
typedef cv::Rect_<Dtype> Rectv;
typedef cv::Vec<Dtype, 3> Vec3v;
typedef cv::Mat_<cv::Vec<Dtype, 1> > Mat1v;
typedef cv::Mat_<Vec3v> Mat3v;
typedef BboxLabel_<Dtype> BboxLabel;

explicit DetectNetTransformationLayer(const LayerParameter& param);

virtual ~DetectNetTransformationLayer() {}

virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "DetectNetTransformation"; }
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 2; }

protected:
virtual void Forward_cpu(
const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);


virtual void Backward_cpu(
const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {}

void transform(
const Mat3v& inputImage,
const vector<BboxLabel>& inputBboxes,
Mat3v* outputImage,
Dtype* outputLabel);


/**
* @return a random Dtype in the range [0, 1].
*/
Dtype randDouble();

bool augmentation_flip(
const Mat3v& img,
Mat3v* img_aug,
const vector<BboxLabel>& bboxlist,
vector<BboxLabel>*);
float augmentation_rotate(
const Mat3v& img_src,
Mat3v* img_aug,
const vector<BboxLabel>& bboxlist,
vector<BboxLabel>*);
float augmentation_scale(
const Mat3v& img,
Mat3v* img_temp,
const vector<BboxLabel>& bboxlist,
vector<BboxLabel>*);
void transform_scale(
const Mat3v& img,
Mat3v* img_temp,
const vector<BboxLabel>& bboxList,
vector<BboxLabel>* bboxList_aug,
const Size2i& size);
Point2i augmentation_crop(
const Mat3v& img_temp,
Mat3v* img_aug,
const vector<BboxLabel>& bboxlist,
vector<BboxLabel>*);

void transform_crop(
const Mat3v& img_temp,
Mat3v* img_aug,
const vector<BboxLabel>& bboxlist,
vector<BboxLabel>* bboxlist_aug,
Rect inner,
Size2i outer_area,
Point2i outer_offset) const;

float augmentation_hueRotation(
const Mat3v& img,
Mat3v* result);

float augmentation_desaturation(
const Mat3v& img,
Mat3v* result);

Mat1v getTransformationMatrix(Rect region, Dtype rotation) const;
Rect getBoundingRect(Rect region, Dtype rotation) const;
void matToBlob(const Mat3v& source, Dtype* destination) const;
void matsToBlob(const vector<Mat3v>& source, Blob<Dtype>* destination) const;
vector<Mat3v> blobToMats(const Blob<Dtype>& image) const;
vector<vector<BboxLabel> > blobToLabels(const Blob<Dtype>& labels) const;
Mat3v dataToMat(
const Dtype* _data,
Size2i dimensions) const;
void retrieveMeanImage(Size2i dimensions = Size2i());
void retrieveMeanChannels();

void meanSubtract(Mat3v* source) const;
void pixelMeanSubtraction(Mat3v* source) const;
void channelMeanSubtraction(Mat3v* source) const;

DetectNetAugmentationParameter a_param_;
DetectNetGroundTruthParameter g_param_;
TransformationParameter t_param_;

shared_ptr<CoverageGenerator<Dtype> > coverage_;

Phase phase_;

Mat3v data_mean_;
boost::array<Dtype, 3> mean_values_;
shared_ptr<Caffe::RNG> rng_;
};

} // namespace caffe

#endif /* DETECTNET_TRANSFORMATION_HPP */
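
For orientation, here is a minimal sketch of how this layer might appear in a network definition, based only on the declarations above (layer type "DetectNetTransformation", exactly two bottom and two top blobs). The layer and blob names are hypothetical, and the augmentation/ground-truth parameter blocks are omitted because their prototxt field names are not shown in this header:

layer {
  name: "train_transform"          # hypothetical name
  type: "DetectNetTransformation"  # matches type() above
  bottom: "data"                   # raw images
  bottom: "label"                  # bounding-box labels
  top: "transformed_data"          # augmented images
  top: "transformed_label"         # transformed ground truth
  include { phase: TRAIN }
  # DetectNetAugmentationParameter / DetectNetGroundTruthParameter fields omitted
}
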
51 changes: 51 additions & 0 deletions include/caffe/layers/l1_loss_layer.hpp
@@ -0,0 +1,51 @@
#ifndef CAFFE_L1_LOSS_LAYER_HPP_
#define CAFFE_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/*
* L1LossLayer: computes the L1 (absolute-difference) loss between its two inputs.
*/
template <typename Dtype>
class L1LossLayer : public LossLayer<Dtype> {
public:
explicit L1LossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "L1Loss"; }
/**
* Unlike most loss layers, in the L1LossLayer we can backpropagate
* to both inputs -- override to return true and always allow force_backward.
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}

protected:
/// @copydoc L1LossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
// const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> diff_;
Blob<Dtype> sign_;
};

} // namespace caffe

#endif // CAFFE_L1_LOSS_LAYER_HPP_
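
Like other Caffe loss layers, L1LossLayer takes two bottom blobs (predictions and targets) and emits a scalar loss. A minimal usage sketch, with hypothetical layer and blob names:

layer {
  name: "bbox_loss"       # hypothetical name
  type: "L1Loss"          # matches type() above
  bottom: "bbox_pred"     # predicted values
  bottom: "bbox_label"    # target values
  top: "bbox_loss"        # scalar L1 loss
  loss_weight: 1.0
}

Because AllowForceBackward returns true for both inputs, gradients can be propagated to either bottom blob when force_backward is requested.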