Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Training cannot converge when use 5 conv layers #8

Open
unfir225 opened this issue Dec 12, 2016 · 0 comments
Open

Training cannot converge when use 5 conv layers #8

unfir225 opened this issue Dec 12, 2016 · 0 comments

Comments

@unfir225
Copy link

I'm trying to train a model with 5 conv layers from AlexNet model. And I followed src/fast_dbox_config.py, using a yaml file to setting DEDUP_BOXES=0.0625(1/16). Other settings are using default config. Using pretrained model imagenet_models/CaffeNet.v2.caffemodel for 5 conv layers.
But the the loss cannot converge.

Here is my train prototxt

name: "CaffeNet"
#COCO dataset needs to change the bbox regression layer
input: "data"
input_shape {
dim: 1
dim: 3
dim: 140
dim: 140
}
input: "rois"
input_shape {
dim: 1 # to be changed on-the-fly to num ROIs
dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing
dim: 1
dim: 1
}
input: "labels"
input_shape {
dim: 1 # to be changed on-the-fly to match num ROIs
dim: 1
dim: 1
dim: 1
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "norm2"
type: "LRN"
bottom: "conv2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "norm2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "roi_pool2_0"
type: "ROIPooling"
bottom: "conv5"
bottom: "rois"
top: "pool2_0"
roi_pooling_param {
pooled_w: 6
pooled_h: 6
spatial_scale: 0.125 # 1/16
}
}
layer {
name: "fc3"
type: "InnerProduct"
bottom: "pool2_0"
top: "fc3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1024
}
}
layer {
name: "relu3_0"
type: "ReLU"
bottom: "fc3"
top: "fc3"
}
layer {
name: "drop3"
type: "Dropout"
bottom: "fc3"
top: "fc3"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc3"
top: "cls_score"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score"
bottom: "labels"
top: "loss_cls"
loss_weight: 1
}

# for free to join this conversation on GitHub. Already have an account? # to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant