diff --git a/models/caffenet_bbox_reg.prototxt b/models/caffenet_bbox_reg.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..8a7e5ce53fa6af70b1d16ecbece075a03bf07c99 --- /dev/null +++ b/models/caffenet_bbox_reg.prototxt @@ -0,0 +1,312 @@ +name: "CaffeNet" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 227 +input_dim: 227 +input: "rois" +input_dim: 1 # to be changed on-the-fly to num ROIs +input_dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing +input_dim: 1 +input_dim: 1 +input: "labels" +input_dim: 1 # to be changed on-the-fly to match num ROIs +input_dim: 1 +input_dim: 1 +input_dim: 1 +input: "bbox_targets" +input_dim: 1 # to be changed on-the-fly to match num ROIs +input_dim: 84 # 4 * K (=21) classes +input_dim: 1 +input_dim: 1 +input: "bbox_loss_weights" +input_dim: 1 # to be changed on-the-fly to match num ROIs +input_dim: 84 # 4 * K (=21) classes +input_dim: 1 +input_dim: 1 +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { + num_output: 96 + kernel_size: 11 + stride: 4 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layers { + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + convolution_param { + num_output: 256 + pad: 2 + kernel_size: 5 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layers { + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" +} +layers { + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu4" + type: RELU + bottom: "conv4" + top: "conv4" +} +layers { + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu5" + type: RELU + bottom: "conv5" + top: "conv5" +} +layers { + name: "roi_pool5" + type: ROI_POOLING + bottom: "conv5" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 6 + pooled_h: 6 + } +} +layers { + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + inner_product_param { + num_output: 4096 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu6" + type: RELU + bottom: "fc6" + top: "fc6" +} +layers { + name: "drop6" + type: DROPOUT + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + inner_product_param { + num_output: 4096 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu7" + type: RELU + bottom: "fc7" + top: "fc7" +} +layers { + name: "drop7" + type: DROPOUT + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc8_pascal" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal" + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "fc8_pascal_bbox" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal_bbox" + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "fc8_pascal" + bottom: "labels" + top: "loss" +} +layers { + name: "loss_bbox" + type: SMOOTH_L1_LOSS + bottom: "fc8_pascal_bbox" + bottom: "bbox_targets" + bottom: "bbox_loss_weights" + top: "loss_bbox" + loss_weight: 1 +} diff --git a/models/caffenet_bbox_reg_deploy.prototxt b/models/caffenet_bbox_reg_deploy.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..b72ebc3c8e77d825d15ba56e10a5f3188ef55a13 --- /dev/null +++ b/models/caffenet_bbox_reg_deploy.prototxt @@ -0,0 +1,287 @@ +name: "CaffeNet" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 227 +input_dim: 227 +input: "rois" +input_dim: 1 # to be changed on-the-fly to num ROIs +input_dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing +input_dim: 1 +input_dim: 1 +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { + num_output: 96 + kernel_size: 11 + stride: 4 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layers { + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + convolution_param { + num_output: 256 + pad: 2 + kernel_size: 5 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layers { + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" +} +layers { + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu4" + type: RELU + bottom: "conv4" + top: "conv4" +} +layers { + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 2 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu5" + type: RELU + bottom: "conv5" + top: "conv5" +} +layers { + name: "roi_pool5" + type: ROI_POOLING + bottom: "conv5" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 6 + pooled_h: 6 + } +} +layers { + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + inner_product_param { + num_output: 4096 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu6" + type: RELU + bottom: "fc6" + top: "fc6" +} +layers { + name: "drop6" + type: DROPOUT + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + inner_product_param { + num_output: 4096 + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "relu7" + type: RELU + bottom: "fc7" + top: "fc7" +} +layers { + name: "drop7" + type: DROPOUT + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc8_pascal" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal" + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "fc8_pascal_bbox" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal_bbox" + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } + # Learning parameters + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 +} +layers { + name: "prob" + type: SOFTMAX + bottom: "fc8_pascal" + top: "prob" +} diff --git a/models/caffenet_solver.prototxt b/models/caffenet_solver.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..37012d40e998c4f1904b9b7ebbfe94f23b91c8bc --- /dev/null +++ b/models/caffenet_solver.prototxt @@ -0,0 +1,18 @@ +#train_net: "models/vgg16.prototxt" +train_net: "models/caffenet_bbox_reg.prototxt" +base_lr: 0.001 +lr_policy: "step" +gamma: 0.1 +stepsize: 30000 +display: 20 +average_loss: 100 +max_iter: 500000 +iter_size: 1 +momentum: 0.9 +weight_decay: 0.0005 +# We disable standard caffe solver snapshotting and implement our own snapshot +# function +snapshot: 0 +# We still use the snapshot prefix, though +snapshot_prefix: "snapshots/caffenet_fast_rcnn_randscale" +#debug_info: true