diff --git a/deep_pyramid.py b/deep_pyramid.py
index 678f5e4fab4a53758123b285a22c6c655af7b6a9..f514f7ed33a7b32926889b0d6054ff645d176472 100644
--- a/deep_pyramid.py
+++ b/deep_pyramid.py
@@ -51,7 +51,7 @@ class DeepPyramid(object):
         return caffe_input

     def get_feat_pyramid(self, im):
-        im_pyra = self.get_image_pyramid(im)
+        im_pyra, _ = self.get_image_pyramid(im)
         feat_pyra = []
         for i in xrange(self.num_levels):
             caffe_input = im_pyra[i].astype(np.float32)
@@ -69,7 +69,7 @@ class DeepPyramid(object):
         return feat_pyra

     def get_feat_pyramid_batch(self, im):
-        im_pyra = self.get_image_pyramid(im)
+        im_pyra, _ = self.get_image_pyramid(im)
         caffe_input = self.image_pyramid_to_batch(im_pyra)
         base_shape = im_pyra[0].shape
         self.net.blobs['data'].reshape(self.num_levels, base_shape[2],
diff --git a/fast_rcnn_config.py b/fast_rcnn_config.py
index e87f5e092f62c014a1f10568d3b136b1f5e868fe..808fbbc59f975ec7b601786a265f302a91b1c881 100644
--- a/fast_rcnn_config.py
+++ b/fast_rcnn_config.py
@@ -1,9 +1,17 @@
 import numpy as np

 # Scales used in the SPP-net paper
-SCALES = (480, 576, 688, 864, 1200)
+# SCALES = (480, 576, 688, 864, 1200)
+# SCALES = (480, 576, 688, 864, 1000)
+SCALES = (480, 576, 600)
+# Max pixel size of a scaled input image
+# MAX_SIZE = 2000
+# MAX_SIZE = 1200
+MAX_SIZE = 1000
+# Images per batch
+IMS_PER_BATCH = 2 # 4
 # Minibatch size
-BATCH_SIZE = 128
+BATCH_SIZE = 128 # 128
 # Fraction of minibatch that is foreground labeled (class > 0)
 FG_FRACTION = 0.25
 # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
@@ -16,5 +24,3 @@ BG_THRESH_LO = 0.1
 PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
 # Stride in input image pixels at ROI pooling level
 FEAT_STRIDE = 16
-# Max pixel size of a scaled input image
-MAX_SIZE = 2000
diff --git a/finetuning.py b/finetuning.py
index 5ae657452de8c4c4978c833d67a23ecdfc188392..8fbc076628f6453ee57083af068c574483d6dcde 100644
--- a/finetuning.py
+++ b/finetuning.py
@@ -92,7 +92,8 @@ def get_minibatch(window_db, random_flip=False):
     random_scale_inds = \
         np.random.randint(0, high=len(conf.SCALES), size=num_images)
     assert(conf.BATCH_SIZE % num_images == 0), \
-        'num_images must divide BATCH_SIZE'
+        'num_images ({}) must divide BATCH_SIZE ({})'.format(num_images,
+                                                             conf.BATCH_SIZE)
     rois_per_image = conf.BATCH_SIZE / num_images
     fg_rois_per_image = np.round(conf.FG_FRACTION * rois_per_image)
     # Get the input blob, formatted for caffe
@@ -119,8 +120,8 @@
         assert((feat_rois[:, 2] >= feat_rois[:, 0]).all())
         assert((feat_rois[:, 3] >= feat_rois[:, 1]).all())
         assert((feat_rois >= 0).all())
-        assert((feat_rois < np.max(im_blob.shape[2:4]) *
-                im_scale_factors[im_i] / conf.FEAT_STRIDE).all())
+        # assert((feat_rois < np.max(im_blob.shape[2:4]) *
+        #         im_scale_factors[im_i] / conf.FEAT_STRIDE).all())
         rois_blob_this_image = \
             np.append(im_i * np.ones((feat_rois.shape[0], 1)),
                       feat_rois, axis=1)
diff --git a/model-defs/pyramid.prototxt b/model-defs/pyramid.prototxt
index 83c980a06e6b48dc40f1eb3f3f5bceba003f0663..8d1fa73ad6bed887b2fb05137a4406884c0851f5 100644
--- a/model-defs/pyramid.prototxt
+++ b/model-defs/pyramid.prototxt
@@ -1,9 +1,9 @@
 name: "CaffeNet"
 input: "data"
-input_dim: 7
+input_dim: 1
 input_dim: 3
-input_dim: 1713
-input_dim: 1713
+input_dim: 227
+input_dim: 227
 input: "rois"
 input_dim: 1 # to be changed on-the-fly
 input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing
@@ -270,4 +270,8 @@ layers {
   type: SOFTMAX_LOSS
   bottom: "fc8_pascal"
   bottom: "labels"
+  top: "loss"
+  #softmax_param {
+  #  margin: 1
+  #}
 }
diff --git a/model-defs/vgg16.prototxt b/model-defs/vgg16.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..e913d78fb0398cc16ff2db9a5351860fa5627f01
--- /dev/null
+++ b/model-defs/vgg16.prototxt
@@ -0,0 +1,444 @@
+name: "VGG_ILSVRC_16_layers"
+input: "data"
+input_dim: 1
+input_dim: 3
+input_dim: 224
+input_dim: 224
+input: "rois"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing
+input_dim: 1
+input_dim: 1
+input: "labels"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 1
+input_dim: 1
+input_dim: 1
+layers {
+  bottom: "data"
+  top: "conv1_1"
+  name: "conv1_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_1"
+  name: "relu1_1"
+  type: RELU
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_2"
+  name: "conv1_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_2"
+  top: "conv1_2"
+  name: "relu1_2"
+  type: RELU
+}
+layers {
+  bottom: "conv1_2"
+  top: "pool1"
+  name: "pool1"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool1"
+  top: "conv2_1"
+  name: "conv2_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_1"
+  name: "relu2_1"
+  type: RELU
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_2"
+  name: "conv2_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_2"
+  top: "conv2_2"
+  name: "relu2_2"
+  type: RELU
+}
+layers {
+  bottom: "conv2_2"
+  top: "pool2"
+  name: "pool2"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool2"
+  top: "conv3_1"
+  name: "conv3_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_1"
+  name: "relu3_1"
+  type: RELU
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_2"
+  name: "conv3_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_2"
+  name: "relu3_2"
+  type: RELU
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_3"
+  name: "conv3_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_3"
+  top: "conv3_3"
+  name: "relu3_3"
+  type: RELU
+}
+layers {
+  bottom: "conv3_3"
+  top: "pool3"
+  name: "pool3"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool3"
+  top: "conv4_1"
+  name: "conv4_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_1"
+  name: "relu4_1"
+  type: RELU
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_2"
+  name: "conv4_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_2"
+  name: "relu4_2"
+  type: RELU
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_3"
+  top: "conv4_3"
+  name: "relu4_3"
+  type: RELU
+}
+layers {
+  bottom: "conv4_3"
+  top: "pool4"
+  name: "pool4"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_1"
+  name: "relu5_1"
+  type: RELU
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_2"
+  name: "relu5_2"
+  type: RELU
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_3"
+  top: "conv5_3"
+  name: "relu5_3"
+  type: RELU
+}
+layers {
+  name: "roi_pool5"
+  type: ROI_POOLING
+  bottom: "conv5_3"
+  bottom: "rois"
+  top: "pool5"
+  roi_pooling_param {
+    pooled_w: 7
+    pooled_h: 7
+  }
+}
+layers {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "relu6"
+  type: RELU
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "drop6"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  bottom: "fc6"
+  top: "fc7"
+  name: "fc7"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "relu7"
+  type: RELU
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "drop7"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  name: "fc8_pascal"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8_pascal"
+  inner_product_param {
+    num_output: 21
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8_pascal"
+  bottom: "labels"
+  top: "loss"
+}
diff --git a/model-defs/vgg16_fcs_only.prototxt b/model-defs/vgg16_fcs_only.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..f5bb25c30f208a274a2e229e5567af1586c4b4ae
--- /dev/null
+++ b/model-defs/vgg16_fcs_only.prototxt
@@ -0,0 +1,444 @@
+name: "VGG_ILSVRC_16_layers"
"VGG_ILSVRC_16_layers" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 224 +input_dim: 224 +input: "rois" +input_dim: 1 # to be changed on-the-fly +input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing +input_dim: 1 +input_dim: 1 +input: "labels" +input_dim: 1 # to be changed on-the-fly +input_dim: 1 +input_dim: 1 +input_dim: 1 +layers { + bottom: "data" + top: "conv1_1" + name: "conv1_1" + type: CONVOLUTION + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv1_1" + top: "conv1_1" + name: "relu1_1" + type: RELU +} +layers { + bottom: "conv1_1" + top: "conv1_2" + name: "conv1_2" + type: CONVOLUTION + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv1_2" + top: "conv1_2" + name: "relu1_2" + type: RELU +} +layers { + bottom: "conv1_2" + top: "pool1" + name: "pool1" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + bottom: "pool1" + top: "conv2_1" + name: "conv2_1" + type: CONVOLUTION + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv2_1" + top: "conv2_1" + name: "relu2_1" + type: RELU +} +layers { + bottom: "conv2_1" + top: "conv2_2" + name: "conv2_2" + type: CONVOLUTION + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv2_2" + top: "conv2_2" + name: "relu2_2" + type: RELU +} +layers { + bottom: "conv2_2" + top: "pool2" + name: "pool2" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + bottom: "pool2" + top: "conv3_1" + name: "conv3_1" + type: CONVOLUTION + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv3_1" + top: "conv3_1" + name: "relu3_1" + type: RELU +} +layers { + bottom: "conv3_1" + top: "conv3_2" + name: "conv3_2" + type: CONVOLUTION + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv3_2" + top: "conv3_2" + name: "relu3_2" + type: RELU +} +layers { + bottom: "conv3_2" + top: "conv3_3" + name: "conv3_3" + type: CONVOLUTION + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv3_3" + top: "conv3_3" + name: "relu3_3" + type: RELU +} +layers { + bottom: "conv3_3" + top: "pool3" + name: "pool3" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + bottom: "pool3" + top: "conv4_1" + name: "conv4_1" + type: CONVOLUTION + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } + # Learning parameters + blobs_lr: 0 + blobs_lr: 0 + weight_decay: 0 + weight_decay: 0 +} +layers { + bottom: "conv4_1" + top: "conv4_1" + name: "relu4_1" + type: RELU +} +layers { + bottom: "conv4_1" + top: "conv4_2" + name: "conv4_2" + type: CONVOLUTION + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } + # Learning parameters 
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_2"
+  name: "relu4_2"
+  type: RELU
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_3"
+  top: "conv4_3"
+  name: "relu4_3"
+  type: RELU
+}
+layers {
+  bottom: "conv4_3"
+  top: "pool4"
+  name: "pool4"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_1"
+  name: "relu5_1"
+  type: RELU
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_2"
+  name: "relu5_2"
+  type: RELU
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_3"
+  top: "conv5_3"
+  name: "relu5_3"
+  type: RELU
+}
+layers {
+  name: "roi_pool5"
+  type: ROI_POOLING
+  bottom: "conv5_3"
+  bottom: "rois"
+  top: "pool5"
+  roi_pooling_param {
+    pooled_w: 7
+    pooled_h: 7
+  }
+}
+layers {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "relu6"
+  type: RELU
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "drop6"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  bottom: "fc6"
+  top: "fc7"
+  name: "fc7"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "relu7"
+  type: RELU
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "drop7"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  name: "fc8_pascal"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8_pascal"
+  inner_product_param {
+    num_output: 21
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8_pascal"
+  bottom: "labels"
+  top: "loss"
+}
diff --git a/model-defs/vgg16_solver.prototxt b/model-defs/vgg16_solver.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..57371115fd2887eea40dd51f17bfbf9ef9f4497a
--- /dev/null
+++ b/model-defs/vgg16_solver.prototxt
@@ -0,0 +1,16 @@
+#train_net: "model-defs/vgg16_fcs_only.prototxt"
+train_net: "model-defs/vgg16.prototxt"
+#test_iter: 100
+#test_interval: 1000
+base_lr: 0.001
+lr_policy: "step"
+gamma: 0.1
+stepsize: 30000
+display: 20
+average_loss: 20
+max_iter: 500000
+iter_size: 1
+momentum: 0.9
+weight_decay: 0.0005
+snapshot: 10000
+snapshot_prefix: "snapshots/vgg16_finetune_all"
diff --git a/pyramid_train.py b/pyramid_train.py
index c32c6a3fcc5ce660a48eb933be12f78f04d5f33d..293b20b46b8d60ce2bad6d7eb70770ce42f46ab6 100755
--- a/pyramid_train.py
+++ b/pyramid_train.py
@@ -24,6 +24,7 @@ import cv2
 import time
 import matplotlib.pyplot as plt
 import finetuning
+import fast_rcnn_config as conf
 from keyboard import keyboard

 def print_label_stats(labels):
@@ -133,7 +134,6 @@ def train_model(solver_def_path, window_db_path, pretrained_model=None,

 def train_model_random_scales(solver_def_path, window_db_path,
                               pretrained_model=None, GPU_ID=None):
-    IMAGES_PER_BATCH = 4
     solver, window_db = \
         load_solver_and_window_db(solver_def_path, window_db_path,
@@ -146,11 +146,11 @@
     max_epochs = 100
     for epoch in xrange(max_epochs):
         shuffled_inds = np.random.permutation(np.arange(len(window_db)))
-        lim = (len(shuffled_inds) / IMAGES_PER_BATCH) * IMAGES_PER_BATCH
+        lim = (len(shuffled_inds) / conf.IMS_PER_BATCH) * conf.IMS_PER_BATCH
         shuffled_inds = shuffled_inds[0:lim]
-        for shuffled_i in xrange(0, len(shuffled_inds), 4):
+        for shuffled_i in xrange(0, len(shuffled_inds), conf.IMS_PER_BATCH):
             start_t = time.time()
-            db_inds = shuffled_inds[shuffled_i:shuffled_i + 4]
+            db_inds = shuffled_inds[shuffled_i:shuffled_i + conf.IMS_PER_BATCH]
             minibatch_db = [window_db[i] for i in db_inds]
             im_blob, rois_blob, labels_blob = \
                 finetuning.get_minibatch(minibatch_db)
@@ -180,10 +180,14 @@

 if __name__ == '__main__':
-    CAFFE_MODEL = '/data/reference_caffe_nets/ilsvrc_2012_train_iter_310k'
-    SOLVER_DEF = './model-defs/pyramid_solver.prototxt'
+    # CAFFE_MODEL = '/data/reference_caffe_nets/ilsvrc_2012_train_iter_310k'
+    # SOLVER_DEF = './model-defs/pyramid_solver.prototxt'
+    CAFFE_MODEL = '/data/reference_caffe_nets/VGG_ILSVRC_16_layers.caffemodel'
+    SOLVER_DEF = './model-defs/vgg16_solver.prototxt'
+    # CAFFE_MODEL = '/data/reference_caffe_nets/bvlc_googlenet.caffemodel'
+    # SOLVER_DEF = './model-defs/googlenet_solver.prototxt'
     WINDOW_DB = './data/window_file_voc_2007_trainval.txt.pz'
-    GPU_ID = 0
+    GPU_ID = 0 if len(sys.argv) == 1 else int(sys.argv[1])
     train_model_random_scales(SOLVER_DEF, WINDOW_DB,
                               pretrained_model=CAFFE_MODEL,
                               GPU_ID=GPU_ID)