diff --git a/deep_pyramid.py b/deep_pyramid.py
index 678f5e4fab4a53758123b285a22c6c655af7b6a9..f514f7ed33a7b32926889b0d6054ff645d176472 100644
--- a/deep_pyramid.py
+++ b/deep_pyramid.py
@@ -51,7 +51,8 @@ class DeepPyramid(object):
         return caffe_input
 
     def get_feat_pyramid(self, im):
-        im_pyra = self.get_image_pyramid(im)
+        im_pyra, _ = self.get_image_pyramid(im)
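+        # second return value (likely the per-level image scale factors) is unused here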
         feat_pyra = []
         for i in xrange(self.num_levels):
             caffe_input = im_pyra[i].astype(np.float32)
@@ -69,7 +70,7 @@ class DeepPyramid(object):
         return feat_pyra
 
     def get_feat_pyramid_batch(self, im):
-        im_pyra = self.get_image_pyramid(im)
+        im_pyra, _ = self.get_image_pyramid(im)
         caffe_input = self.image_pyramid_to_batch(im_pyra)
         base_shape = im_pyra[0].shape
         self.net.blobs['data'].reshape(self.num_levels, base_shape[2],
diff --git a/fast_rcnn_config.py b/fast_rcnn_config.py
index e87f5e092f62c014a1f10568d3b136b1f5e868fe..808fbbc59f975ec7b601786a265f302a91b1c881 100644
--- a/fast_rcnn_config.py
+++ b/fast_rcnn_config.py
@@ -1,9 +1,18 @@
 import numpy as np
 
 # Scales used in the SPP-net paper
-SCALES          = (480, 576, 688, 864, 1200)
+# SCALES          = (480, 576, 688, 864, 1200)
+# SCALES          = (480, 576, 688, 864, 1000)
+SCALES          = (480, 576, 600)
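+# (a scale is drawn at random per image during training; see finetuning.get_minibatch)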
+# Max pixel size of a scaled input image
+# MAX_SIZE        = 2000
+# MAX_SIZE        = 1200
+MAX_SIZE        = 1000
+# Images per batch
+IMS_PER_BATCH   = 2  # reduced from 4
 # Minibatch size
 BATCH_SIZE      = 128
 # Fraction of minibatch that is foreground labeled (class > 0)
 FG_FRACTION     = 0.25
 # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
@@ -16,5 +25,3 @@ BG_THRESH_LO    = 0.1
 PIXEL_MEANS     = np.array([[[102.9801, 115.9465, 122.7717]]])
 # Stride in input image pixels at ROI pooling level
 FEAT_STRIDE     = 16
-# Max pixel size of a scaled input image
-MAX_SIZE        = 2000
diff --git a/finetuning.py b/finetuning.py
index 5ae657452de8c4c4978c833d67a23ecdfc188392..8fbc076628f6453ee57083af068c574483d6dcde 100644
--- a/finetuning.py
+++ b/finetuning.py
@@ -92,7 +92,9 @@ def get_minibatch(window_db, random_flip=False):
     random_scale_inds = \
         np.random.randint(0, high=len(conf.SCALES), size=num_images)
     assert(conf.BATCH_SIZE % num_images == 0), \
-        'num_images must divide BATCH_SIZE'
+        'num_images ({}) must divide BATCH_SIZE ({})'.format(num_images,
+                                                             conf.BATCH_SIZE)
     rois_per_image = conf.BATCH_SIZE / num_images
     fg_rois_per_image = np.round(conf.FG_FRACTION * rois_per_image)
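+    # each image contributes rois_per_image ROIs, ~FG_FRACTION of them foreground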
     # Get the input blob, formatted for caffe
@@ -119,8 +121,8 @@ def get_minibatch(window_db, random_flip=False):
         assert((feat_rois[:, 2] >= feat_rois[:, 0]).all())
         assert((feat_rois[:, 3] >= feat_rois[:, 1]).all())
         assert((feat_rois >= 0).all())
-        assert((feat_rois < np.max(im_blob.shape[2:4]) *
-                            im_scale_factors[im_i] / conf.FEAT_STRIDE).all())
+        # assert((feat_rois < np.max(im_blob.shape[2:4]) *
+        #                     im_scale_factors[im_i] / conf.FEAT_STRIDE).all())
         rois_blob_this_image = \
             np.append(im_i * np.ones((feat_rois.shape[0], 1)), feat_rois,
                       axis=1)
diff --git a/model-defs/pyramid.prototxt b/model-defs/pyramid.prototxt
index 83c980a06e6b48dc40f1eb3f3f5bceba003f0663..8d1fa73ad6bed887b2fb05137a4406884c0851f5 100644
--- a/model-defs/pyramid.prototxt
+++ b/model-defs/pyramid.prototxt
@@ -1,9 +1,10 @@
 name: "CaffeNet"
 input: "data"
-input_dim: 7
+input_dim: 1
 input_dim: 3
-input_dim: 1713
-input_dim: 1713
+input_dim: 227
+input_dim: 227
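+# dummy shape; the data blob is reshaped on-the-fly (see DeepPyramid.get_feat_pyramid_batch)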
 input: "rois"
 input_dim: 1 # to be changed on-the-fly
 input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing
@@ -270,4 +271,8 @@ layers {
   type: SOFTMAX_LOSS
   bottom: "fc8_pascal"
   bottom: "labels"
+  top: "loss"
+  #softmax_param {
+  #  margin: 1
+  #}
 }
diff --git a/model-defs/vgg16.prototxt b/model-defs/vgg16.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..e913d78fb0398cc16ff2db9a5351860fa5627f01
--- /dev/null
+++ b/model-defs/vgg16.prototxt
@@ -0,0 +1,447 @@
+name: "VGG_ILSVRC_16_layers"
+input: "data"
+input_dim: 1
+input_dim: 3
+input_dim: 224
+input_dim: 224
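+# dummy shape; reshaped on-the-fly (as with "rois" and "labels" below)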
+input: "rois"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing
+input_dim: 1
+input_dim: 1
+input: "labels"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 1
+input_dim: 1
+input_dim: 1
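+# conv1_* and conv2_* are frozen (blobs_lr: 0); conv3_1 and above are fine-tuned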
+layers {
+  bottom: "data"
+  top: "conv1_1"
+  name: "conv1_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_1"
+  name: "relu1_1"
+  type: RELU
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_2"
+  name: "conv1_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_2"
+  top: "conv1_2"
+  name: "relu1_2"
+  type: RELU
+}
+layers {
+  bottom: "conv1_2"
+  top: "pool1"
+  name: "pool1"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool1"
+  top: "conv2_1"
+  name: "conv2_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_1"
+  name: "relu2_1"
+  type: RELU
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_2"
+  name: "conv2_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_2"
+  top: "conv2_2"
+  name: "relu2_2"
+  type: RELU
+}
+layers {
+  bottom: "conv2_2"
+  top: "pool2"
+  name: "pool2"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool2"
+  top: "conv3_1"
+  name: "conv3_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_1"
+  name: "relu3_1"
+  type: RELU
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_2"
+  name: "conv3_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_2"
+  name: "relu3_2"
+  type: RELU
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_3"
+  name: "conv3_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_3"
+  top: "conv3_3"
+  name: "relu3_3"
+  type: RELU
+}
+layers {
+  bottom: "conv3_3"
+  top: "pool3"
+  name: "pool3"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool3"
+  top: "conv4_1"
+  name: "conv4_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_1"
+  name: "relu4_1"
+  type: RELU
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_2"
+  name: "conv4_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_2"
+  name: "relu4_2"
+  type: RELU
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_3"
+  top: "conv4_3"
+  name: "relu4_3"
+  type: RELU
+}
+layers {
+  bottom: "conv4_3"
+  top: "pool4"
+  name: "pool4"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_1"
+  name: "relu5_1"
+  type: RELU
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_2"
+  name: "relu5_2"
+  type: RELU
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_3"
+  top: "conv5_3"
+  name: "relu5_3"
+  type: RELU
+}
+layers {
+  name: "roi_pool5"
+  type: ROI_POOLING
+  bottom: "conv5_3"
+  bottom: "rois"
+  top: "pool5"
+  roi_pooling_param {
+    pooled_w: 7
+    pooled_h: 7
+  }
+}
+layers {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "relu6"
+  type: RELU
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "drop6"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  bottom: "fc6"
+  top: "fc7"
+  name: "fc7"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "relu7"
+  type: RELU
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "drop7"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  name: "fc8_pascal"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8_pascal"
+  inner_product_param {
+    num_output: 21
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8_pascal"
+  bottom: "labels"
+  top: "loss"
+}
diff --git a/model-defs/vgg16_fcs_only.prototxt b/model-defs/vgg16_fcs_only.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..f5bb25c30f208a274a2e229e5567af1586c4b4ae
--- /dev/null
+++ b/model-defs/vgg16_fcs_only.prototxt
@@ -0,0 +1,446 @@
+name: "VGG_ILSVRC_16_layers"
+input: "data"
+input_dim: 1
+input_dim: 3
+input_dim: 224
+input_dim: 224
+input: "rois"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 5 # [level, x1, y1, x2, y2] zero-based indexing
+input_dim: 1
+input_dim: 1
+input: "labels"
+input_dim: 1 # to be changed on-the-fly
+input_dim: 1
+input_dim: 1
+input_dim: 1
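+# Same architecture as vgg16.prototxt, but with every conv layer frozen
+# (blobs_lr: 0); only fc6, fc7 and fc8_pascal are trained.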
+layers {
+  bottom: "data"
+  top: "conv1_1"
+  name: "conv1_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_1"
+  name: "relu1_1"
+  type: RELU
+}
+layers {
+  bottom: "conv1_1"
+  top: "conv1_2"
+  name: "conv1_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv1_2"
+  top: "conv1_2"
+  name: "relu1_2"
+  type: RELU
+}
+layers {
+  bottom: "conv1_2"
+  top: "pool1"
+  name: "pool1"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool1"
+  top: "conv2_1"
+  name: "conv2_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_1"
+  name: "relu2_1"
+  type: RELU
+}
+layers {
+  bottom: "conv2_1"
+  top: "conv2_2"
+  name: "conv2_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv2_2"
+  top: "conv2_2"
+  name: "relu2_2"
+  type: RELU
+}
+layers {
+  bottom: "conv2_2"
+  top: "pool2"
+  name: "pool2"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool2"
+  top: "conv3_1"
+  name: "conv3_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_1"
+  name: "relu3_1"
+  type: RELU
+}
+layers {
+  bottom: "conv3_1"
+  top: "conv3_2"
+  name: "conv3_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_2"
+  name: "relu3_2"
+  type: RELU
+}
+layers {
+  bottom: "conv3_2"
+  top: "conv3_3"
+  name: "conv3_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv3_3"
+  top: "conv3_3"
+  name: "relu3_3"
+  type: RELU
+}
+layers {
+  bottom: "conv3_3"
+  top: "pool3"
+  name: "pool3"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool3"
+  top: "conv4_1"
+  name: "conv4_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_1"
+  name: "relu4_1"
+  type: RELU
+}
+layers {
+  bottom: "conv4_1"
+  top: "conv4_2"
+  name: "conv4_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_2"
+  name: "relu4_2"
+  type: RELU
+}
+layers {
+  bottom: "conv4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv4_3"
+  top: "conv4_3"
+  name: "relu4_3"
+  type: RELU
+}
+layers {
+  bottom: "conv4_3"
+  top: "pool4"
+  name: "pool4"
+  type: POOLING
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layers {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_1"
+  name: "relu5_1"
+  type: RELU
+}
+layers {
+  bottom: "conv5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_2"
+  name: "relu5_2"
+  type: RELU
+}
+layers {
+  bottom: "conv5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: CONVOLUTION
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+  }
+  # Learning parameters
+  blobs_lr: 0
+  blobs_lr: 0
+  weight_decay: 0
+  weight_decay: 0
+}
+layers {
+  bottom: "conv5_3"
+  top: "conv5_3"
+  name: "relu5_3"
+  type: RELU
+}
+layers {
+  name: "roi_pool5"
+  type: ROI_POOLING
+  bottom: "conv5_3"
+  bottom: "rois"
+  top: "pool5"
+  roi_pooling_param {
+    pooled_w: 7
+    pooled_h: 7
+  }
+}
+layers {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "relu6"
+  type: RELU
+}
+layers {
+  bottom: "fc6"
+  top: "fc6"
+  name: "drop6"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  bottom: "fc6"
+  top: "fc7"
+  name: "fc7"
+  type: INNER_PRODUCT
+  inner_product_param {
+    num_output: 4096
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "relu7"
+  type: RELU
+}
+layers {
+  bottom: "fc7"
+  top: "fc7"
+  name: "drop7"
+  type: DROPOUT
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {
+  name: "fc8_pascal"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8_pascal"
+  inner_product_param {
+    num_output: 21
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+  # Learning parameters
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+}
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8_pascal"
+  bottom: "labels"
+  top: "loss"
+}
diff --git a/model-defs/vgg16_solver.prototxt b/model-defs/vgg16_solver.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..57371115fd2887eea40dd51f17bfbf9ef9f4497a
--- /dev/null
+++ b/model-defs/vgg16_solver.prototxt
@@ -0,0 +1,17 @@
+#train_net: "model-defs/vgg16_fcs_only.prototxt"
+train_net: "model-defs/vgg16.prototxt"
+#test_iter: 100
+#test_interval: 1000
+base_lr: 0.001
+lr_policy: "step"
+gamma: 0.1
+stepsize: 30000
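+# "step" policy: lr is multiplied by gamma (0.1) every 30k iterations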
+display: 20
+average_loss: 20
+max_iter: 500000
+iter_size: 1
+momentum: 0.9
+weight_decay: 0.0005
+snapshot: 10000
+snapshot_prefix: "snapshots/vgg16_finetune_all"
diff --git a/pyramid_train.py b/pyramid_train.py
index c32c6a3fcc5ce660a48eb933be12f78f04d5f33d..293b20b46b8d60ce2bad6d7eb70770ce42f46ab6 100755
--- a/pyramid_train.py
+++ b/pyramid_train.py
@@ -24,6 +24,7 @@ import cv2
 import time
 import matplotlib.pyplot as plt
 import finetuning
+import fast_rcnn_config as conf
 from keyboard import keyboard
 
 def print_label_stats(labels):
@@ -133,7 +134,6 @@ def train_model(solver_def_path, window_db_path, pretrained_model=None,
 
 def train_model_random_scales(solver_def_path, window_db_path,
                               pretrained_model=None, GPU_ID=None):
-    IMAGES_PER_BATCH = 4
     solver, window_db = \
         load_solver_and_window_db(solver_def_path,
                                   window_db_path,
@@ -146,11 +146,12 @@ def train_model_random_scales(solver_def_path, window_db_path,
     max_epochs = 100
     for epoch in xrange(max_epochs):
         shuffled_inds = np.random.permutation(np.arange(len(window_db)))
-        lim = (len(shuffled_inds) / IMAGES_PER_BATCH) * IMAGES_PER_BATCH
+        lim = (len(shuffled_inds) / conf.IMS_PER_BATCH) * conf.IMS_PER_BATCH
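+        # keep a multiple of IMS_PER_BATCH images so every minibatch is full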
         shuffled_inds = shuffled_inds[0:lim]
-        for shuffled_i in xrange(0, len(shuffled_inds), 4):
+        for shuffled_i in xrange(0, len(shuffled_inds), conf.IMS_PER_BATCH):
             start_t = time.time()
-            db_inds = shuffled_inds[shuffled_i:shuffled_i + 4]
+            db_inds = shuffled_inds[shuffled_i:shuffled_i + conf.IMS_PER_BATCH]
             minibatch_db = [window_db[i] for i in db_inds]
             im_blob, rois_blob, labels_blob = \
                 finetuning.get_minibatch(minibatch_db)
@@ -180,10 +181,15 @@
 
 
 if __name__ == '__main__':
-    CAFFE_MODEL = '/data/reference_caffe_nets/ilsvrc_2012_train_iter_310k'
-    SOLVER_DEF = './model-defs/pyramid_solver.prototxt'
+    # CAFFE_MODEL = '/data/reference_caffe_nets/ilsvrc_2012_train_iter_310k'
+    # SOLVER_DEF = './model-defs/pyramid_solver.prototxt'
+    CAFFE_MODEL = '/data/reference_caffe_nets/VGG_ILSVRC_16_layers.caffemodel'
+    SOLVER_DEF = './model-defs/vgg16_solver.prototxt'
+    # CAFFE_MODEL = '/data/reference_caffe_nets/bvlc_googlenet.caffemodel'
+    # SOLVER_DEF = './model-defs/googlenet_solver.prototxt'
     WINDOW_DB = './data/window_file_voc_2007_trainval.txt.pz'
-    GPU_ID = 0
+    GPU_ID = 0 if len(sys.argv) == 1 else int(sys.argv[1])
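+    # an optional first CLI argument selects the GPU id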
     train_model_random_scales(SOLVER_DEF, WINDOW_DB,
                               pretrained_model=CAFFE_MODEL,
                               GPU_ID=GPU_ID)