diff --git a/lib/datasets/imdb.py b/lib/datasets/imdb.py
index d712cd42ad82d2a204608691b716b7ce41e21c3d..e133cedf414cc422e077f814a5b4fa17d1f3713e 100644
--- a/lib/datasets/imdb.py
+++ b/lib/datasets/imdb.py
@@ -85,7 +85,7 @@ class imdb(object):
         """
         raise NotImplementedError
 
-    def append_flipped_roidb(self):
+    def append_flipped_images(self):
         num_images = self.num_images
         widths = [PIL.Image.open(self.image_path_at(i)).size[0]
                   for i in xrange(num_images)]
diff --git a/lib/fast_rcnn/config.py b/lib/fast_rcnn/config.py
index 86086a12c1fb9c738a60f916765747eb0a52bf2d..d8d2a547b9ff9a157bd53ffc87e39041333f311a 100644
--- a/lib/fast_rcnn/config.py
+++ b/lib/fast_rcnn/config.py
@@ -13,7 +13,7 @@
 #   and use cfg_from_file(yaml_file) to load it and override the default options.
 #
 # - See tools/{train,test}_net.py for example code that uses cfg_from_file().
-# - See examples/multiscale.yml for an example YAML config override file.
+# - See experiments/cfgs/*.yml for example YAML config override files.
 #
 
 import os
@@ -125,7 +125,7 @@ __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
 # Place outputs under an experiments directory
 __C.EXP_DIR = 'default'
 
-def get_output_path(imdb, net):
+def get_output_dir(imdb, net):
     path = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
     if net is None:
         return path
diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py
index 42724427abde41f406415942d32964e3b6deb1e3..e80636a1cb533c96be76e2bde391e9df0edb9cf3 100644
--- a/lib/fast_rcnn/test.py
+++ b/lib/fast_rcnn/test.py
@@ -5,7 +5,7 @@
 # Written by Ross Girshick
 # --------------------------------------------------------
 
-from fast_rcnn.config import cfg, get_output_path
+from fast_rcnn.config import cfg, get_output_dir
 import argparse
 from utils.timer import Timer
 import numpy as np
@@ -212,7 +212,7 @@ def test_net(net, imdb):
     all_boxes = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]
 
-    output_dir = get_output_path(imdb, net)
+    output_dir = get_output_dir(imdb, net)
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
diff --git a/lib/fast_rcnn/train.py b/lib/fast_rcnn/train.py
index 82ab4f5b098d939f3193da6dbb5ec48601eb5d54..664f4759e2c4afc798582744546e77b2bfcda7f5 100644
--- a/lib/fast_rcnn/train.py
+++ b/lib/fast_rcnn/train.py
@@ -5,154 +5,100 @@
 # Written by Ross Girshick
 # --------------------------------------------------------
 
-from fast_rcnn.config import cfg, get_output_path
-import numpy as np
 import caffe
-import fast_rcnn.finetuning as finetuning
-import fast_rcnn.bbox_regression_targets as bbox_regression_targets
+from fast_rcnn.config import cfg
+import roi_data_layer.roidb as rdl_roidb
+import numpy as np
 import os
 from caffe.proto import caffe_pb2
 import google.protobuf as pb2
 
 class SolverWrapper(object):
-    def __init__(self, solver_prototxt, imdb, pretrained_model=None):
-        self.bbox_means = None
-        self.bbox_stds = None
-        self.imdb = imdb
+    def __init__(self, solver_prototxt, roidb, output_dir,
+                 pretrained_model=None):
+        self.output_dir = output_dir
+
+        print 'Computing bounding-box regression targets...'
+        self.bbox_means, self.bbox_stds = \
+                rdl_roidb.add_bbox_regression_targets(roidb)
+        print 'done'
 
         self.solver = caffe.SGDSolver(solver_prototxt)
         if pretrained_model is not None:
-            print 'Loading pretrained model weights from {:s}' \
-                .format(pretrained_model)
+            print ('Loading pretrained model '
+                   'weights from {:s}').format(pretrained_model)
             self.solver.net.copy_from(pretrained_model)
 
         self.solver_param = caffe_pb2.SolverParameter()
         with open(solver_prototxt, 'rt') as f:
             pb2.text_format.Merge(f.read(), self.solver_param)
 
+        self.solver.net.layers[0].set_roidb(roidb)
+
     def snapshot(self):
-        if cfg.TRAIN.BBOX_REG:
-            assert self.bbox_stds is not None
-            assert self.bbox_means is not None
+        net = self.solver.net
 
+        if cfg.TRAIN.BBOX_REG:
             # save original values
-            orig_0 = self.solver.net.params['bbox_pred'][0].data.copy()
-            orig_1 = self.solver.net.params['bbox_pred'][1].data.copy()
+            orig_0 = net.params['bbox_pred'][0].data.copy()
+            orig_1 = net.params['bbox_pred'][1].data.copy()
 
             # scale and shift with bbox reg unnormalization; then save snapshot
-            self.solver.net.params['bbox_pred'][0].data[...] = \
-                (self.solver.net.params['bbox_pred'][0].data *
+            net.params['bbox_pred'][0].data[...] = \
+                    (net.params['bbox_pred'][0].data *
                      self.bbox_stds[:, np.newaxis])
-            self.solver.net.params['bbox_pred'][1].data[...] = \
-                (self.solver.net.params['bbox_pred'][1].data *
+            net.params['bbox_pred'][1].data[...] = \
+                    (net.params['bbox_pred'][1].data *
                      self.bbox_stds + self.bbox_means)
 
-        output_dir = get_output_path(self.imdb, None)
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
 
         infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                  if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
-        filename = self.solver_param.snapshot_prefix + infix + \
-            '_iter_{:d}'.format(self.solver.iter) + '.caffemodel'
-        filename = os.path.join(output_dir, filename)
+        filename = (self.solver_param.snapshot_prefix + infix +
+                    '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
+        filename = os.path.join(self.output_dir, filename)
 
-        self.solver.net.save(str(filename))
+        net.save(str(filename))
         print 'Wrote snapshot to: {:s}'.format(filename)
 
         if cfg.TRAIN.BBOX_REG:
             # restore net to original state
-            self.solver.net.params['bbox_pred'][0].data[...] = orig_0
-            self.solver.net.params['bbox_pred'][1].data[...] = orig_1
+            net.params['bbox_pred'][0].data[...] = orig_0
+            net.params['bbox_pred'][1].data[...] = orig_1
 
-    def train_model(self, roidb, max_iters):
+    def train_model(self, max_iters):
         last_snapshot_iter = -1
         while self.solver.iter < max_iters:
-            shuffled_inds = np.random.permutation(np.arange(len(roidb)))
-            lim = (len(shuffled_inds) / cfg.TRAIN.IMS_PER_BATCH) * \
-                cfg.TRAIN.IMS_PER_BATCH
-            shuffled_inds = shuffled_inds[0:lim]
-            for shuffled_i in xrange(0, len(shuffled_inds),
-                                     cfg.TRAIN.IMS_PER_BATCH):
-                db_inds = shuffled_inds[shuffled_i:shuffled_i +
-                                        cfg.TRAIN.IMS_PER_BATCH]
-                minibatch_db = [roidb[i] for i in db_inds]
-                blobs = finetuning.get_minibatch(minibatch_db)
-
-                net = self.solver.net
-
-                for blob_name, blob in blobs.iteritems():
-                    # Reshape net's input blobs
-                    net.blobs[blob_name].reshape(*(blob.shape))
-                    # Copy data into net's input blobs
-                    net.blobs[blob_name].data[...] = blob.astype(np.float32,
-                                                                 copy=False)
-
-                # Make one SGD update
-                self.solver.step(1)
-
-                if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
-                    last_snapshot_iter = self.solver.iter
-                    self.snapshot()
-
-                if self.solver.iter >= max_iters:
-                    break
+            # Make one SGD update
+            self.solver.step(1)
+
+            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
+                last_snapshot_iter = self.solver.iter
+                self.snapshot()
 
         if last_snapshot_iter != self.solver.iter:
             self.snapshot()
 
-def prepare_training_roidb(imdb):
-    """
-    Enrich the imdb's roidb by adding some derived quantities that
-    are useful for training. This function precomputes the maximum
-    overlap, taken over ground-truth boxes, between each ROI and
-    each ground-truth box. The class with maximum overlap is also
-    recorded.
-    """
-    roidb = imdb.roidb
-    for i in xrange(len(imdb.image_index)):
-        roidb[i]['image'] = imdb.image_path_at(i)
-        # need gt_overlaps as a dense array for argmax
-        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
-        # max overlap with gt over classes (columns)
-        max_overlaps = gt_overlaps.max(axis=1)
-        # gt class that had the max overlap
-        max_classes = gt_overlaps.argmax(axis=1)
-        roidb[i]['max_classes'] = max_classes
-        roidb[i]['max_overlaps'] = max_overlaps
-        # sanity checks
-        # max overlap of 0 => class should be zero (background)
-        zero_inds = np.where(max_overlaps == 0)[0]
-        assert all(max_classes[zero_inds] == 0)
-        # max overlap > 0 => class should not be zero (must be a fg class)
-        nonzero_inds = np.where(max_overlaps > 0)[0]
-        assert all(max_classes[nonzero_inds] != 0)
-
-    return roidb
-
-def train_net(solver_prototxt, imdb, pretrained_model=None, max_iters=40000):
-    # enhance roidb to contain flipped examples
+def get_training_roidb(imdb):
     if cfg.TRAIN.USE_FLIPPED:
         print 'Appending horizontally-flipped training examples...'
-        imdb.append_flipped_roidb()
+        imdb.append_flipped_images()
        print 'done'
 
-    # enhance roidb to contain some useful derived quanties
     print 'Preparing training data...'
-    roidb = prepare_training_roidb(imdb)
+    rdl_roidb.prepare_roidb(imdb)
     print 'done'
 
-    # enhance roidb to contain bounding-box regression targets
-    print 'Computing bounding-box regression targets...'
-    means, stds = \
-        bbox_regression_targets.append_bbox_regression_targets(roidb)
-    print 'done'
+    return imdb.roidb
 
-    sw = SolverWrapper(solver_prototxt, imdb, pretrained_model=pretrained_model)
-    sw.bbox_means = means
-    sw.bbox_stds = stds
+def train_net(solver_prototxt, roidb, output_dir,
+              pretrained_model=None, max_iters=40000):
+    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
+                       pretrained_model=pretrained_model)
 
     print 'Solving...'
-    sw.train_model(roidb, max_iters=max_iters)
+    sw.train_model(max_iters)
     print 'done solving'
diff --git a/lib/roi_data_layer/__init__.py b/lib/roi_data_layer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ba6a65c52798566f77fff479f6e62fd89ca35ce
--- /dev/null
+++ b/lib/roi_data_layer/__init__.py
@@ -0,0 +1,6 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
diff --git a/lib/roi_data_layer/layer.py b/lib/roi_data_layer/layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f1250b721dd424ca94f28516040e042434b1472
--- /dev/null
+++ b/lib/roi_data_layer/layer.py
@@ -0,0 +1,85 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import caffe
+from fast_rcnn.config import cfg
+from roi_data_layer.minibatch import get_minibatch
+import numpy as np
+import yaml
+
+class DataLayer(caffe.Layer):
+    """Fast R-CNN data layer."""
+
+    def _shuffle_roidb_inds(self):
+        self._perm = np.random.permutation(np.arange(len(self._roidb)))
+        self._cur = 0
+
+    def _get_next_minibatch_inds(self):
+        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
+            self._shuffle_roidb_inds()
+
+        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
+        self._cur += cfg.TRAIN.IMS_PER_BATCH
+        return db_inds
+
+    def _set_next_minibatch(self):
+        db_inds = self._get_next_minibatch_inds()
+        minibatch_db = [self._roidb[i] for i in db_inds]
+        self._blobs = get_minibatch(minibatch_db, self._num_classes)
+
+    def set_roidb(self, roidb):
+        self._roidb = roidb
+        self._shuffle_roidb_inds()
+
+    def setup(self, bottom, top):
+        layer_params = yaml.load(self.param_str_)
+
+        self._num_classes = layer_params['num_classes']
+
+        self._name_to_top_map = {
+            'data': 0,
+            'rois': 1,
+            'labels': 2,
+            'bbox_targets': 3,
+            'bbox_loss_weights': 4}
+
+        # data
+        top[0].reshape(1, 3, 1, 1)
+        # rois
+        top[1].reshape(1, 5)
+        # labels
+        top[2].reshape(1)
+        # bbox_targets
+        top[3].reshape(1, self._num_classes * 4)
+        # bbox_loss_weights
+        top[4].reshape(1, self._num_classes * 4)
+
+        # TODO(rbg):
+        # Start a prefetch thread that calls self._get_next_minibatch()
+
+    def forward(self, bottom, top):
+        # TODO(rbg):
+        # wait for prefetch thread to finish
+        self._set_next_minibatch()
+
+        for blob_name, blob in self._blobs.iteritems():
+            top_ind = self._name_to_top_map[blob_name]
+            # Reshape net's input blobs
+            top[top_ind].reshape(*(blob.shape))
+            # Copy data into net's input blobs
+            top[top_ind].data[...] = blob.astype(np.float32, copy=False)
+
+        # TODO(rbg):
+        # start next prefetch thread
+
+    def backward(self, top, propagate_down, bottom):
+        """This layer does not propagate gradients."""
+        pass
+
+    def reshape(self, bottom, top):
+        """Reshaping happens during the call to forward."""
+        pass
diff --git a/lib/fast_rcnn/finetuning.py b/lib/roi_data_layer/minibatch.py
similarity index 94%
rename from lib/fast_rcnn/finetuning.py
rename to lib/roi_data_layer/minibatch.py
index 97111300e2a81bf41714e3e325f5530197713bd2..58625cf33f94dbaa84d10f1dd7727544454c70b8 100644
--- a/lib/fast_rcnn/finetuning.py
+++ b/lib/roi_data_layer/minibatch.py
@@ -8,17 +8,14 @@
 import numpy as np
 import numpy.random as npr
 import cv2
-import matplotlib.pyplot as plt
 from fast_rcnn.config import cfg
 from utils.blob import prep_im_for_blob, im_list_to_blob
 
-def get_minibatch(roidb):
+def get_minibatch(roidb, num_classes):
     """
     Given a roidb, construct a minibatch sampled from it.
     """
     num_images = len(roidb)
-    # Infer number of classes from the number of columns in gt_overlaps
-    num_classes = roidb[0]['gt_overlaps'].shape[1]
     # Sample random scales to use for each image in this batch
     random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                     size=num_images)
@@ -39,7 +36,8 @@ def get_minibatch(roidb):
     # all_overlaps = []
     for im_i in xrange(num_images):
         labels, overlaps, im_rois, bbox_targets, bbox_loss \
-            = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image)
+            = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
+                           num_classes)
 
         # Add to RoIs blob
         rois = _scale_im_rois(im_rois, im_scales[im_i])
@@ -66,7 +64,7 @@
 
     return blobs
 
-def _sample_rois(roidb, fg_rois_per_image, rois_per_image):
+def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
     """
     Generate a random sample of RoIs comprising foreground and background
     examples.
@@ -108,8 +106,6 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image):
     overlaps = overlaps[keep_inds]
     rois = rois[keep_inds]
 
-    # Infer number of classes from the number of columns in gt_overlaps
-    num_classes = roidb['gt_overlaps'].shape[1]
     bbox_targets, bbox_loss_weights = \
             _get_bbox_regression_labels(roidb['bbox_targets'][keep_inds, :],
                                         num_classes)
@@ -167,6 +163,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
 
 def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
     """Visualize a mini-batch for debugging."""
+    import matplotlib.pyplot as plt
     for i in xrange(rois_blob.shape[0]):
         rois = rois_blob[i, :]
         im_ind = rois[0]
diff --git a/lib/fast_rcnn/bbox_regression_targets.py b/lib/roi_data_layer/roidb.py
similarity index 70%
rename from lib/fast_rcnn/bbox_regression_targets.py
rename to lib/roi_data_layer/roidb.py
index 51be74605651350de93c05bb8fdd29cf4cda2760..763e1ae80f8267258bcc25d30f11bd71cf0dacd3 100644
--- a/lib/fast_rcnn/bbox_regression_targets.py
+++ b/lib/roi_data_layer/roidb.py
@@ -9,6 +9,76 @@ import numpy as np
 from fast_rcnn.config import cfg
 import utils.cython_bbox
 
+def prepare_roidb(imdb):
+    """
+    Enrich the imdb's roidb by adding some derived quantities that
+    are useful for training. This function precomputes the maximum
+    overlap, taken over ground-truth boxes, between each ROI and
+    each ground-truth box. The class with maximum overlap is also
+    recorded.
+    """
+    roidb = imdb.roidb
+    for i in xrange(len(imdb.image_index)):
+        roidb[i]['image'] = imdb.image_path_at(i)
+        # need gt_overlaps as a dense array for argmax
+        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
+        # max overlap with gt over classes (columns)
+        max_overlaps = gt_overlaps.max(axis=1)
+        # gt class that had the max overlap
+        max_classes = gt_overlaps.argmax(axis=1)
+        roidb[i]['max_classes'] = max_classes
+        roidb[i]['max_overlaps'] = max_overlaps
+        # sanity checks
+        # max overlap of 0 => class should be zero (background)
+        zero_inds = np.where(max_overlaps == 0)[0]
+        assert all(max_classes[zero_inds] == 0)
+        # max overlap > 0 => class should not be zero (must be a fg class)
+        nonzero_inds = np.where(max_overlaps > 0)[0]
+        assert all(max_classes[nonzero_inds] != 0)
+
+def add_bbox_regression_targets(roidb):
+    assert len(roidb) > 0
+    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
+
+    num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
+    for im_i in xrange(num_images):
+        rois = roidb[im_i]['boxes']
+        max_overlaps = roidb[im_i]['max_overlaps']
+        max_classes = roidb[im_i]['max_classes']
+        roidb[im_i]['bbox_targets'] = \
+                _compute_targets(rois, max_overlaps, max_classes)
+
+    # Compute values needed for means and stds
+    # var(x) = E(x^2) - E(x)^2
+    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
+    sums = np.zeros((num_classes, 4))
+    squared_sums = np.zeros((num_classes, 4))
+    for im_i in xrange(num_images):
+        targets = roidb[im_i]['bbox_targets']
+        for cls in xrange(1, num_classes):
+            cls_inds = np.where(targets[:, 0] == cls)[0]
+            if cls_inds.size > 0:
+                class_counts[cls] += cls_inds.size
+                sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
+                squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)
+
+    means = sums / class_counts
+    stds = np.sqrt(squared_sums / class_counts - means ** 2)
+
+    # Normalize targets
+    for im_i in xrange(num_images):
+        targets = roidb[im_i]['bbox_targets']
+        for cls in xrange(1, num_classes):
+            cls_inds = np.where(targets[:, 0] == cls)[0]
+            roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
+            roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
+
+    # These values will be needed for making predictions
+    # (the predicts will need to be unnormalized and uncentered)
+    return means.ravel(), stds.ravel()
+
 def _compute_targets(rois, overlaps, labels):
     # Ensure ROIs are floats
     rois = rois.astype(np.float, copy=False)
@@ -50,45 +120,3 @@ def _compute_targets(rois, overlaps, labels):
     targets[ex_inds, 3] = targets_dw
     targets[ex_inds, 4] = targets_dh
     return targets
-
-def append_bbox_regression_targets(roidb):
-    num_images = len(roidb)
-    # Infer number of classes from the number of columns in gt_overlaps
-    num_classes = roidb[0]['gt_overlaps'].shape[1]
-    for im_i in xrange(num_images):
-        rois = roidb[im_i]['boxes']
-        max_overlaps = roidb[im_i]['max_overlaps']
-        max_classes = roidb[im_i]['max_classes']
-        roidb[im_i]['bbox_targets'] = \
-            _compute_targets(rois, max_overlaps, max_classes)
-
-    # Compute values needed for means and stds
-    # var(x) = E(x^2) - E(x)^2
-    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
-    sums = np.zeros((num_classes, 4))
-    squared_sums = np.zeros((num_classes, 4))
-    for im_i in xrange(num_images):
-        targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, num_classes):
-            cls_inds = np.where(targets[:, 0] == cls)[0]
-            if cls_inds.size > 0:
-                class_counts[cls] += cls_inds.size
-                sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
-                squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)
-
-    means = sums / class_counts
-    stds = np.sqrt(squared_sums / class_counts - means ** 2)
-
-    # Normalize targets
-    for im_i in xrange(num_images):
-        targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, num_classes):
-            cls_inds = np.where(targets[:, 0] == cls)[0]
-            roidb[im_i]['bbox_targets'][cls_inds, 1:] \
-                -= means[cls, :]
-            roidb[im_i]['bbox_targets'][cls_inds, 1:] \
-                /= stds[cls, :]
-
-    # These values will be needed for making predictions
-    # (the predicts will need to be unnormalized and uncentered)
-    return means.ravel(), stds.ravel()
diff --git a/tools/train_net.py b/tools/train_net.py
index 08f9f54a47e6ad94a6bfb596e862b2e89ad6c2c1..48a23b3378d814b5a30c2c5bf79f85da55856d59 100755
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -8,8 +8,8 @@
 # --------------------------------------------------------
 
 import _init_paths
-import fast_rcnn as frc
-from fast_rcnn.config import cfg, cfg_from_file
+from fast_rcnn.train import get_training_roidb, train_net
+from fast_rcnn.config import cfg, cfg_from_file, get_output_dir
 from datasets.factory import get_imdb
 import caffe
 import argparse
@@ -67,7 +67,11 @@ if __name__ == '__main__':
 
     imdb_train = get_imdb(args.imdb_name)
     print 'Loaded dataset `{:s}` for training'.format(imdb_train.name)
+    roidb = get_training_roidb(imdb_train)
 
-    frc.train.train_net(args.solver, imdb_train,
-                        pretrained_model=args.pretrained_model,
-                        max_iters=args.max_iters)
+    output_dir = get_output_dir(imdb_train, None)
+    print 'Output will be saved to `{:s}`'.format(output_dir)
+
+    train_net(args.solver, roidb, output_dir,
+              pretrained_model=args.pretrained_model,
+              max_iters=args.max_iters)
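
For reference, a minimal sketch (not part of this diff) of how a caller drives the refactored entry points, mirroring the updated tools/train_net.py. The dataset key and solver path below are placeholders; it also assumes the solver's train prototxt begins with the new Python DataLayer, since SolverWrapper hands the roidb to net.layers[0] via set_roidb.

```python
# Hypothetical driver, assuming lib/ is already on sys.path (the repo's own
# tools/ scripts arrange this via _init_paths) and that the train net's first
# layer is roi_data_layer.layer.DataLayer, which SolverWrapper feeds with set_roidb.
from fast_rcnn.train import get_training_roidb, train_net
from fast_rcnn.config import get_output_dir
from datasets.factory import get_imdb

imdb = get_imdb('voc_2007_trainval')      # placeholder dataset key
roidb = get_training_roidb(imdb)          # flips images (if enabled) and runs prepare_roidb
output_dir = get_output_dir(imdb, None)   # snapshots are written under output/<EXP_DIR>/<imdb.name>

train_net('path/to/solver.prototxt',      # placeholder solver path
          roidb, output_dir,
          pretrained_model=None,          # or a path to ImageNet-pretrained weights
          max_iters=40000)
```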