tool for training SVMs (old skool)

209643b4 · Ross Girshick · 0c624c25 · 209643b4 · 209643b4
Commit 209643b4 authored 9 years ago by Ross Girshick
--- a/examples/svm.yml
+++ b/examples/svm.yml
+EXP_DIR: svm
+TEST:
+  BINARY: True
--- a/tools/extra/train_svms.py
+++ b/tools/extra/train_svms.py
+#!/usr/bin/env python
+
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import sys
+import os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                                '..', '..', 'src')))
+
+from fast_rcnn_config import cfg, cfg_from_file
+from fast_rcnn_test import im_detect
+import fast_rcnn_train
+from datasets.factory import get_imdb
+from utils.timer import Timer
+import caffe
+import argparse
+import pprint
+import numpy as np
+import cv2
+from sklearn import svm
+
+class SVMTrainer(object):
+    """Train one linear SVM per class on features pulled from a network.
+
+    Positive examples are the features of ground-truth boxes; negatives are
+    mined from the roidb boxes while the SVMs are being trained.  Every time
+    a class SVM is (re)trained its weights and bias are copied into the
+    net's 'cls_score' parameters, so later forward passes score boxes with
+    the current SVMs.
+
+    Class index 0 is skipped by every per-class loop (presumably it is the
+    background class -- confirm against the imdb).
+    """
+
+    def __init__(self, net, imdb):
+        """Create one SVMClassTrainer per entry of ``imdb.classes``.
+
+        Args:
+            net: network exposing ``params`` and ``blobs``; it must have a
+                'cls_score' layer and a blob named by ``self.layer``.
+            imdb: dataset object providing ``classes``, ``num_classes``,
+                ``roidb`` and ``image_path_at``.
+        """
+        self.imdb = imdb
+        self.net = net
+        dim = net.params['cls_score'][0].data.shape[1]
+        print('Feature dim: {}'.format(dim))
+        self.trainers = [SVMClassTrainer(cls, dim) for cls in imdb.classes]
+        # Name of the blob whose activations are used as SVM features.
+        self.layer = 'fc7'
+        # Boxes scoring above this value are candidates for the negative
+        # cache.
+        self.hard_thresh = -1.0001
+        # Only boxes whose 'gt_overlaps' entry for the class is below this
+        # value may be used as negatives.
+        self.neg_iou_thresh = 0.3
+
+    def _get_pos_counts(self):
+        """Count the ground-truth boxes of each class over the whole roidb.
+
+        Returns:
+            Integer array with one entry per class.  Entry 0 stays at zero
+            because counting starts at class 1.
+        """
+        num_classes = self.imdb.num_classes
+        # The builtin int replaces the np.int alias, which newer NumPy
+        # releases no longer provide; the resulting dtype is the same.
+        counts = np.zeros((len(self.imdb.classes)), dtype=int)
+        for entry in self.imdb.roidb:
+            gt_classes = entry['gt_classes']
+            for j in range(1, num_classes):
+                counts[j] += np.count_nonzero(gt_classes == j)
+
+        for j in range(1, num_classes):
+            print('class {:s} has {:d} positives'.
+                  format(self.imdb.classes[j], counts[j]))
+
+        return counts
+
+    def get_pos_examples(self):
+        """Cache the features of every ground-truth box with its trainer."""
+        counts = self._get_pos_counts()
+        for trainer, count in zip(self.trainers, counts):
+            trainer.alloc_pos(count)
+
+        _t = Timer()
+        roidb = self.imdb.roidb
+        num_images = len(roidb)
+        for i in range(num_images):
+            im = cv2.imread(self.imdb.image_path_at(i))
+            gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0]
+            gt_boxes = roidb[i]['boxes'][gt_inds]
+            gt_classes = roidb[i]['gt_classes'][gt_inds]
+            _t.tic()
+            # Only the side effect is needed: the forward pass fills the
+            # feature blob read below.  Its rows are assumed to line up
+            # with gt_boxes (the script disables box de-duplication).
+            im_detect(self.net, im, gt_boxes)
+            _t.toc()
+            feat = self.net.blobs[self.layer].data
+            for j in range(1, self.imdb.num_classes):
+                cls_inds = np.where(gt_classes == j)[0]
+                if len(cls_inds) > 0:
+                    # append_pos copies the rows into the trainer's own
+                    # pre-allocated array, so the blob may be overwritten
+                    # by the next forward pass.
+                    self.trainers[j].append_pos(feat[cls_inds, :])
+
+            print('get_pos_examples: {:d}/{:d} {:.3f}s'
+                  .format(i + 1, num_images, _t.average_time))
+
+    def initialize_net(self):
+        """Zero the weights and biases of the 'cls_score' layer.
+
+        The SVMs deliberately start from zero.  Starting from the existing
+        classifier (foreground weights minus the background weights) was
+        considered and rejected: it is such a good initialization that we
+        might not learn something close to the SVM solution.
+        """
+        self.net.params['cls_score'][0].data[...] = 0
+        self.net.params['cls_score'][1].data[...] = 0
+
+    def update_net(self, cls_ind, w, b):
+        """Install weights ``w`` and bias ``b`` as row ``cls_ind`` of 'cls_score'."""
+        self.net.params['cls_score'][0].data[cls_ind, :] = w
+        self.net.params['cls_score'][1].data[cls_ind] = b
+
+    def train_with_hard_negatives(self):
+        """Make one pass over the roidb, mining negatives and retraining.
+
+        For every image the current SVMs (already installed in the net)
+        score all roidb boxes.  Boxes that score above ``hard_thresh`` for a
+        class while overlapping its ground truth by less than
+        ``neg_iou_thresh`` are added to that class's negative cache; when
+        the class trainer decides to retrain, the new SVM is installed
+        immediately.
+        """
+        _t = Timer()
+        roidb = self.imdb.roidb
+        num_images = len(roidb)
+        for i in range(num_images):
+            im = cv2.imread(self.imdb.image_path_at(i))
+            _t.tic()
+            scores, _ = im_detect(self.net, im, roidb[i]['boxes'])
+            _t.toc()
+            feat = self.net.blobs[self.layer].data
+            # Densify the sparse overlap matrix once per image instead of
+            # once per class.
+            overlaps = roidb[i]['gt_overlaps'].toarray()
+            for j in range(1, self.imdb.num_classes):
+                hard_inds = np.where((scores[:, j] > self.hard_thresh) &
+                                     (overlaps[:, j] <
+                                      self.neg_iou_thresh))[0]
+                if len(hard_inds) > 0:
+                    # Copy: the blob is overwritten by the next forward
+                    # pass, but the cache must keep these rows.
+                    hard_feat = feat[hard_inds, :].copy()
+                    new_w_b = self.trainers[j].append_neg_and_retrain(
+                        feat=hard_feat)
+                    if new_w_b is not None:
+                        self.update_net(j, new_w_b[0], new_w_b[1])
+
+            print('train_with_hard_negatives: {:d}/{:d} {:.3f}s'
+                  .format(i + 1, num_images, _t.average_time))
+
+    def train(self):
+        """Run the whole SVM training procedure and leave the SVMs in the net."""
+        # 1) Reset the 'cls_score' layer to zeros so that the SVMs are
+        #    learned from scratch.
+        self.initialize_net()
+
+        # 2) Pass over roidb to count positives for each class,
+        #    pre-allocate the positive feature arrays, then pass over roidb
+        #    again computing features for the positives only.
+        self.get_pos_examples()
+
+        # 3) Pass over roidb
+        #   a. Compute cls_score with a forward pass
+        #   b. For each class
+        #       i. Select hard negatives and add them to the cache
+        #       ii. If the retrain criterion is met, update the SVM and
+        #           install it into the net
+        self.train_with_hard_negatives()
+
+        # 4) One final SVM retraining for each class; install the result.
+        for j in range(1, self.imdb.num_classes):
+            new_w_b = self.trainers[j].append_neg_and_retrain(force=True)
+            self.update_net(j, new_w_b[0], new_w_b[1])
+
+
+
+#        7) Save net
+
+
+
+class SVMClassTrainer(object):
+    """Example cache and linear SVM for a single class.
+
+    Positives are stored once in a pre-allocated array; negatives are
+    appended over time and pruned after each retraining.
+    """
+
+    def __init__(self, cls, dim, C=0.001, B=10.0, pos_weight=2.0):
+        """Set up an empty cache and an untrained LinearSVC.
+
+        Args:
+            cls: class label, used only in log messages.
+            dim: dimensionality of the feature vectors.
+            C: regularisation parameter handed to LinearSVC.
+            B: intercept scaling handed to LinearSVC.
+            pos_weight: weight of the positive class relative to the
+                negative class (whose weight is 1).
+        """
+        self.pos = np.zeros((0, dim), dtype=np.float32)
+        self.neg = np.zeros((0, dim), dtype=np.float32)
+        self.B = B
+        self.C = C
+        self.cls = cls
+        self.pos_weight = pos_weight
+        self.dim = dim
+        # The positive class weight follows pos_weight so that the SVM
+        # optimises the same objective that train() reports.
+        # NOTE(review): loss='l1' is the historical name of the hinge loss;
+        # newer scikit-learn releases expect loss='hinge' -- confirm which
+        # spelling the installed version accepts.
+        self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
+                                 intercept_scaling=B, verbose=1,
+                                 penalty='l2', loss='l1',
+                                 random_state=cfg.RNG_SEED, dual=True)
+        # Index of the next free row in self.pos.
+        self.pos_cur = 0
+        # Negatives added since the last retraining.
+        self.num_neg_added = 0
+        # Retrain once more than this many new negatives have been added.
+        self.retrain_limit = 2000
+        # Negatives scoring below this after retraining are dropped.
+        self.evict_thresh = -1.1
+        # One (total, pos, neg, reg) tuple per retraining.
+        self.loss_history = []
+
+    def alloc_pos(self, count):
+        """Allocate a zeroed array for ``count`` positives and reset the cursor."""
+        self.pos_cur = 0
+        self.pos = np.zeros((count, self.dim), dtype=np.float32)
+
+    def append_pos(self, feat):
+        """Copy the rows of ``feat`` into the next free positive slots."""
+        num = feat.shape[0]
+        self.pos[self.pos_cur:self.pos_cur + num, :] = feat
+        self.pos_cur += num
+
+    def train(self):
+        """Fit the SVM on the current cache and log the objective value.
+
+        Returns:
+            ``((w, b), pos_scores, neg_scores)`` where ``w`` is the fitted
+            ``coef_``, ``b`` the scalar intercept, and the score arrays are
+            the decision values of the cached positives and negatives.
+        """
+        print('>>> Updating {} detector <<<'.format(self.cls))
+        num_pos = self.pos.shape[0]
+        num_neg = self.neg.shape[0]
+        print('Cache holds {} pos examples and {} neg examples'.
+              format(num_pos, num_neg))
+        X = np.vstack((self.pos, self.neg))
+        y = np.hstack((np.ones(num_pos),
+                       -np.ones(num_neg)))
+        self.svm.fit(X, y)
+        w = self.svm.coef_
+        b = self.svm.intercept_[0]
+        scores = self.svm.decision_function(X)
+        pos_scores = scores[:num_pos]
+        neg_scores = scores[num_pos:]
+
+        pos_loss = (self.C * self.pos_weight *
+                    np.maximum(0, 1 - pos_scores).sum())
+        neg_loss = self.C * np.maximum(0, 1 + neg_scores).sum()
+        # liblinear regularises the weight of the synthetic bias feature,
+        # which is intercept / intercept_scaling, not the intercept itself.
+        reg_loss = (0.5 * np.dot(w.ravel(), w.ravel()) +
+                    0.5 * (b / self.B) ** 2)
+        tot_loss = pos_loss + neg_loss + reg_loss
+        self.loss_history.append((tot_loss, pos_loss, neg_loss, reg_loss))
+
+        for i, losses in enumerate(self.loss_history):
+            print('    {:d}: obj val: {:.3f} = {:.3f} (pos) + {:.3f} (neg) + {:.3f} (reg)'.
+                  format(i, *losses))
+
+        return (w, b), pos_scores, neg_scores
+
+    def append_neg_and_retrain(self, feat=None, force=False):
+        """Add negatives to the cache and retrain when enough have arrived.
+
+        Args:
+            feat: optional array of new negative feature rows.
+            force: retrain even if fewer than ``retrain_limit`` negatives
+                were added since the last retraining.
+
+        Returns:
+            ``(w, b)`` of the freshly trained SVM, or ``None`` when no
+            retraining took place.
+        """
+        if feat is not None:
+            self.neg = np.vstack((self.neg, feat))
+            self.num_neg_added += feat.shape[0]
+
+        if not (self.num_neg_added > self.retrain_limit or force):
+            return None
+
+        self.num_neg_added = 0
+        new_w_b, pos_scores, neg_scores = self.train()
+        not_easy_inds = np.where(neg_scores >= self.evict_thresh)[0]
+        # If every negative is "easy" the cache is left untouched rather
+        # than emptied.
+        if len(not_easy_inds) > 0:
+            self.neg = self.neg[not_easy_inds, :]
+        print('    Pruning easy negatives')
+        print('    Cache holds {} pos examples and {} neg examples'.
+              format(self.pos.shape[0], self.neg.shape[0]))
+        print('    {} pos support vectors'.format((pos_scores <= 1).sum()))
+        print('    {} neg support vectors'.format((neg_scores >= -1).sum()))
+        return new_w_b
+
+def parse_args():
+    """
+    Build the command-line parser and return the parsed arguments.
+
+    When the script is started without any argument the usage text is
+    printed and the process exits with status 1.
+    """
+    parser = argparse.ArgumentParser(description='Train SVMs (old skool)')
+
+    # (flag, keyword arguments) pairs, registered in this order.
+    option_table = (
+        ('--gpu', dict(dest='gpu_id', help='GPU device id to use [0]',
+                       default=0, type=int)),
+        ('--def', dict(dest='prototxt',
+                       help='prototxt file defining the network',
+                       default=None, type=str)),
+        ('--net', dict(dest='caffemodel', help='model to test',
+                       default=None, type=str)),
+        ('--cfg', dict(dest='cfg_file', help='optional config file',
+                       default=None, type=str)),
+        ('--imdb', dict(dest='imdb_name', help='dataset to train on',
+                        default='voc_2007_trainval', type=str)),
+    )
+    for flag, spec in option_table:
+        parser.add_argument(flag, **spec)
+
+    ran_without_arguments = len(sys.argv) == 1
+    if ran_without_arguments:
+        parser.print_help()
+        sys.exit(1)
+
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    # Script entry point: load the net and dataset, train the SVMs, and
+    # save the net with the SVM weights installed next to the input model.
+
+    # Must turn this off to prevent issues when digging into the net blobs to
+    # pull out features
+    cfg.DEDUP_BOXES = 0
+
+    cfg.TEST.BINARY = True
+
+    args = parse_args()
+
+    print('Called with args:')
+    print(args)
+
+    if args.cfg_file is not None:
+        cfg_from_file(args.cfg_file)
+
+    print('Using fast_rcnn_config:')
+    pprint.pprint(cfg)
+
+    # fix the random seed for reproducibility
+    np.random.seed(cfg.RNG_SEED)
+
+    # set up caffe
+    caffe.set_mode_gpu()
+    if args.gpu_id is not None:
+        caffe.set_device(args.gpu_id)
+    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
+    # Model file name without directory or extension.
+    model_stem = os.path.splitext(os.path.basename(args.caffemodel))[0]
+    net.name = model_stem
+    out = model_stem + '_svm'
+    out_dir = os.path.dirname(args.caffemodel)
+
+    imdb_train = get_imdb(args.imdb_name)
+    print('Loaded dataset `{:s}` for training'.format(imdb_train.name))
+
+    trainer = SVMTrainer(net, imdb_train)
+    trainer.train()
+
+    # os.path.join copes with an empty out_dir (model given as a bare file
+    # name); plain '/' formatting would then point at the filesystem root.
+    filename = os.path.join(out_dir, '{}.caffemodel'.format(out))
+    net.save(filename)