From d2793d66fa519c441e101de00a47bbd0960772be Mon Sep 17 00:00:00 2001
From: Ross Girshick <ross.girshick@gmail.com>
Date: Fri, 24 Apr 2015 10:45:54 -0700
Subject: [PATCH] add feature scaling used in slow R-CNN

---
 tools/train_svms.py | 82 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 23 deletions(-)

diff --git a/tools/train_svms.py b/tools/train_svms.py
index 890c643..3001ac2 100755
--- a/tools/train_svms.py
+++ b/tools/train_svms.py
@@ -16,21 +16,54 @@ import caffe
 import argparse
 import pprint
 import numpy as np
+import numpy.random as npr
 import cv2
 from sklearn import svm
 import os, sys
 
 class SVMTrainer(object):
+    """
+    Trains post-hoc detection SVMs for all classes using the algorithm
+    and hyper-parameters of traditional R-CNN.
+    """
+
     def __init__(self, net, imdb):
         self.imdb = imdb
         self.net = net
-        dim = net.params['cls_score'][0].data.shape[1]
-        print('Feature dim: {}'.format(dim))
-        self.trainers = [SVMClassTrainer(cls, dim) for cls in imdb.classes]
         self.layer = 'fc7'
         self.hard_thresh = -1.0001
         self.neg_iou_thresh = 0.3
 
+        dim = net.params['cls_score'][0].data.shape[1]
+        scale = self._get_feature_scale()
+        print('Feature dim: {}'.format(dim))
+        print('Feature scale: {:.3f}'.format(scale))
+        self.trainers = [SVMClassTrainer(cls, dim, feature_scale=scale)
+                         for cls in imdb.classes]
+
+    def _get_feature_scale(self, num_images=100):
+        TARGET_NORM = 20.0 # Magic value from traditional R-CNN
+        _t = Timer()
+        roidb = self.imdb.roidb
+        total_norm = 0.0
+        count = 0.0
+        inds = npr.choice(xrange(self.imdb.num_images), size=num_images,
+                          replace=False)
+        for i_, i in enumerate(inds):
+            im = cv2.imread(self.imdb.image_path_at(i))
+            if roidb[i]['flipped']:
+                im = im[:, ::-1, :]
+            _t.tic()
+            scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
+            _t.toc()
+            feat = self.net.blobs[self.layer].data
+            total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum()
+            count += feat.shape[0]
+            print('{}/{}: avg feature norm: {:.3f}'.format(i_ + 1, num_images,
+                                                           total_norm / count))
+
+        return TARGET_NORM * 1.0 / (total_norm / count)
+
     def _get_pos_counts(self):
         counts = np.zeros((len(self.imdb.classes)), dtype=np.int)
         roidb = self.imdb.roidb
@@ -74,6 +107,7 @@ class SVMTrainer(object):
                   .format(i + 1, len(roidb), _t.average_time)
 
     def initialize_net(self):
+        # Start all SVM parameters at zero
         self.net.params['cls_score'][0].data[...] = 0
         self.net.params['cls_score'][1].data[...] = 0
 
@@ -107,21 +141,23 @@ class SVMTrainer(object):
             _t.toc()
             feat = self.net.blobs[self.layer].data
             for j in xrange(1, self.imdb.num_classes):
-                hard_inds = np.where((scores[:, j] > self.hard_thresh) &
-                                     (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
-                                      self.neg_iou_thresh))[0]
+                hard_inds = \
+                    np.where((scores[:, j] > self.hard_thresh) &
+                             (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
+                              self.neg_iou_thresh))[0]
                 if len(hard_inds) > 0:
                     hard_feat = feat[hard_inds, :].copy()
-                    new_w_b = self.trainers[j].append_neg_and_retrain(feat=hard_feat)
+                    new_w_b = \
+                        self.trainers[j].append_neg_and_retrain(feat=hard_feat)
                     if new_w_b is not None:
                         self.update_net(j, new_w_b[0], new_w_b[1])
 
-            print 'train_with_hard_negatives: {:d}/{:d} {:.3f}s' \
-                  .format(i + 1, len(roidb), _t.average_time)
-
+            print(('train_with_hard_negatives: '
+                   '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb),
+                                               _t.average_time))
 
     def train(self):
-        # 3) Initialize SVMs using
+        # Initialize SVMs using
         #   a. w_i = fc8_w_i - fc8_w_0
         #   b. b_i = fc8_b_i - fc8_b_0
         #   c. Install SVMs into net
@@ -148,14 +184,11 @@ class SVMTrainer(object):
             new_w_b = self.trainers[j].append_neg_and_retrain(force=True)
             self.update_net(j, new_w_b[0], new_w_b[1])
 
-
-
-#        7) Save net
-
-
-
 class SVMClassTrainer(object):
-    def __init__(self, cls, dim, C=0.001, B=10.0, pos_weight=2.0):
+    """Manages post-hoc SVM training for a single object class."""
+
+    def __init__(self, cls, dim, feature_scale=1.0,
+                 C=0.001, B=10.0, pos_weight=2.0):
         self.pos = np.zeros((0, dim), dtype=np.float32)
         self.neg = np.zeros((0, dim), dtype=np.float32)
         self.B = B
@@ -163,6 +196,7 @@ class SVMClassTrainer(object):
         self.cls = cls
         self.pos_weight = pos_weight
         self.dim = dim
+        self.feature_scale = feature_scale
         self.svm = svm.LinearSVC(C=C, class_weight={1: 2, -1: 1},
                                  intercept_scaling=B, verbose=1,
                                  penalty='l2', loss='l1',
@@ -188,7 +222,7 @@ class SVMClassTrainer(object):
         num_neg = self.neg.shape[0]
         print('Cache holds {} pos examples and {} neg examples'.
               format(num_pos, num_neg))
-        X = np.vstack((self.pos, self.neg))
+        X = np.vstack((self.pos, self.neg)) * self.feature_scale
         y = np.hstack((np.ones(num_pos),
                        -np.ones(num_neg)))
         self.svm.fit(X, y)
@@ -198,17 +232,19 @@ class SVMClassTrainer(object):
         pos_scores = scores[:num_pos]
         neg_scores = scores[num_pos:]
 
-        pos_loss = self.C * self.pos_weight * np.maximum(0, 1 - pos_scores).sum()
+        pos_loss = (self.C * self.pos_weight *
+                    np.maximum(0, 1 - pos_scores).sum())
         neg_loss = self.C * np.maximum(0, 1 + neg_scores).sum()
         reg_loss = 0.5 * np.dot(w.ravel(), w.ravel()) + 0.5 * b ** 2
         tot_loss = pos_loss + neg_loss + reg_loss
         self.loss_history.append((tot_loss, pos_loss, neg_loss, reg_loss))
 
         for i, losses in enumerate(self.loss_history):
-            print('    {:d}: obj val: {:.3f} = {:.3f} (pos) + {:.3f} (neg) + {:.3f} (reg)'.
-                  format(i, *losses))
+            print(('    {:d}: obj val: {:.3f} = {:.3f} '
+                   '(pos) + {:.3f} (neg) + {:.3f} (reg)').format(i, *losses))
 
-        return (w, b), pos_scores, neg_scores
+        return ((w * self.feature_scale, b * self.feature_scale),
+                pos_scores, neg_scores)
 
     def append_neg_and_retrain(self, feat=None, force=False):
         if feat is not None:
-- 
GitLab