initial version of pascal imdb and roidb

5231e12a · Ross Girshick · 7f805572 · 5231e12a · 5231e12a · 5231e12a
Commit 5231e12a authored 10 years ago by Ross Girshick
--- a/datasets/__init__.py
+++ b/datasets/__init__.py
+from .imdb import imdb
+from .pascal_voc import pascal_voc
--- a/datasets/imdb.py
+++ b/datasets/imdb.py
+#imdb.name = 'voc_train_2007'
+#imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/'
+#imdb.extension = '.jpg'
+#imdb.image_ids = {'000001', ... }
+#imdb.sizes = [numimages x 2]
+#imdb.classes = {'aeroplane', ... }
+#imdb.num_classes
+#imdb.class_to_id
+#imdb.class_ids
+#imdb.eval_func = pointer to the function that evaluates detections
+#imdb.roidb_func = pointer to the function that returns regions of interest
+
+# imdb.name
+# imdb.image_index
+# imdb.image_path_from_index
+# imdb.classes
+
+import os
+
+class imdb(object):
+    def __init__(self, name):
+        self._name = name
+        self._num_classes = 0
+        self._classes = []
+        self._image_index = []
+        self._obj_proposer = 'selective_search'
+        self._roidb = None
+        self._roidb_handler = self.default_roidb
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def num_classes(self):
+        return len(self._classes)
+
+    @property
+    def classes(self):
+        return self._classes
+
+    @property
+    def image_index(self):
+        return self._image_index
+
+    @property
+    def roidb_handler(self):
+        return self._roidb_handler
+
+    @roidb_handler.setter
+    def roidb_handler(self, val):
+        self._roidb_handler = val
+
+    def image_path_at(self, i):
+        raise NotImplementedError
+
+    @property
+    def roidb(self):
+        if self._roidb is not None:
+            return self._roidb
+        self._roidb = self.roidb_handler()
+        return self._roidb
+
+    def default_roidb(self):
+        raise NotImplementedError
+
+    @property
+    def cache_path(self):
+        return os.path.abspath(os.path.join(
+            os.path.dirname(__file__),
+            'cache'))
--- a/datasets/pascal_voc.py
+++ b/datasets/pascal_voc.py
+import os
+import datasets.imdb
+import glob
+import xml.dom.minidom as minidom
+import time
+import numpy as np
+import scipy.sparse
+import scipy.io as sio
+import utils.cython_bbox
+import cPickle
+
+# Not bothering with image size for now
+# can use:
+# sizes =
+#   [PIL.Image.open(d.image_path_at(i)).size
+#    for i in xrange(0, len(d.image_index))]
+
+class pascal_voc(datasets.imdb):
+    def __init__(self, image_set, year, devkit_path=None):
+        datasets.imdb.__init__(self, 'voc_' + year + '_' + image_set)
+        self._year = year
+        self._image_set = image_set
+        self._devkit_path = self._get_default_path() if devkit_path is None \
+                            else devkit_path
+        self._base_path = os.path.join(self._devkit_path, 'VOC' + self._year)
+        self._classes = ('aeroplane', 'bicycle', 'bird', 'boat',
+                         'bottle', 'bus', 'car', 'cat', 'chair',
+                         'cow', 'diningtable', 'dog', 'horse',
+                         'motorbike', 'person', 'pottedplant',
+                         'sheep', 'sofa', 'train', 'tvmonitor')
+        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
+        self._image_ext = '.jpg'
+        self._image_index = self._load_image_set_index()
+        # Default to roidb handler
+        self._roidb_handler = self.selective_search_roidb
+
+    def image_path_at(self, i):
+        return self.image_path_from_index(self._image_index[i])
+
+    def image_path_from_index(self, index):
+        image_path = os.path.join(self._base_path, 'JPEGImages',
+                                  index + self._image_ext)
+        assert os.path.exists(image_path)
+        return image_path
+
+    def _load_image_set_index(self):
+        # Example path to image set file:
+        # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
+        image_set_file = os.path.join(self._base_path, 'ImageSets', 'Main',
+                                      self._image_set + '.txt')
+        assert os.path.exists(image_set_file)
+        with open(image_set_file) as f:
+            image_index = tuple([x.strip() for x in f.readlines()])
+        return image_index
+
+    def _get_default_path(self):
+        path = os.path.abspath(os.path.join(
+                    os.path.dirname(__file__),
+                    '..', 'datasets', 'VOCdevkit' + self._year))
+        assert os.path.exists(path)
+        return path
+
+    def gt_roidb(self):
+        """
+        Return the ground-truth ROI db
+        """
+        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
+        if os.path.exists(cache_file):
+            with open(cache_file, 'rb') as fid:
+                roidb = cPickle.load(fid)
+            print '{} gt roidb loaded from {}'.format(self.name, cache_file)
+            return roidb
+
+        # Load all annotation file data (should take < 30 s).
+        gt_roidb = [self._load_pascal_annotation(index)
+                    for index in self.image_index]
+        with open(cache_file, 'wb') as fid:
+            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
+        print 'wrote gt roidb to {}'.format(cache_file)
+
+        return gt_roidb
+
+    def selective_search_roidb(self):
+        cache_file = os.path.join(self.cache_path,
+                                  self.name + '_selective_search_roidb.pkl')
+
+        if os.path.exists(cache_file):
+            with open(cache_file, 'rb') as fid:
+                roidb = cPickle.load(fid)
+            print '{} ss roidb loaded from {}'.format(self.name, cache_file)
+            return roidb
+
+        gt_roidb = self.gt_roidb()
+        ss_roidb = self._load_selective_search_roidb(gt_roidb)
+        roidb = self._merge_roidbs(gt_roidb, ss_roidb)
+        with open(cache_file, 'wb') as fid:
+            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
+        print 'wrote gt roidb to {}'.format(cache_file)
+
+        return roidb
+
+    def _merge_roidbs(self, a, b):
+        assert len(a) == len(b)
+        for i in xrange(len(a)):
+            a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
+            a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
+                                            b[i]['gt_classes']))
+            a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
+                                                       b[i]['gt_overlaps']])
+        return a
+
+    def _load_selective_search_roidb(self, gt_roidb):
+        filename = os.path.join(self.cache_path, 'selective_search_data',
+                                self.name + '.mat')
+        assert os.path.exists(filename)
+        raw_data = sio.loadmat(filename)
+
+        num_images = raw_data['boxes'].ravel().shape[0]
+        ss_roidb = []
+        for i in xrange(num_images):
+            boxes = raw_data['boxes'].ravel()[i][:, (1, 0, 3, 2)]
+            num_boxes = boxes.shape[0]
+            gt_boxes = gt_roidb[i]['boxes']
+            gt_classes = gt_roidb[i]['gt_classes']
+            gt_overlaps = \
+                    utils.cython_bbox.bbox_overlaps(boxes.astype(np.float),
+                                                    gt_boxes.astype(np.float))
+            argmaxes = gt_overlaps.argmax(axis=1)
+            maxes = gt_overlaps.max(axis=1)
+            I = np.where(maxes > 0)[0]
+            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
+            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
+            overlaps = scipy.sparse.csr_matrix(overlaps)
+            ss_roidb.append({'boxes' : boxes,
+                             'gt_classes' : -np.ones((num_boxes,),
+                                                     dtype=np.int32),
+                             'gt_overlaps' : overlaps})
+        return ss_roidb
+
+    def _load_pascal_annotation(self, index):
+        """
+        Load image and bounding boxes info from XML file in the PASCAL VOC
+        format.
+        """
+        filename = os.path.join(self._base_path, 'Annotations', index + '.xml')
+        # print 'Loading: {}'.format(filename)
+        def get_data_from_tag(node, tag):
+            try:
+                return node.getElementsByTagName(tag)[0].childNodes[0].data
+            except:
+                return -1
+
+        with open(filename) as f:
+            data = minidom.parseString(f.read())
+
+        objs = data.getElementsByTagName('object')
+        num_objs = len(objs)
+
+        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
+        gt_classes = -np.ones((num_objs), dtype=np.int32)
+        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
+
+        # Load object bounding boxes into a data frame.
+        for ix, obj in enumerate(objs):
+            # Make pixel indexes 0-based
+            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
+            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
+            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
+            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
+            cls = self._class_to_ind[
+                    str(get_data_from_tag(obj, "name")).lower().strip()]
+            boxes[ix, :] = [x1, y1, x2, y2]
+            gt_classes[ix] = cls
+            overlaps[ix, cls] = 1.0
+
+        overlaps = scipy.sparse.csr_matrix(overlaps)
+
+        return {'boxes' : boxes,
+                'gt_classes': gt_classes,
+                'gt_overlaps' : overlaps}
+
+if __name__ == '__main__':
+    d = datasets.pascal_voc('trainval', '2007')
+    res = d.roidb
+    from IPython import embed; embed()