log ffmpeg command line & export interpolation to VOC (#312)

7 years ago · cc55c30794
parent df10fe19e2
commit cc55c30794
4 changed files with 114 additions and 39 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305)
 - The ReID application for automatic bounding box merging has been added (#299)
 - Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316)
-
+- Converter for VOC now supports interpolation tracks (#312)
 ### Changed
 - Propagation setup has been moved from settings to bottom player panel
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@ -283,6 +283,8 @@ class _FrameExtractor:
        ff = FFmpeg(
            inputs  = {source_path: None},
            outputs = {target_path: output_opts})
        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
        ff.run()
    def getframepath(self, k):
--- a/utils/voc/converter.py
+++ b/utils/voc/converter.py
@ -6,9 +6,9 @@ Given a CVAT XML and a directory with the image dataset, this script reads the
 CVAT XML and writes the annotations in PASCAL VOC format into a given
 directory.
-This implementation only supports bounding boxes in CVAT annotation format, and
+This implementation supports both interpolation tracks from video and 
-warns if it encounter any tracks or annotations that are not bounding boxes,
+annotated images.  If it encounters any tracks or annotations that are 
-ignoring them in both cases.
+not bounding boxes, it ignores them.
 """
 import os
@ -56,38 +56,97 @@ def process_cvat_xml(xml_file, image_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    cvat_xml = etree.parse(xml_file)
-    tracks = [(x.get('id'), x.get('label'))
+    basename = os.path.splitext( os.path.basename( xml_file ) )[0]
-              for x in cvat_xml.findall('track')]
+
-    if tracks:
+    tracks= cvat_xml.findall( './/track' )
-        log.warn('Cannot parse interpolation tracks, ignoring {} tracks'.format(len(tracks)))
+
-
+    if (tracks is not None) and (len(tracks) > 0):
-    for img_tag in cvat_xml.findall('image'):
+        frames = {}
-        image_name = img_tag.get('name')
+
-        width = img_tag.get('width')
+        for track in tracks:
-        height = img_tag.get('height')
+            trackid = int(track.get("id"))
-        image_path = os.path.join(image_dir, image_name)
+            label = track.get("label")
-        if not os.path.exists(image_path):
+            boxes = track.findall( './box' )
-            log.warn('{} image cannot be found. Is `{}` image directory correct?'.
+            for box in boxes:
-                format(image_path, image_dir))
+                frameid  = int(box.get('frame'))
-        writer = Writer(image_path, width, height)
+                outside  = int(box.get('outside'))
-
+                #occluded = int(box.get('occluded'))  #currently unused
-        unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
+                #keyframe = int(box.get('keyframe'))  #currently unused
-        if unknown_tags:
+                xtl      = float(box.get('xtl'))
-            log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
+                ytl      = float(box.get('ytl'))
-
+                xbr      = float(box.get('xbr'))
-        for box in img_tag.findall('box'):
+                ybr      = float(box.get('ybr'))
-            label = box.get('label')
+                
-            xmin = float(box.get('xtl'))
+                frame = frames.get( frameid, {} )
-            ymin = float(box.get('ytl'))
+                
-            xmax = float(box.get('xbr'))
+                if outside == 0:
-            ymax = float(box.get('ybr'))
+                    frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
-
+
-            writer.addObject(label, xmin, ymin, xmax, ymax)
+                frames[ frameid ] = frame
-
+
-        anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
+        width = int(cvat_xml.find('.//original_size/width').text)
-        anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
+        height  = int(cvat_xml.find('.//original_size/height').text)
-        os.makedirs(anno_dir, exist_ok=True)
+
-        writer.save(os.path.join(anno_dir, anno_name))
+        # Write one VOC annotation file per frame, listing every object stored for that frame
        for frameid in sorted(frames.keys()):
            #print( frameid )
            image_name = "%s_%08d.jpg" % (basename, frameid)
            image_path = os.path.join(image_dir, image_name)
            if not os.path.exists(image_path):
                log.warn('{} image cannot be found. Is `{}` image directory correct?'.
                    format(image_path, image_dir))
            writer = Writer(image_path, width, height)
            frame = frames[frameid]
            objids = sorted(frame.keys())
            for objid in objids:
                box = frame[objid]
                label = box.get('label')
                xmin = float(box.get('xtl'))
                ymin = float(box.get('ytl'))
                xmax = float(box.get('xbr'))
                ymax = float(box.get('ybr'))
                writer.addObject(label, xmin, ymin, xmax, ymax)
            anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
            anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
            os.makedirs(anno_dir, exist_ok=True)
            writer.save(os.path.join(anno_dir, anno_name))
    else:
        for img_tag in cvat_xml.findall('image'):
            image_name = img_tag.get('name')
            width = img_tag.get('width')
            height = img_tag.get('height')
            image_path = os.path.join(image_dir, image_name)
            if not os.path.exists(image_path):
                log.warn('{} image cannot be found. Is `{}` image directory correct?'.
                    format(image_path, image_dir))
            writer = Writer(image_path, width, height)
            unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
            if unknown_tags:
                log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
            for box in img_tag.findall('box'):
                label = box.get('label')
                xmin = float(box.get('xtl'))
                ymin = float(box.get('ytl'))
                xmax = float(box.get('xbr'))
                ymax = float(box.get('ybr'))
                writer.addObject(label, xmin, ymin, xmax, ymax)
            anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
            anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
            os.makedirs(anno_dir, exist_ok=True)
            writer.save(os.path.join(anno_dir, anno_name))
 def main():
--- a/utils/voc/tests/test_process_cvat_xml.py
+++ b/utils/voc/tests/test_process_cvat_xml.py
@ -103,6 +103,10 @@ XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
        <username>admin</username>
        <email></email>
      </owner>
      <original_size>
         <width>1024</width>
         <height>768</height>
      </original_size>
    </task>
    <dumped>2018-06-06 15:52:11.138470+03:00</dumped>
  </meta>
@ -153,6 +157,7 @@ class TestProcessCvatXml(TestCase):
        process_cvat_xml(xml_filename, 'img_dir', voc_dir)
        for exp in expected_xmls:
            self.assertTrue(os.path.exists(exp))
            # TODO: parse the resulting XML files and verify their contents
    @mock.patch('utils.voc.converter.log')
    def test_parse_interpolation_xml(self, mock_log):
@ -161,10 +166,19 @@ class TestProcessCvatXml(TestCase):
            file.write(XML_INTERPOLATION_EXAMPLE)
        voc_dir = os.path.join(self.test_dir, 'voc_dir')
-        expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'
+
        frames = [0, 1, 2, 110, 111, 112 ]
        expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x )
                         for x in frames]
        process_cvat_xml(xml_filename, 'img_dir', voc_dir)
        self.assertTrue(os.path.exists(voc_dir))
-        self.assertTrue(len(os.listdir(voc_dir)) == 0)
+        self.assertTrue(len(os.listdir(voc_dir)) == len(frames))
-        mock_log.warn.assert_called_once_with(expected_warn)
+        for exp in expected_xmls:
            self.assertTrue(os.path.exists(exp))
            # TODO: parse the resulting XML files and verify their contents