log ffmpeg command line & export interpolation to VOC (#312)

main
jrjbertram 7 years ago committed by Nikita Manovich
parent df10fe19e2
commit cc55c30794

@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305) - Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305)
- The ReID application for automatic bounding box merging has been added (#299) - The ReID application for automatic bounding box merging has been added (#299)
- Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316) - Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316)
- Converter for VOC now supports interpolation tracks (#312)
### Changed ### Changed
- Propagation setup has been moved from settings to bottom player panel - Propagation setup has been moved from settings to bottom player panel

@ -283,6 +283,8 @@ class _FrameExtractor:
ff = FFmpeg( ff = FFmpeg(
inputs = {source_path: None}, inputs = {source_path: None},
outputs = {target_path: output_opts}) outputs = {target_path: output_opts})
slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
ff.run() ff.run()
def getframepath(self, k): def getframepath(self, k):

@ -6,9 +6,9 @@ Given a CVAT XML and a directory with the image dataset, this script reads the
CVAT XML and writes the annotations in PASCAL VOC format into a given CVAT XML and writes the annotations in PASCAL VOC format into a given
directory. directory.
This implementation only supports bounding boxes in CVAT annotation format, and This implementation supports both interpolation tracks from video and
warns if it encounter any tracks or annotations that are not bounding boxes, annotated images. If it encounters any tracks or annotations that are
ignoring them in both cases. not bounding boxes, it ignores them.
""" """
import os import os
@ -56,38 +56,97 @@ def process_cvat_xml(xml_file, image_dir, output_dir):
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
cvat_xml = etree.parse(xml_file) cvat_xml = etree.parse(xml_file)
tracks = [(x.get('id'), x.get('label')) basename = os.path.splitext( os.path.basename( xml_file ) )[0]
for x in cvat_xml.findall('track')]
if tracks: tracks= cvat_xml.findall( './/track' )
log.warn('Cannot parse interpolation tracks, ignoring {} tracks'.format(len(tracks)))
if (tracks is not None) and (len(tracks) > 0):
for img_tag in cvat_xml.findall('image'): frames = {}
image_name = img_tag.get('name')
width = img_tag.get('width') for track in tracks:
height = img_tag.get('height') trackid = int(track.get("id"))
image_path = os.path.join(image_dir, image_name) label = track.get("label")
if not os.path.exists(image_path): boxes = track.findall( './box' )
log.warn('{} image cannot be found. Is `{}` image directory correct?'. for box in boxes:
format(image_path, image_dir)) frameid = int(box.get('frame'))
writer = Writer(image_path, width, height) outside = int(box.get('outside'))
#occluded = int(box.get('occluded')) #currently unused
unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS) #keyframe = int(box.get('keyframe')) #currently unused
if unknown_tags: xtl = float(box.get('xtl'))
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags)) ytl = float(box.get('ytl'))
xbr = float(box.get('xbr'))
for box in img_tag.findall('box'): ybr = float(box.get('ybr'))
label = box.get('label')
xmin = float(box.get('xtl')) frame = frames.get( frameid, {} )
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr')) if outside == 0:
ymax = float(box.get('ybr')) frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
writer.addObject(label, xmin, ymin, xmax, ymax) frames[ frameid ] = frame
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml') width = int(cvat_xml.find('.//original_size/width').text)
anno_dir = os.path.dirname(os.path.join(output_dir, image_name)) height = int(cvat_xml.find('.//original_size/height').text)
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name)) # Spit out a list of each object for each frame
for frameid in sorted(frames.keys()):
#print( frameid )
image_name = "%s_%08d.jpg" % (basename, frameid)
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)
frame = frames[frameid]
objids = sorted(frame.keys())
for objid in objids:
box = frame[objid]
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
writer.addObject(label, xmin, ymin, xmax, ymax)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))
else:
for img_tag in cvat_xml.findall('image'):
image_name = img_tag.get('name')
width = img_tag.get('width')
height = img_tag.get('height')
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)
unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
if unknown_tags:
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
for box in img_tag.findall('box'):
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
writer.addObject(label, xmin, ymin, xmax, ymax)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))
def main(): def main():

@ -103,6 +103,10 @@ XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<username>admin</username> <username>admin</username>
<email></email> <email></email>
</owner> </owner>
<original_size>
<width>1024</width>
<height>768</height>
</original_size>
</task> </task>
<dumped>2018-06-06 15:52:11.138470+03:00</dumped> <dumped>2018-06-06 15:52:11.138470+03:00</dumped>
</meta> </meta>
@ -153,6 +157,7 @@ class TestProcessCvatXml(TestCase):
process_cvat_xml(xml_filename, 'img_dir', voc_dir) process_cvat_xml(xml_filename, 'img_dir', voc_dir)
for exp in expected_xmls: for exp in expected_xmls:
self.assertTrue(os.path.exists(exp)) self.assertTrue(os.path.exists(exp))
# TODO: parse the resulting XML files and check their contents
@mock.patch('utils.voc.converter.log') @mock.patch('utils.voc.converter.log')
def test_parse_interpolation_xml(self, mock_log): def test_parse_interpolation_xml(self, mock_log):
@ -161,10 +166,19 @@ class TestProcessCvatXml(TestCase):
file.write(XML_INTERPOLATION_EXAMPLE) file.write(XML_INTERPOLATION_EXAMPLE)
voc_dir = os.path.join(self.test_dir, 'voc_dir') voc_dir = os.path.join(self.test_dir, 'voc_dir')
expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'
frames = [0, 1, 2, 110, 111, 112 ]
expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x )
for x in frames]
process_cvat_xml(xml_filename, 'img_dir', voc_dir) process_cvat_xml(xml_filename, 'img_dir', voc_dir)
self.assertTrue(os.path.exists(voc_dir)) self.assertTrue(os.path.exists(voc_dir))
self.assertTrue(len(os.listdir(voc_dir)) == 0) self.assertTrue(len(os.listdir(voc_dir)) == len(frames))
mock_log.warn.assert_called_once_with(expected_warn) for exp in expected_xmls:
self.assertTrue(os.path.exists(exp))
# TODO: parse the resulting XML files and check their contents

Loading…
Cancel
Save