YOLO adapted version of PASCAL VOC converter.py (#454)
parent
4298166403
commit
a3116488b8
@ -0,0 +1,38 @@
|
|||||||
|
# Utility for converting CVAT XML annotation file to YOLO format
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Given a CVAT XML, this script reads the CVAT XML and writes the
|
||||||
|
annotations in YOLO format into a given directory. This implementation
|
||||||
|
supports both interpolation tracks from video and annotated images.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Install necessary packages and create a virtual environment.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m venv .env
|
||||||
|
. .env/bin/activate
|
||||||
|
cat requirements.txt | xargs -n 1 -L 1 pip install
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Run the script inside the virtual environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python converter.py --cvat-xml </path/to/cvat/xml> --image-dir </path/to/images> --output-dir </path/to/output/directory>
|
||||||
|
```
|
||||||
|
|
||||||
|
Case you need download frames from annotated video file submited to CVAT:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python converter.py --cvat-xml </path/to/cvat/xml> --output-dir </path/to/output/directory> --username <CVAT Username> --password <CVAT Password>
|
||||||
|
```
|
||||||
|
|
||||||
|
Please run `python converter.py --help` for more details.
|
||||||
@ -0,0 +1,264 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
"""
|
||||||
|
Given a CVAT XML, this script reads the CVAT XML and writes the
|
||||||
|
annotations in YOLO format into a given directory.
|
||||||
|
|
||||||
|
This implementation supports both interpolation tracks from video and
|
||||||
|
annotated images.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import glog as log
|
||||||
|
from lxml import etree
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
|
||||||
|
"""Parse arguments of command line"""
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description='Convert CVAT XML annotations to YOLO format'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--cvat-xml', metavar='FILE', required=True,
|
||||||
|
help='input file with CVAT annotation in xml format'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--image-dir', metavar='DIRECTORY', required=False,
|
||||||
|
help='directory which contains original images'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--output-dir', metavar='DIRECTORY', required=True,
|
||||||
|
help='directory for output annotations in YOLO format'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--username', metavar='USERNAME', required=False,
|
||||||
|
help='Username from CVAT Login page, required to download images'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--password', metavar='PASSWORD', required=False,
|
||||||
|
help='Password from CVAT Login page, required to download images'
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--labels', metavar='ILABELS', required=False,
|
||||||
|
help='Labels (separated by comma) to extract. Example: car,truck,motorcycle'
|
||||||
|
)
|
||||||
|
|
||||||
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def process_cvat_xml(xml_file, image_dir, output_dir,username,password,ilabels):
|
||||||
|
"""
|
||||||
|
Transforms a single XML in CVAT format to YOLO TXT files and download images when not in IMAGE_DIR
|
||||||
|
|
||||||
|
:param xml_file: CVAT format XML
|
||||||
|
:param image_dir: image directory of the dataset
|
||||||
|
:param output_dir: directory of annotations with YOLO format
|
||||||
|
:param username: Username used to login CVAT. Required to download images
|
||||||
|
:param password: Password used to login CVAT. Required to download images
|
||||||
|
:param ilabels: Comma separated ordered labels
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
KNOWN_TAGS = {'box', 'image', 'attribute'}
|
||||||
|
|
||||||
|
if (image_dir is None):
|
||||||
|
image_dir=os.path.join(output_dir,"data/obj")
|
||||||
|
os.makedirs(image_dir, exist_ok=True)
|
||||||
|
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
cvat_xml = etree.parse(xml_file)
|
||||||
|
basename = os.path.splitext( os.path.basename( xml_file ) )[0]
|
||||||
|
current_labels = {}
|
||||||
|
traintxt = ""
|
||||||
|
auto_lbl_count = 0
|
||||||
|
|
||||||
|
if (ilabels is not None):
|
||||||
|
vlabels=ilabels.split(',')
|
||||||
|
for _label in vlabels:
|
||||||
|
current_labels[_label]=auto_lbl_count
|
||||||
|
auto_lbl_count+=1
|
||||||
|
|
||||||
|
tracks= cvat_xml.findall( './/track' )
|
||||||
|
|
||||||
|
if (tracks is not None) and (len(tracks) > 0):
|
||||||
|
frames = {}
|
||||||
|
|
||||||
|
for track in tracks:
|
||||||
|
trackid = int(track.get("id"))
|
||||||
|
label = track.get("label")
|
||||||
|
boxes = track.findall( './box' )
|
||||||
|
for box in boxes:
|
||||||
|
frameid = int(box.get('frame'))
|
||||||
|
outside = int(box.get('outside'))
|
||||||
|
#occluded = int(box.get('occluded')) #currently unused
|
||||||
|
#keyframe = int(box.get('keyframe')) #currently unused
|
||||||
|
xtl = float(box.get('xtl'))
|
||||||
|
ytl = float(box.get('ytl'))
|
||||||
|
xbr = float(box.get('xbr'))
|
||||||
|
ybr = float(box.get('ybr'))
|
||||||
|
|
||||||
|
frame = frames.get( frameid, {} )
|
||||||
|
|
||||||
|
if outside == 0:
|
||||||
|
frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
|
||||||
|
|
||||||
|
frames[ frameid ] = frame
|
||||||
|
|
||||||
|
width = int(cvat_xml.find('.//original_size/width').text)
|
||||||
|
height = int(cvat_xml.find('.//original_size/height').text)
|
||||||
|
|
||||||
|
taskid = int(cvat_xml.find('.//task/id').text)
|
||||||
|
|
||||||
|
urlsegment = cvat_xml.find(".//segments/segment/url").text
|
||||||
|
urlbase = urlsegment.split("?")[0]
|
||||||
|
|
||||||
|
httpclient = requests.session()
|
||||||
|
httpclient.get(urlbase)
|
||||||
|
|
||||||
|
csrftoken = "none"
|
||||||
|
sessionid = "none"
|
||||||
|
|
||||||
|
# Spit out a list of each object for each frame
|
||||||
|
for frameid in sorted(frames.keys()):
|
||||||
|
image_name = "%s_%08d.jpg" % (basename, frameid)
|
||||||
|
image_path = os.path.join(image_dir, image_name)
|
||||||
|
if not os.path.exists(image_path):
|
||||||
|
if username is None:
|
||||||
|
log.warn('{} image cannot be found. Is `{}` image directory correct?\n'.format(image_path, image_dir))
|
||||||
|
else:
|
||||||
|
log.info('{} image cannot be found. Downloading from task ID {}\n'.format(image_path, taskid))
|
||||||
|
|
||||||
|
if sessionid == "none":
|
||||||
|
if "csrftoken" in httpclient.cookies:
|
||||||
|
csrftoken = httpclient.cookies["csrftoken"]
|
||||||
|
elif "csrf" in httpclient.cookies:
|
||||||
|
csrftoken = httpclient.cookies["csrf"]
|
||||||
|
|
||||||
|
login_data = dict(username=username, password=password,
|
||||||
|
csrfmiddlewaretoken=csrftoken, next='/dashboard')
|
||||||
|
|
||||||
|
urllogin = urlbase+"/auth/login"
|
||||||
|
httpclient.post(urllogin, data=login_data,
|
||||||
|
headers=dict(Referer=urllogin))
|
||||||
|
|
||||||
|
if ("sessionid" in httpclient.cookies):
|
||||||
|
sessionid = httpclient.cookies["sessionid"]
|
||||||
|
|
||||||
|
url = urlbase+"/api/v1/tasks/"+str(taskid)+"/frames/"+ str(frameid)
|
||||||
|
|
||||||
|
req = httpclient.get(url, headers=dict(
|
||||||
|
csrftoken=csrftoken, sessionid=sessionid))
|
||||||
|
|
||||||
|
with open(image_path, 'wb') as fo:
|
||||||
|
fo.write(req.content)
|
||||||
|
print('Url saved as %s\n' % image_path)
|
||||||
|
|
||||||
|
|
||||||
|
frame = frames[frameid]
|
||||||
|
|
||||||
|
_yoloAnnotationContent=""
|
||||||
|
|
||||||
|
objids = sorted(frame.keys())
|
||||||
|
|
||||||
|
for objid in objids:
|
||||||
|
|
||||||
|
box = frame[objid]
|
||||||
|
|
||||||
|
label = box.get('label')
|
||||||
|
xmin = float(box.get('xtl'))
|
||||||
|
ymin = float(box.get('ytl'))
|
||||||
|
xmax = float(box.get('xbr'))
|
||||||
|
ymax = float(box.get('ybr'))
|
||||||
|
|
||||||
|
if not label in current_labels:
|
||||||
|
current_labels[label] = auto_lbl_count
|
||||||
|
auto_lbl_count+=1
|
||||||
|
|
||||||
|
labelid=current_labels[label]
|
||||||
|
yolo_x= (xmin + ((xmax-xmin)/2))/width
|
||||||
|
yolo_y= (ymin + ((ymax-ymin)/2))/height
|
||||||
|
yolo_w = (xmax - xmin) / width
|
||||||
|
yolo_h = (ymax - ymin) / height
|
||||||
|
|
||||||
|
if len(_yoloAnnotationContent) != 0:
|
||||||
|
_yoloAnnotationContent += "\n"
|
||||||
|
|
||||||
|
_yoloAnnotationContent+=str(labelid)+" "+"{:.6f}".format(yolo_x) +" "+"{:.6f}".format(yolo_y) +" "+"{:.6f}".format(yolo_w) +" "+"{:.6f}".format(yolo_h)
|
||||||
|
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt')
|
||||||
|
anno_path = os.path.join(image_dir, anno_name)
|
||||||
|
|
||||||
|
_yoloFile = open(anno_path, "w", newline="\n")
|
||||||
|
_yoloFile.write(_yoloAnnotationContent)
|
||||||
|
_yoloFile.close()
|
||||||
|
|
||||||
|
if len(traintxt)!=0:
|
||||||
|
traintxt+="\n"
|
||||||
|
|
||||||
|
traintxt+=image_path
|
||||||
|
|
||||||
|
else:
|
||||||
|
for img_tag in cvat_xml.findall('image'):
|
||||||
|
image_name = img_tag.get('name')
|
||||||
|
width = img_tag.get('width')
|
||||||
|
height = img_tag.get('height')
|
||||||
|
image_path = os.path.join(image_dir, image_name)
|
||||||
|
if not os.path.exists(image_path):
|
||||||
|
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
|
||||||
|
format(image_path, image_dir))
|
||||||
|
|
||||||
|
unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
|
||||||
|
if unknown_tags:
|
||||||
|
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
|
||||||
|
|
||||||
|
_yoloAnnotationContent = ""
|
||||||
|
|
||||||
|
for box in img_tag.findall('box'):
|
||||||
|
label = box.get('label')
|
||||||
|
xmin = float(box.get('xtl'))
|
||||||
|
ymin = float(box.get('ytl'))
|
||||||
|
xmax = float(box.get('xbr'))
|
||||||
|
ymax = float(box.get('ybr'))
|
||||||
|
|
||||||
|
if not label in current_labels:
|
||||||
|
current_labels[label] = auto_lbl_count
|
||||||
|
auto_lbl_count += 1
|
||||||
|
|
||||||
|
labelid = current_labels[label]
|
||||||
|
yolo_x = (xmin + ((xmax-xmin)/2))/width
|
||||||
|
yolo_y = (ymin + ((ymax-ymin)/2))/height
|
||||||
|
yolo_w = (xmax - xmin) / width
|
||||||
|
yolo_h = (ymax - ymin) / height
|
||||||
|
|
||||||
|
if len(_yoloAnnotationContent) != 0:
|
||||||
|
_yoloAnnotationContent += "\n"
|
||||||
|
|
||||||
|
_yoloAnnotationContent += str(labelid)+" "+"{:.6f}".format(yolo_x) + " "+"{:.6f}".format(
|
||||||
|
yolo_y) + " "+"{:.6f}".format(yolo_w) + " "+"{:.6f}".format(yolo_h)
|
||||||
|
|
||||||
|
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt')
|
||||||
|
anno_path = os.path.join(image_dir, anno_name)
|
||||||
|
|
||||||
|
_yoloFile = open(anno_path, "w", newline="\n")
|
||||||
|
_yoloFile.write(_yoloAnnotationContent)
|
||||||
|
_yoloFile.close()
|
||||||
|
|
||||||
|
traintxt_file=open(output_dir+"/train.txt","w",newline="\n")
|
||||||
|
traintxt_file.write(traintxt)
|
||||||
|
traintxt_file.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
args = parse_args()
|
||||||
|
process_cvat_xml(args.cvat_xml, args.image_dir, args.output_dir, args.username,args.password,args.labels)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@ -0,0 +1,4 @@
|
|||||||
|
argparse>=1.1
|
||||||
|
lxml>=3.5.0
|
||||||
|
glog>=0.3.1
|
||||||
|
requests==2.22.0
|
||||||
Loading…
Reference in New Issue