Remove deprecated utils (#1477)

* removed deprecated convert scripts

* updated changelog
Andrey Zhavoronkov 6 years ago committed by GitHub
parent 3d4e7268e1
commit f4ae611392

@@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
### Removed
-
- Annotation conversion utils, which are now supported natively via the Datumaro framework (https://github.com/opencv/cvat/pull/1477)
### Fixed
- Auto annotation, TF annotation and Auto segmentation apps (https://github.com/opencv/cvat/pull/1409)

@@ -4,8 +4,5 @@
## Description
This folder contains some useful utilities for Computer Vision Annotation Tool (CVAT). To read about a certain utility please choose a link:
- [Convert CVAT XML to PASCAL VOC](voc/converter.md)
- [Convert CVAT XML to MS COCO](coco/converter.md)
- [Convert CVAT XML to PNG mask](mask/converter.md)
- [Convert CVAT XML to TFRECORDS](tfrecords/converter.md)
- [Convert CVAT XML to YOLO](yolo/converter.md)
- [Auto Annotation Runner](auto_annotation/README.md)
- [Command line interface for working with CVAT tasks](cli/README.md)

@@ -1,58 +0,0 @@
# Utility for converting CVAT XML annotation file to MS COCO json format
## Description
This utility takes an annotation exported from CVAT and converts it to an annotation in COCO representation. The input annotation must contain segmentation, because the bounding boxes of objects are calculated from their segments.
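For reference, the converter derives each object's area and bounding box from its polygon via `pycocotools` (see `polygon_area_and_bbox` in `converter.py`); a minimal sketch, with example values:
```python
# Minimal sketch: deriving a COCO area and bbox from a polygon with pycocotools,
# as polygon_area_and_bbox() in converter.py does. Sizes and points are examples.
from pycocotools import mask as mask_util

height, width = 400, 600                                        # image size
polygon = [[38.9, 26.5, 140.6, 26.5, 140.6, 54.3, 38.9, 54.3]]  # x1,y1, x2,y2, ...

rle = mask_util.frPyObjects(polygon, height, width)
area = float(sum(mask_util.area(rle)))                          # segment area in pixels
x, y, w, h = mask_util.toBbox(rle)[0]                           # [x, y, width, height]
print(area, [x, y, w, h])
```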
## Installation
Install necessary packages and create a virtual environment.
```bash
$ sudo apt-get update
$ sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev python3-tk libgtk-3-dev gcc
```
```
$ python3 -m venv .env
$ . .env/bin/activate
$ cat ../requirements.txt requirements.txt | xargs -n 1 -L 1 pip install
```
## Usage
Run the script inside the virtual environment.
```bash
python converter.py --cvat-xml </path/to/cvat/annotation.xml> --output </path/to/output/coco/annotation.json> --image-dir </path/to/directory/with/images> --labels </path/to/file/with/labels.txt> --draw </path/to/save/directory> --draw_labels --use_background_label
```
Please run `python converter.py --help` for more details.
#### Labels
If the `--labels` argument is used, the script reads label names from a file. If a file with labels is not defined, the script parses the input annotation and uses its `labels` field to find which labels are present. The file with labels may list labels on one line separated by spaces, one label per line, or any combination of the two. For example:
```
label1 label2
label3
```
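A minimal sketch of how such a file can be read (labels separated by spaces and/or newlines), similar to what `converter.py` does for `--labels`:
```python
# Minimal sketch: read label names from a file where labels are separated
# by spaces and/or newlines, as in the example above.
def read_label_names(path):
    with open(path) as f:
        return [name for line in f for name in line.split()]

# read_label_names('labels.txt') -> ['label1', 'label2', 'label3']
```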
## Merge several annotations in COCO representation into one
Run the script `merge_annotations.py`
```bash
python merge_annotations.py --input-dir /path/to/directory/with/datasets --output /path/to/result/annotation.json --images-map /path/to/file/with/matched/datasets/and/images.txt --draw /path/to/directory/where/save/images
```
Please run `python merge_annotations.py --help` for more details.
Example of a file for `--images-map`:
```bash
{
"dataset1_part1.json": "images/dataset1/part1",
"dataset1_part2.json": "images/dataset1/part2",
"dataset2_part1.json": "images/dataset2/part1",
"dataset2_part2.json": "images/dataset2/part2"
}
```
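Internally the merge keeps ids unique by offsetting the image and annotation ids of each dataset before concatenating them, as `merge_annotations.py` does; a minimal sketch for two already-loaded COCO dicts with identical categories:
```python
# Minimal sketch of the id-offsetting step performed while merging two
# COCO dicts `a` and `b` (their 'categories' are assumed to be identical).
def merge_two(a, b):
    img_offset = len(a['images'])
    ann_offset = len(a['annotations'])
    for img in b['images']:
        img['id'] += img_offset
    for ann in b['annotations']:
        ann['id'] += ann_offset
        ann['image_id'] += img_offset
    a['images'].extend(b['images'])
    a['annotations'].extend(b['annotations'])
    return a
```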

@@ -1,493 +0,0 @@
#!/usr/bin/env python
#
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT
from __future__ import absolute_import, division, print_function
import argparse
import glog as log
import numpy as np
import os.path as osp
import json
import cv2
import sys
from lxml import etree
from tqdm import tqdm
from skimage import measure
from pycocotools import mask as mask_util
from pycocotools import coco as coco_loader
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Convert CVAT annotation with instance segmentation ''to COCO representation'
)
parser.add_argument(
'--cvat-xml', required=True,
help='input file with CVAT annotation in *.xml format'
)
parser.add_argument(
'--output', default=None,
help='output annotation file. If not defined, the output file name will be created from input file name'
)
parser.add_argument(
'--image-dir', required=True,
help='directory with images from annotation'
)
parser.add_argument(
'--labels', default=None,
help='path to file with labels'
)
parser.add_argument(
'--draw', default=None,
help='directory to save images with its segments. By default is disabled'
)
parser.add_argument(
'--draw_labels', action='store_true',
help='insert in output images labels of objects. By default is false'
)
parser.add_argument(
'--use_background_label', action='store_true',
help='insert in output annotation objects with label \'background\'. By default is false'
)
parser.add_argument(
'--polygon-area-threshold', type=int, default=1,
help='polygons with area less than this value will be ignored. By default set to 1'
)
return parser.parse_args()
def mask_to_polygon(mask, tolerance=1.0, area_threshold=1):
"""Convert object's mask to polygon [[x1,y1, x2,y2 ...], [...]]
Args:
mask: object's mask presented as 2D array of 0 and 1
tolerance: maximum distance from original points of polygon to approximated
area_threshold: if area of a polygon is less than this value, remove this small object
"""
polygons = []
# pad mask with 0 around borders
padded_mask = np.pad(mask, pad_width=1, mode='constant', constant_values=0)
contours = measure.find_contours(padded_mask, 0.5)
# Fix coordinates after padding
contours = np.subtract(contours, 1)
for contour in contours:
if not np.array_equal(contour[0], contour[-1]):
contour = np.vstack((contour, contour[0]))
contour = measure.approximate_polygon(contour, tolerance)
if len(contour) > 2:
contour = np.flip(contour, axis=1)
reshaped_contour = []
for xy in contour:
reshaped_contour.append(xy[0])
reshaped_contour.append(xy[1])
for i in range(0, len(reshaped_contour)):
if reshaped_contour[i] < 0:
reshaped_contour[i] = 0
# Check if area of a polygon is enough
rle = mask_util.frPyObjects([reshaped_contour], mask.shape[0], mask.shape[1])
area = mask_util.area(rle)
if sum(area) > area_threshold:
polygons.append(reshaped_contour)
return polygons
def draw_polygons(polygons, img_name, input_dir, output_dir, draw_labels):
"""Draw on image contours of its objects and save
Args:
polygons: all objects on image represented as 2D array of objects' contours
img_name: name of image file
input_dir: path to directory with images from annotation
output_dir: directory to save images
"""
name = osp.basename(img_name)
input_file = osp.join(input_dir, name)
output_file = osp.join(output_dir, name)
img = cv2.imread(input_file)
yellow = (0, 255, 255)
red = (0, 0, 255)
for poly in polygons:
label = poly['label']
_, bbox = polygon_area_and_bbox(poly['points'], img.shape[0], img.shape[1])
for j in range(0, len(poly['points'])):
i = 0
points = []
while i < len(poly['points'][j]):
x = int(poly['points'][j][i])
y = int(poly['points'][j][i + 1])
points.append([x, y])
i += 2
bbox = [int(value) for value in bbox]
img = cv2.polylines(img, np.int32([points]), True, yellow, 1)
img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), red, 2)
if draw_labels:
x = bbox[0] + bbox[2] // 4
y = bbox[1] + bbox[3] // 2
cv2.putText(img, label, (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, red, 1)
cv2.imwrite(output_file, img)
def fix_segments_intersections(polygons, height, width, img_name, use_background_label,
threshold=0.0, ratio_tolerance=0.001, area_threshold=1):
"""Find all intersected regions and crop contour for back object by objects which
are in front of the first one. It is related to a specialty of segmentation
in CVAT annotation. Intersection is calculated via function 'iou' from cocoapi
Args:
polygons: all objects on image represented as 2D array of objects' contours
height: height of image
width: width of image
img_name: name of image file
threshold: threshold of intersection over union of two objects.
By default is set to 0 and processes any two intersected objects
ratio_tolerance: used for situation when one object is fully or almost fully
inside another one and we don't want to make a "hole" in one of the objects
"""
converted_polygons = []
empty_polygon = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
# Convert points of polygons from string to coco's array.
# All polygons must be sorted in order from bottom to top
for polygon in polygons:
label = polygon['label']
points = polygon['points'].split(';')
new_polygon = []
for xy in points:
x = float(xy.split(',')[0])
y = float(xy.split(',')[1])
new_polygon.append(x)
new_polygon.append(y)
converted_polygons.append({'label': label, 'points': new_polygon})
for i in range(0, len(converted_polygons)):
rle_bottom = mask_util.frPyObjects([converted_polygons[i]['points']], height, width)
segment_overlapped = False
for j in range(i + 1, len(converted_polygons)):
rle_top = mask_util.frPyObjects([converted_polygons[j]['points']], height, width)
iou = mask_util.iou(rle_bottom, rle_top, [0, 0])
area_top = sum(mask_util.area(rle_top))
area_bottom = sum(mask_util.area(rle_bottom))
if area_bottom == 0:
continue
area_ratio = area_top / area_bottom
sum_iou = sum(iou)
# If segment is fully inside another one, save this segment as is
if area_ratio - ratio_tolerance < sum_iou[0] < area_ratio + ratio_tolerance:
continue
# Check situation when bottom segment is fully inside top.
# It means there is a mistake in the annotation. Save this segment as is
if 1 / area_ratio - ratio_tolerance < sum_iou[0] < 1 / area_ratio + ratio_tolerance:
continue
if sum_iou[0] > threshold:
segment_overlapped = True
bottom_mask = np.array(mask_util.decode(rle_bottom), dtype=np.uint8)
top_mask = np.array(mask_util.decode(rle_top), dtype=np.uint8)
bottom_mask = np.subtract(bottom_mask, top_mask)
bottom_mask[bottom_mask > 1] = 0
bottom_mask = np.sum(bottom_mask, axis=2)
bottom_mask = np.array(bottom_mask > 0, dtype=np.uint8)
converted_polygons[i]['points'] = mask_to_polygon(bottom_mask, area_threshold=area_threshold)
# If some segment is empty, do small fix to avoid error in cocoapi function
if len(converted_polygons[i]['points']) == 0:
converted_polygons[i]['points'] = [empty_polygon]
rle_bottom = mask_util.frPyObjects(converted_polygons[i]['points'], height, width)
if not segment_overlapped:
converted_polygons[i]['points'] = [converted_polygons[i]['points']]
output_polygons = []
for i in range(0, len(converted_polygons)):
if not use_background_label and converted_polygons[i]['label'] == 'background':
continue
poly_len = len(converted_polygons[i]['points'])
if poly_len == 0 or converted_polygons[i]['points'] == [empty_polygon]:
log.warning('Image <{}> has an empty polygon with label <{}>. '
'Perhaps there is a mistake in annotation'.
format(img_name, converted_polygons[i]['label']))
else:
output_polygons.append(converted_polygons[i])
return output_polygons
def polygon_area_and_bbox(polygon, height, width):
"""Calculate area of object's polygon and bounding box around it
Args:
polygon: objects contour represented as 2D array
height: height of object's region (use full image)
width: width of object's region (use full image)
"""
rle = mask_util.frPyObjects(polygon, height, width)
area = mask_util.area(rle)
bbox = mask_util.toBbox(rle)
bbox = [min(bbox[:, 0]),
min(bbox[:, 1]),
max(bbox[:, 0] + bbox[:, 2]) - min(bbox[:, 0]),
max(bbox[:, 1] + bbox[:, 3]) - min(bbox[:, 1])]
return area, bbox
def insert_license_data(result_annotation):
"""Fill license fields in annotation by blank data
Args:
result_annotation: output annotation in COCO representation
"""
result_annotation['licenses'].append({
'name': '',
'id': 0,
'url': ''
})
def insert_info_data(xml_root, result_annotation):
"""Fill available information of annotation
Args:
xml_root: root for xml parser
result_annotation: output annotation in COCO representation
"""
log.info('Reading information data...')
version = ''
date = ''
description = ''
year = ''
for child in xml_root:
if child.tag == 'version':
version = child.text
if child.tag == 'meta':
for task in child:
for entry in task:
if entry.tag == 'name':
description = entry.text
if entry.tag == 'created':
date = entry.text
date = date.split(' ')[0]
year = date.split('-')[0]
result_annotation['info'] = {
'contributor': '',
'date_created': date,
'description': description,
'url': '',
'version': version,
'year': year
}
log.info('Found the next information data: {}'.format(result_annotation['info']))
def insert_categories_data(xml_root, use_background_label, result_annotation, labels_file=None):
"""Get labels from input annotation and fill categories field in output annotation
Args:
xml_root: root for xml parser
use_background_label: key to enable using label background
result_annotation: output annotation in COCO representation
labels_file: path to file with labels names.
If not defined, parse annotation to get labels names
"""
def get_categories(names, bg_found, use_background_label, sort=False):
bg_used = False
category_map = {}
categories = []
# Sort labels by its names to make the same order of ids for different annotations
if sort:
names.sort()
# Always use id = 0 for background
if bg_found and use_background_label:
category_map['background'] = 0
bg_used = True
cat_id = 1
# Define id for all labels beginning from 1
for name in names:
if name == 'background':
continue
category_map[name] = cat_id
categories.append({'id': cat_id, 'name': name, 'supercategory': ''})
cat_id += 1
return category_map, categories, bg_used
categories = []
category_map = {}
label_names = []
bg_found = False
bg_used = False
if labels_file is None:
log.info('Reading labels from annotation...')
for label in xml_root.iter('label'):
for name in label.findall("./name"):
if name.text == 'background':
bg_found = True
continue
label_names.append(name.text)
if len(label_names) == 0:
log.info('Labels in annotation were not found. Please use \'--labels\' argument to define file with labels.')
else:
category_map, categories, bg_used = get_categories(label_names, bg_found, use_background_label, sort=True)
else:
log.info('Parsing labels from file <{}>...'.format(labels_file))
with open(labels_file, 'r') as file:
string = ' '
while string != '' and string != '\n':
string = file.readline()
labels = string.split(' ')
for label in labels:
if label == '\n' or label == '':
continue
label = label.replace('\n', '')
if label == 'background':
bg_found = True
continue
label_names.append(label)
category_map, categories, bg_used = get_categories(label_names, bg_found, use_background_label)
if len(categories) == 0:
raise ValueError('Categories list is empty. Something wrong.')
result_annotation['categories'] = categories
log.info('Found the next labels: {}'.format(category_map))
if bg_found and not bg_used:
log.warning('Label <background> was found but not used. '
'To enable it should use command line argument [--use_background_label]')
return category_map
def insert_image_data(image, result_annotation):
"""Get data from input annotation for image and fill fields for this image in output annotation
Args:
image: dictionary with data for image from original annotation
result_annotation: output annotation in COCO representation
"""
new_img = {}
new_img['coco_url'] = ''
new_img['date_captured'] = ''
new_img['flickr_url'] = ''
new_img['license'] = 0
new_img['id'] = image['id']
new_img['file_name'] = osp.basename(image['name'])
new_img['height'] = int(image['height'])
new_img['width'] = int(image['width'])
result_annotation['images'].append(new_img)
def insert_annotation_data(image, category_map, segm_id, object, img_dims, result_annotation):
"""Get data from input annotation for object and fill fields for this object in output annotation
Args:
image: dictionary with data for image from input CVAT annotation
category_map: map for categories represented in the annotation {name: id}
segm_id: identifier of the current object
object: includes data for the object [label, polygon]
img_dims: dimensions of image [height, width]
result_annotation: output annotation in COCO representation
"""
new_anno = {}
new_anno['category_id'] = category_map[object['label']]
new_anno['id'] = segm_id
new_anno['image_id'] = image['id']
new_anno['iscrowd'] = 0
new_anno['segmentation'] = object['points']
area, bbox = polygon_area_and_bbox(object['points'], img_dims[0], img_dims[1])
new_anno['area'] = float(np.sum(area))
new_anno['bbox'] = bbox
result_annotation['annotations'].append(new_anno)
def main():
args = parse_args()
xml_file_name = args.cvat_xml
if args.output is not None:
output_file_name = args.output
else:
output_file_name = args.cvat_xml.split('.xml')[0] + '.json'
log.info('Output file name set to: {}'.format(output_file_name))
root = etree.parse(xml_file_name).getroot()
if args.draw is not None:
log.info('Draw key was enabled. Images will be saved in directory <{}>'.format(args.draw))
result_annotation = {
'licenses': [],
'info': {},
'categories': [],
'images': [],
'annotations': []
}
insert_license_data(result_annotation)
insert_info_data(root, result_annotation)
category_map = insert_categories_data(root, args.use_background_label, result_annotation, labels_file=args.labels)
if len(category_map) == 0:
sys.exit('Labels were not found. Be sure that annotation <{}> includes field <labels> or '
'annotation directory includes file <labels.txt>'.format(xml_file_name))
segm_id = 0
z_order_off_counter = 0
# Parse original annotation
for img in tqdm(root.iter('image'), desc='Processing images from ' + xml_file_name):
image = {}
for key, value in img.items():
image[key] = value
img_name = osp.join(args.image_dir, osp.basename(image['name']))
if not osp.isfile(img_name):
log.warning('Image <{}> is not available'.format(img_name))
image['polygon'] = []
z_order_on_counter = 0
polygon_counter = 0
for poly in img.iter('polygon'):
polygon = {}
for key, value in poly.items():
polygon[key] = value
if key == 'z_order':
z_order_on_counter += 1
polygon_counter += 1
image['polygon'].append(polygon)
# If at least one of polygons on image does not have field 'z_order' do not sort them
if z_order_on_counter == polygon_counter:
image['polygon'].sort(key=lambda x: int(x['z_order']))
else:
z_order_off_counter += 1
# Create new image
image['id'] = int(image['id'])
insert_image_data(image, result_annotation)
height = result_annotation['images'][-1]['height']
width = result_annotation['images'][-1]['width']
image['polygon'] = fix_segments_intersections(image['polygon'], height, width,
image['name'], args.use_background_label,
area_threshold=args.polygon_area_threshold)
# Create new annotation for this image
for poly in image['polygon']:
insert_annotation_data(image, category_map, segm_id, poly, [height, width], result_annotation)
segm_id += 1
# Draw contours of objects on image
if args.draw is not None:
draw_polygons(image['polygon'], image['name'], args.image_dir, args.draw, args.draw_labels)
log.info('Processed images: {}'.format(len(result_annotation['images'])))
log.info('Processed objects: {}'.format(len(result_annotation['annotations'])))
if z_order_off_counter > 0:
log.warning('Annotation does not have a field \'z_order\' for {} image(s). '
'Overlapped objects may be cropped incorrectly!'. format(z_order_off_counter))
# Save created annotation
log.info('Saving annotation...')
with open(output_file_name, 'w') as outfile:
json.dump(result_annotation, outfile)
log.info('Annotation was saved in <{}> successfully'.format(output_file_name))
# Try to load created annotation via cocoapi
try:
log.info('Trying to load annotation <{}> via cocoapi...'.format(output_file_name))
coco_loader.COCO(output_file_name)
except:
raise
else:
log.info('Conversion <{}> --> <{}> has finished successfully!'.format(xml_file_name, output_file_name))
if __name__ == "__main__":
main()

@@ -1,211 +0,0 @@
import argparse
import cv2
import glog
import json
import numpy as np
import os
from tqdm import tqdm
from pycocotools import coco as coco_loader
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Merge annotations in COCO representation into one'
)
parser.add_argument(
'--input-dir', required=True,
help='directory with input annotations in *.json format'
)
parser.add_argument(
'--output', required=True,
help='output annotation file'
)
parser.add_argument(
'--images-map', required=True,
help='file with map of datasets and its images path (json format)'
)
parser.add_argument(
'--draw', default=None,
help='directory to save images with its segments. By default is disabled'
)
return parser.parse_args()
def draw_bboxes_and_masks(img, annotations, input_dir):
""" Draw bounding boxes and contours of masks on image and save it.
:param img: image record from the annotation (its 'file_name' field is used)
:param annotations: list of bounding boxes and segments on the image
:param input_dir: base directory to save images
"""
input_file = os.path.join(input_dir, img['file_name'])
save_path = os.path.join(os.path.dirname(input_file), 'draw')
if not os.path.exists(save_path):
os.makedirs(save_path)
output_file = os.path.join(save_path, os.path.basename(input_file))
img = cv2.imread(input_file)
yellow = (0, 255, 255)
red = (0, 0, 255)
for ann in annotations:
cat_id = str(ann['category_id'])
bbox = [int(ann['bbox'][0]), int(ann['bbox'][1]),
int(ann['bbox'][0] + ann['bbox'][2]), int(ann['bbox'][1] + ann['bbox'][3])]
masks = ann['segmentation']
for mask in masks:
i = 0
points = []
while i < len(mask):
x = int(mask[i])
y = int(mask[i + 1])
points.append([x, y])
i += 2
img = cv2.polylines(img, np.int32([points]), True, yellow, 1)
img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), red, 1)
x = bbox[0] + (bbox[2] - bbox[0]) // 4
y = bbox[1] + (bbox[3] - bbox[1]) // 2
cv2.putText(img, cat_id, (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, red, 1)
cv2.imwrite(output_file, img)
def is_json_file(filename):
""" Check if file has a *.json type (just check an extension)
:param filename: name of file
:return: True if file has a *.json type
"""
return True if filename.lower().endswith('.json') else False
def get_anno_list(directory):
""" Get list of files in directory
:param directory: directory to parse
:return: list of files in the directory in format [name1.ext, name2.ext, ...]
"""
files = []
for file in os.listdir(directory):
if is_json_file(file):
files.append(file)
return files
def pretty_string(name_list):
""" Make a string from list of some names
:param name_list: list of names [name#0, name#1, ...]
:return: string in format:
-name#0
-name#1
"""
output_string = ''
for s in name_list:
output_string += '\n -' + s
return output_string
def common_path_images(images_map):
""" Define which part of paths to images is common for all of them
:param images_map: dictionary of matched datasets and its images paths. Format:
{
'dataset1.json': '/path/to/images/for/dataset1',
'dataset2.json': '/path/to/images/for/dataset2',
...
}
:return: string with a common part of the images paths
"""
paths = [path for _, path in images_map.items()]
return os.path.commonpath(paths)
def merge_annotations(directory, anno_list, images_map):
""" Merge several annotations in COCO representation into one
:param directory: base directory where all datasets to be merged are saved
:param anno_list: list of annotations to merge. [dataset1.json, dataset2.json, ...]
:param images_map: dictionary of matched datasets and its images paths
:return: merged annotation, list of used annotations and list of skipped annotations
"""
merged_anno = None
first_step = True
reference_classes = None
common_path = common_path_images(images_map)
valid_annos = []
skipped_annos = []
for anno_file in tqdm(anno_list, 'Parsing annotations...'):
if anno_file not in images_map:
glog.warning('Dataset <{}> is absent in \'images-map\' file and will be ignored!'.format(anno_file))
skipped_annos.append(anno_file)
continue
img_prefix = images_map[anno_file].replace(common_path, '')
if img_prefix[0] == '/':
img_prefix = img_prefix.replace('/', '', 1)
with open(os.path.join(directory, anno_file)) as f:
data = json.load(f)
for img in data['images']:
img['file_name'] = os.path.join(img_prefix, img['file_name'])
if first_step:
merged_anno = data
reference_classes = data['categories']
first_step = False
else:
classes = data['categories']
if classes != reference_classes:
glog.warning('Categories field in dataset <{}> has different classes and will be ignored!'
.format(anno_file))
skipped_annos.append(anno_file)
continue
add_img_id = len(merged_anno['images'])
add_obj_id = len(merged_anno['annotations'])
for img in data['images']:
img['id'] += add_img_id
for ann in data['annotations']:
ann['id'] += add_obj_id
ann['image_id'] += add_img_id
merged_anno['images'].extend(data['images'])
merged_anno['annotations'].extend(data['annotations'])
valid_annos.append(anno_file)
return merged_anno, valid_annos, skipped_annos
def main():
args = parse_args()
anno_list = get_anno_list(args.input_dir)
with open(args.images_map) as f:
images_map = json.load(f)
result_annotation, valid_annos, skipped_annos = merge_annotations(args.input_dir, anno_list, images_map)
assert len(valid_annos) > 0, 'The result annotation is empty! Please check parameters and your \'images_map\' file.'
# Save created annotation
glog.info('Saving annotation...')
with open(args.output, 'w') as outfile:
json.dump(result_annotation, outfile)
glog.info('Annotation was saved in <{}> successfully'.format(args.output))
# Try to load created annotation via cocoapi
try:
glog.info('Trying to load annotation <{}> via cocoapi...'.format(args.output))
coco_loader.COCO(args.output)
except:
raise
else:
glog.info('Annotation in COCO representation <{}> successfully created from: {}'
.format(args.output, pretty_string(valid_annos)))
if len(skipped_annos) > 0:
glog.info('The next annotations were skipped: {}'.format(pretty_string(skipped_annos)))
if args.draw:
for img in tqdm(result_annotation['images'], 'Drawing and saving images...'):
ann_for_img = []
for ann in result_annotation['annotations']:
if ann['image_id'] == img['id']:
ann_for_img.append(ann)
draw_bboxes_and_masks(img, ann_for_img, args.draw)
if __name__ == "__main__":
main()

@@ -1,11 +0,0 @@
wheel
setuptools
cython>=0.28.3
argparse>=1.1
numpy==1.16.4
lxml>=3.5.0
glog>=0.3.1
tqdm>=4.19.6
opencv-python>=3.4.0
scikit-image>=0.14.0
pycocotools

@@ -1,42 +0,0 @@
# Utility for converting CVAT XML annotation file to PNG masks
## Description
The utility converts a CVAT XML file into separate masks, one per image. A mask is a PNG image with one (grayscale) or several (BGR) channels, where each pixel has its own color corresponding to a label.
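A minimal sketch of how a BGR mask is produced (mirrors `create_mask_file` in `converter.py`; the size, colors and points below are examples):
```python
# Minimal sketch: fill each labelled polygon with its mapped colour,
# as create_mask_file() does. Values below are examples.
import numpy as np
import cv2

height, width = 400, 600
color_map = {'car': (255, 0, 0), 'person': (0, 255, 0)}                 # BGR colours
mask = np.full((height, width, 3), (0, 0, 0), dtype=np.uint8)           # background

points = np.array([[10, 10], [100, 10], [100, 80], [10, 80]], dtype=np.int32)  # vertices
mask = cv2.fillPoly(mask, [points], color=color_map['car'])
cv2.imwrite('mask.png', mask)
```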
## Installation
Install necessary packages and create a virtual environment.
```bash
$ sudo apt-get update
$ sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev python3-tk libgtk-3-dev
```
```
$ python3 -m venv .env
$ . .env/bin/activate
$ cat ../requirements.txt | xargs -n 1 -L 1 pip install
```
## Usage
Run the script inside the virtual environment.
```bash
$ python converter.py --cvat-xml </path/to/cvat/annotation.xml> --output-dir <output directory> --mask-bitness 24 --label-color car:255,0,0 --label-color person:0,255,0 --background-color 0,0,0
```
Another way to pass arguments to the script is shown below; it is more convenient when there are a lot of labels. Arguments read from a file must be written one argument per line.
```bash
$ cat labels.txt # an example of file with extra options
--label-color=car:255,0,0
--label-color
person:0,255,0
--background-color=0,0,0
$ python converter.py --cvat-xml </path/to/cvat/annotation.xml> --output-dir <output directory> --mask-bitness 24 @labels.txt
```
Please run `python converter.py --help` for more details.
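This works because the argument parser is built with `fromfile_prefix_chars='@'`; a minimal sketch of the same mechanism:
```python
# Minimal sketch: argparse expands '@labels.txt' into the arguments listed
# in that file, one argument per line, because of fromfile_prefix_chars='@'.
import argparse

parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
parser.add_argument('--label-color', action='append', default=[])
parser.add_argument('--background-color', default='0,0,0')
args = parser.parse_args(['@labels.txt'])   # labels.txt as in the example above
print(args.label_color, args.background_color)
```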

@@ -1,117 +0,0 @@
#!/usr/bin/env python
#
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT
from __future__ import absolute_import, division, print_function
import argparse
import os
import glog as log
import numpy as np
import cv2
from lxml import etree
from tqdm import tqdm
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
fromfile_prefix_chars='@',
description='Convert CVAT XML annotations to masks'
)
parser.add_argument(
'--cvat-xml', metavar='FILE', required=True,
help='input file with CVAT annotation in xml format'
)
parser.add_argument(
'--background-color', metavar='COLOR_BGR', default="0,0,0",
help='specify background color (by default: 0,0,0)'
)
parser.add_argument(
'--label-color', metavar='LABEL:COLOR_BGR', action='append',
default=[],
help="specify a label's color (e.g. 255 or 255,0,0). The color will " +
"be interpreted in accordance with the mask format."
)
parser.add_argument(
'--mask-bitness', type=int, choices=[8, 24], default=8,
help='choose bitness for masks'
)
parser.add_argument(
'--output-dir', metavar='DIRECTORY', required=True,
help='directory for output masks'
)
return parser.parse_args()
def parse_anno_file(cvat_xml):
root = etree.parse(cvat_xml).getroot()
anno = []
for image_tag in root.iter('image'):
image = {}
for key, value in image_tag.items():
image[key] = value
image['shapes'] = []
for poly_tag in image_tag.iter('polygon'):
polygon = {'type': 'polygon'}
for key, value in poly_tag.items():
polygon[key] = value
image['shapes'].append(polygon)
for box_tag in image_tag.iter('box'):
box = {'type': 'box'}
for key, value in box_tag.items():
box[key] = value
box['points'] = "{0},{1};{2},{1};{2},{3};{0},{3}".format(
box['xtl'], box['ytl'], box['xbr'], box['ybr'])
image['shapes'].append(box)
image['shapes'].sort(key=lambda x: int(x.get('z_order', 0)))
anno.append(image)
return anno
def create_mask_file(mask_path, width, height, bitness, color_map, background, shapes):
mask = np.full((height, width, bitness // 8), background, dtype=np.uint8)
for shape in shapes:
color = color_map.get(shape['label'], background)
points = [tuple(map(float, p.split(','))) for p in shape['points'].split(';')]
points = np.array([(int(p[0]), int(p[1])) for p in points])
mask = cv2.fillPoly(mask, [points], color=color)
cv2.imwrite(mask_path, mask)
def to_scalar(str, dim):
scalar = list(map(int, str.split(',')))
if len(scalar) < dim:
scalar.extend([scalar[-1]] * dim)
return tuple(scalar[0:dim])
def main():
args = parse_args()
anno = parse_anno_file(args.cvat_xml)
color_map = {}
dim = args.mask_bitness // 8
for item in args.label_color:
label, color = item.split(':')
color_map[label] = to_scalar(color, dim)
background = to_scalar(args.background_color, dim)
for image in tqdm(anno, desc='Generate masks'):
mask_path = os.path.join(args.output_dir, os.path.splitext(image['name'])[0] + '.png')
mask_dir = os.path.dirname(mask_path)
if mask_dir:
os.makedirs(mask_dir, exist_ok=True)
create_mask_file(mask_path, int(image['width']), int(image['height']),
args.mask_bitness, color_map, background, image['shapes'])
if __name__ == "__main__":
main()

@@ -1,9 +0,0 @@
wheel
setuptools
cython>=0.28.3
argparse>=1.1
numpy==1.16.4
lxml>=3.5.0
glog>=0.3.1
tqdm>=4.19.6
opencv-python>=3.4.0

@@ -1,77 +0,0 @@
# Utility for converting CVAT XML annotation file to TFRECORDS format
## Description
Given a CVAT XML and a directory with the image dataset, this script reads the CVAT
XML and writes the annotations in tfrecords format into a given directory in addition
to the label map required for the tensorflow object detection API.
This implementation supports **annotated images only**. Make sure to dump the
**XML annotations and NOT interpolations** from CVAT.
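For reference, the label map is written in the usual `label_map.pbtxt` form; a minimal sketch of what the converter emits for each kept label (labels here are examples):
```python
# Minimal sketch of the label_map.pbtxt entries written by the converter
# (one item per label that passes the --min-train threshold); labels are examples.
with open('label_map.pbtxt', 'w') as f:
    for idx, name in enumerate(['car', 'person'], start=1):
        f.write('item {\n')
        f.write('\tid: {}\n'.format(idx))
        f.write("\tname: '{}'\n".format(name))
        f.write('}\n\n')
```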
## Installation
The conversion script depends on the TensorFlow Object Detection API;
the installation steps are described below.
### 1. Install necessary packages (including tensorflow).
```bash
sudo apt-get update
sudo apt-get install -y --no-install-recommends python3-pip python3-dev
```
``` bash
python3 -m pip install -r requirements.txt
```
### 2. Install the tensorflow object detection API
If it's already installed, you can check your `$PYTHONPATH` and move on to the usage section.
Here's a quick (unofficial) guide on how to do that.
For more details follow the official guide
[INSTALL TENSORFLOW OBJECT DETECTION API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md).
```bash
# clone the models repository
git clone https://github.com/tensorflow/models.git
```
```bash
# install some dependencies
python3 -m pip install --user Cython
python3 -m pip install --user contextlib2
python3 -m pip install --user pillow
python3 -m pip install --user lxml
python3 -m pip install --user jupyter
python3 -m pip install --user matplotlib
```
```bash
# clone and compile the cocoapi
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
make
cp -r pycocotools <path_to_models_repo>/models/research/
```
```bash
# Protobuf Compilation
cd <path_to_models_repo>/models/research/
protoc object_detection/protos/*.proto --python_out=.
```
```bash
# setup the PYTHONPATH
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
```
## Usage
Run the script.
```bash
$ python3 converter.py --cvat-xml </path/to/cvat/xml> --image-dir </path/to/images>\
--output-dir </path/to/output/directory> --attribute <attribute>
```
Leave the `--attribute` argument empty if you want CVAT labels to be used as the tfrecords labels;
otherwise specify the attribute name to use, e.g. `--attribute <attribute>`.
Please run `python converter.py --help` for more details.
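A minimal sketch of how the class name is picked per box, mirroring `create_tf_example` in `converter.py` (the helper name is illustrative, not part of the converter's API):
```python
# Minimal sketch: the class is the CVAT box label, or the value of the
# attribute named by --attribute if one is given. `class_name_for` is
# an illustrative helper, not part of the converter itself.
def class_name_for(box, attribute_name=''):
    if not attribute_name:
        return box.attrib['label']
    for attr in box:                     # <attribute name="...">value</attribute>
        if attr.attrib['name'] == attribute_name:
            return attr.text.lower()
    return ''
```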

@@ -1,223 +0,0 @@
#!/usr/bin/env python
#
# SPDX-License-Identifier: MIT
# -*- coding: utf-8 -*-
"""
Given a CVAT XML and a directory with the image dataset, this script reads the
CVAT XML and writes the annotations in tfrecords into a given
directory.
This implementation supports annotated images only.
"""
from __future__ import unicode_literals
import xml.etree.ElementTree as ET
import tensorflow as tf
from object_detection.utils import dataset_util
from collections import Counter
import codecs
import hashlib
from pathlib import Path
import argparse
import os
import string
# we need it to filter out non-ASCII characters otherwise
# training will crash
printable = set(string.printable)
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Convert CVAT XML annotations to tfrecords format'
)
parser.add_argument(
'--cvat-xml', metavar='FILE', required=True,
help='input file with CVAT annotation in xml format'
)
parser.add_argument(
'--image-dir', metavar='DIRECTORY', required=True,
help='directory which contains original images'
)
parser.add_argument(
'--output-dir', metavar='DIRECTORY', required=True,
help='directory for output annotations in tfrecords format'
)
parser.add_argument(
'--train-percentage', metavar='PERCENTAGE', required=False, default=90, type=int,
help='the percentage of training data to total data (default: 90)'
)
parser.add_argument(
'--min-train', metavar='NUM', required=False, default=10, type=int,
help='The minimum number of images above which the label is considered (default: 10)'
)
parser.add_argument(
'--attribute', metavar='NAME', required=False, default="",
type=str,
help='The attribute name based on which the object can be identified'
)
return parser.parse_args()
def process_cvat_xml(args):
"""Transforms a single XML in CVAT format to tfrecords.
"""
train_percentage = int(args.train_percentage)
assert 0 <= train_percentage <= 100
cvat_xml = ET.parse(args.cvat_xml).getroot()
output_dir = Path(args.output_dir)
if not output_dir.exists():
print("Creating the output directory because it doesn't exist")
output_dir.mkdir()
cvat_name, output_dir, min_train = \
args.attribute, output_dir.absolute(), args.min_train
# Open the tfrecord files for writing
writer_train = tf.python_io.TFRecordWriter(
os.path.join(output_dir.absolute(), 'train.tfrecord'))
writer_eval = tf.python_io.TFRecordWriter(
os.path.join(output_dir.absolute(), 'eval.tfrecord'))
# extract the object names
object_names = []
num_imgs = 0
for img in cvat_xml.findall('image'):
num_imgs += 1
for box in img:
if cvat_name == "" :
obj_name = ''.join(filter(lambda x: x in printable,
box.attrib['label']))
object_names.append(obj_name)
else :
for attribute in box :
if attribute.attrib['name'] == cvat_name :
obj_name = ''.join(filter(lambda x: x in printable,
attribute.text.lower()))
object_names.append(obj_name)
labels, values = zip(*Counter(object_names).items())
# Create the label map file
saved_dict = dict()
reverse_dict = dict()
with codecs.open(os.path.join(output_dir,'label_map.pbtxt'),
'w', encoding='utf8') as f:
counter = 1
for iii, label in enumerate(labels):
if values[iii] < min_train :
continue
saved_dict[label] = counter
reverse_dict[counter] = label
f.write(u'item {\n')
f.write(u'\tid: {}\n'.format(counter))
f.write(u"\tname: '{}'\n".format(label))
f.write(u'}\n\n')
counter+=1
num_iter = num_imgs
eval_num = num_iter * (100 - train_percentage) // 100
train_num = num_iter - eval_num
for counter,example in enumerate(cvat_xml.findall('image')):
tf_example = create_tf_example(example, args.attribute, saved_dict, args.image_dir)
if tf_example is None:
continue
if(counter < train_num):
writer_train.write(tf_example.SerializeToString())
else :
writer_eval.write(tf_example.SerializeToString())
writer_train.close()
writer_eval.close()
return saved_dict, num_imgs
# Defining the main conversion function
def create_tf_example(example, cvat_name, saved_dict, img_dir):
# Process one image data per run
height = int(example.attrib['height']) # Image height
width = int(example.attrib['width']) # Image width
filename = os.path.join(img_dir, example.attrib['name'])
_, ext = os.path.splitext(example.attrib['name'])
filename = filename.encode('utf8')
with tf.gfile.GFile(filename,'rb') as fid:
encoded_jpg = fid.read()
key = hashlib.sha256(encoded_jpg).hexdigest()
if ext.lower() in ['.jpg','.jpeg'] :
image_format = 'jpeg'.encode('utf8')
elif ext.lower() == '.png' :
image_format = 'png'.encode('utf8')
else:
print('File Format not supported, Skipping')
return None
xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = [] # List of normalized right x coordinates in bounding box
# (1 per box)
ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = [] # List of normalized bottom y coordinates in bounding box
# (1 per box)
classes_text = [] # List of string class name of bounding box (1 per box)
classes = [] # List of integer class id of bounding box (1 per box)
# Loop over the boxes and fill the above fields
for box in example:
box_name = ''
if cvat_name == "" :
box_name = box.attrib['label']
else :
for attr in box:
if attr.attrib['name'] == cvat_name:
box_name = attr.text.lower()
# filter out non-ASCII characters
box_name = ''.join(filter(lambda x: x in printable, box_name))
if box_name in saved_dict.keys():
xmins.append(float(box.attrib['xtl']) / width)
xmaxs.append(float(box.attrib['xbr']) / width)
ymins.append(float(box.attrib['ytl']) / height)
ymaxs.append(float(box.attrib['ybr']) / height)
classes_text.append(box_name.encode('utf8'))
classes.append(saved_dict[box_name])
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
def main():
args = parse_args()
process_cvat_xml(args)
if __name__== '__main__' :
main()

@@ -1,3 +0,0 @@
argparse==1.1
tensorflow==1.15.2
pathlib==1.0.1

@@ -1,30 +0,0 @@
# Utility for converting CVAT XML annotation file to PASCAL VOC format
## Description
Given a CVAT XML and a directory with the image dataset, this script reads the CVAT XML and writes the annotations in PASCAL VOC format into a given directory. This implementation only supports bounding boxes in the CVAT annotation format; it warns if it encounters any tracks or annotations that are not bounding boxes and ignores them in both cases.
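A minimal sketch of how each PASCAL VOC file is written with `pascal_voc_writer`, as `converter.py` does for every CVAT `<image>`/`<box>` (paths and values below are examples):
```python
# Minimal sketch: one PASCAL VOC XML per image, built with pascal_voc_writer.
# Paths and box coordinates below are examples.
from pascal_voc_writer import Writer

writer = Writer('images/C15_L1_0001.jpg', width=600, height=400)
writer.addObject('car', 39, 27, 141, 54)      # label, xmin, ymin, xmax, ymax
writer.save('voc_dir/C15_L1_0001.xml')
```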
## Installation
Install necessary packages and create a virtual environment.
```bash
$ sudo apt-get update
$ sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev
```
```
$ python3 -m venv .env
$ . .env/bin/activate
$ cat requirements.txt | xargs -n 1 -L 1 pip install
```
## Usage
Run the script inside the virtual environment.
```bash
$ python converter.py --cvat-xml </path/to/cvat/xml> --image-dir </path/to/images> --output-dir </path/to/output/directory>
```
Please run `python converter.py --help` for more details.

@@ -1,159 +0,0 @@
#!/usr/bin/env python
#
# SPDX-License-Identifier: MIT
"""
Given a CVAT XML and a directory with the image dataset, this script reads the
CVAT XML and writes the annotations in PASCAL VOC format into a given
directory.
This implementation supports both interpolation tracks from video and
annotated images. If it encounters any tracks or annotations that are
not bounding boxes, it ignores them.
"""
import os
import argparse
import glog as log
from lxml import etree
from pascal_voc_writer import Writer
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Convert CVAT XML annotations to PASCAL VOC format'
)
parser.add_argument(
'--cvat-xml', metavar='FILE', required=True,
help='input file with CVAT annotation in xml format'
)
parser.add_argument(
'--image-dir', metavar='DIRECTORY', required=True,
help='directory which contains original images'
)
parser.add_argument(
'--output-dir', metavar='DIRECTORY', required=True,
help='directory for output annotations in PASCAL VOC format'
)
return parser.parse_args()
def process_cvat_xml(xml_file, image_dir, output_dir):
"""
Transforms a single XML in CVAT format to multiple PASCAL VOC format
XMls.
:param xml_file: CVAT format XML
:param image_dir: image directory of the dataset
:param output_dir: directory of annotations with PASCAL VOC format
:return:
"""
KNOWN_TAGS = {'box', 'image', 'attribute'}
os.makedirs(output_dir, exist_ok=True)
cvat_xml = etree.parse(xml_file)
basename = os.path.splitext( os.path.basename( xml_file ) )[0]
tracks= cvat_xml.findall( './/track' )
if (tracks is not None) and (len(tracks) > 0):
frames = {}
for track in tracks:
trackid = int(track.get("id"))
label = track.get("label")
boxes = track.findall( './box' )
for box in boxes:
frameid = int(box.get('frame'))
outside = int(box.get('outside'))
#occluded = int(box.get('occluded')) #currently unused
#keyframe = int(box.get('keyframe')) #currently unused
xtl = float(box.get('xtl'))
ytl = float(box.get('ytl'))
xbr = float(box.get('xbr'))
ybr = float(box.get('ybr'))
frame = frames.get( frameid, {} )
if outside == 0:
frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
frames[ frameid ] = frame
width = int(cvat_xml.find('.//original_size/width').text)
height = int(cvat_xml.find('.//original_size/height').text)
# Spit out a list of each object for each frame
for frameid in sorted(frames.keys()):
#print( frameid )
image_name = "%s_%08d.jpg" % (basename, frameid)
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)
frame = frames[frameid]
objids = sorted(frame.keys())
for objid in objids:
box = frame[objid]
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
writer.addObject(label, xmin, ymin, xmax, ymax)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))
else:
for img_tag in cvat_xml.findall('image'):
image_name = img_tag.get('name')
width = img_tag.get('width')
height = img_tag.get('height')
depth = img_tag.get('depth', 3)
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height, depth=depth)
unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
if unknown_tags:
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
for box in img_tag.findall('box'):
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
writer.addObject(label, xmin, ymin, xmax, ymax)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))
def main():
args = parse_args()
process_cvat_xml(args.cvat_xml, args.image_dir, args.output_dir)
if __name__ == "__main__":
main()

@@ -1,4 +0,0 @@
argparse>=1.1
lxml>=3.5.0
glog>=0.3.1
pascal-voc-writer==0.1.4

@@ -1,184 +0,0 @@
import tempfile
import shutil
import os
from unittest import TestCase, mock
from utils.voc.converter import process_cvat_xml
XML_ANNOTATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1063</id>
<name>My annotation task</name>
<size>75</size>
<mode>annotation</mode>
<overlap>0</overlap>
<bugtracker></bugtracker>
<created>2018-06-06 11:57:54.807162+03:00</created>
<updated>2018-06-06 12:42:29.375251+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:a,b,c,d</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3086</id>
<start>0</start>
<stop>74</stop>
<url>http://cvat.examle.com:8080/?id=3086</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
</task>
<dumped>2018-06-06 15:47:04.386866+03:00</dumped>
</meta>
<image id="0" name="C15_L1_0001.jpg" width="600" height="400">
<box label="car" xtl="38.95" ytl="26.51" xbr="140.64" ybr="54.29" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="1" name="C15_L1_0002.jpg" width="600" height="400">
<box label="car" xtl="49.13" ytl="23.34" xbr="149.54" ybr="53.88" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="2" name="C15_L1_0003.jpg" width="600" height="400">
<box label="car" xtl="50.73" ytl="30.26" xbr="146.72" ybr="59.97" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">b</attribute>
</box>
</image>
<image id="39" name="C15_L1_0040.jpg" width="600" height="400">
<box label="car" xtl="49.60" ytl="30.15" xbr="150.19" ybr="58.06" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">c</attribute>
</box>
<point label="car" x="30.1" y="170.4" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</point>
</image>
</annotations>
"""
XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1062</id>
<name>My interpolation task</name>
<size>30084</size>
<mode>interpolation</mode>
<overlap>20</overlap>
<bugtracker></bugtracker>
<created>2018-05-31 14:13:36.483219+03:00</created>
<updated>2018-06-06 13:56:32.113705+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:1,2,3,4</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3085</id>
<start>0</start>
<stop>30083</stop>
<url>http://cvat.example.com:8080/?id=3085</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
<original_size>
<width>1024</width>
<height>768</height>
</original_size>
</task>
<dumped>2018-06-06 15:52:11.138470+03:00</dumped>
</meta>
<track id="0" label="car">
<box frame="110" xtl="634.12" ytl="37.68" xbr="661.50" ybr="71.37" outside="0" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
<box frame="111" xtl="634.21" ytl="38.50" xbr="661.59" ybr="72.19" outside="0" occluded="1" keyframe="0">
<attribute name="model">1</attribute>
</box>
<box frame="112" xtl="634.30" ytl="39.32" xbr="661.67" ybr="73.01" outside="1" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
</track>
<track id="1" label="car">
<box frame="0" xtl="626.81" ytl="30.96" xbr="656.05" ybr="58.88" outside="0" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
<box frame="1" xtl="626.63" ytl="31.56" xbr="655.87" ybr="59.48" outside="0" occluded="0" keyframe="0">
<attribute name="model">3</attribute>
</box>
<box frame="2" xtl="626.09" ytl="33.38" xbr="655.33" ybr="61.29" outside="1" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
</track>
</annotations>
"""
class TestProcessCvatXml(TestCase):
def setUp(self):
self.test_dir = tempfile.mkdtemp()
def tearDown(self):
shutil.rmtree(self.test_dir)
@mock.patch('utils.voc.converter.log')
def test_parse_annotation_xml(self, mock_log):
xml_filename = os.path.join(self.test_dir, 'annotations.xml')
with open(xml_filename, mode='x') as file:
file.write(XML_ANNOTATION_EXAMPLE)
voc_dir = os.path.join(self.test_dir, 'voc_dir')
images = ['C15_L1_0001', 'C15_L1_0002', 'C15_L1_0003', 'C15_L1_0040']
expected_xmls = [os.path.join(voc_dir, x + '.xml')
for x in images]
process_cvat_xml(xml_filename, 'img_dir', voc_dir)
for exp in expected_xmls:
self.assertTrue(os.path.exists(exp))
# We should add in some code to parse the resulting xml files
@mock.patch('utils.voc.converter.log')
def test_parse_interpolation_xml(self, mock_log):
xml_filename = os.path.join(self.test_dir, 'interpolations.xml')
with open(xml_filename, mode='x') as file:
file.write(XML_INTERPOLATION_EXAMPLE)
voc_dir = os.path.join(self.test_dir, 'voc_dir')
frames = [0, 1, 2, 110, 111, 112 ]
expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x )
for x in frames]
process_cvat_xml(xml_filename, 'img_dir', voc_dir)
self.assertTrue(os.path.exists(voc_dir))
self.assertTrue(len(os.listdir(voc_dir)) == len(frames))
for exp in expected_xmls:
self.assertTrue(os.path.exists(exp))
# We should add in some code to parse the resulting xml files

@@ -1,38 +0,0 @@
# Utility for converting CVAT XML annotation file to YOLO format
## Description
Given a CVAT XML, this script reads the CVAT XML and writes the
annotations in YOLO format into a given directory. This implementation
supports both interpolation tracks from video and annotated images.
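A minimal sketch of the box to YOLO conversion used by the script: each box becomes a normalized centre plus a normalized width and height.
```python
# Minimal sketch of the CVAT box -> YOLO conversion performed by converter.py:
# normalized centre (x, y) plus normalized width/height.
def to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    x = (xmin + (xmax - xmin) / 2) / img_w
    y = (ymin + (ymax - ymin) / 2) / img_h
    w = (xmax - xmin) / img_w
    h = (ymax - ymin) / img_h
    return x, y, w, h

# Example: to_yolo(38.95, 26.51, 140.64, 54.29, 600, 400)
# -> (0.1497, 0.1010, 0.1695, 0.0695) (rounded)
```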
## Installation
Install necessary packages and create a virtual environment.
```bash
sudo apt-get update
sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev
```
```bash
python3 -m venv .env
. .env/bin/activate
cat requirements.txt | xargs -n 1 -L 1 pip install
```
## Usage
Run the script inside the virtual environment:
```bash
python converter.py --cvat-xml </path/to/cvat/xml> --image-dir </path/to/images> --output-dir </path/to/output/directory>
```
In case you need to download frames from an annotated video file submitted to CVAT:
```bash
python converter.py --cvat-xml </path/to/cvat/xml> --output-dir </path/to/output/directory> --username <CVAT Username> --password <CVAT Password>
```
Please run `python converter.py --help` for more details.

@@ -1,261 +0,0 @@
#!/usr/bin/env python
#
# SPDX-License-Identifier: MIT
"""
Given a CVAT XML, this script reads the CVAT XML and writes the
annotations in YOLO format into a given directory.
This implementation supports both interpolation tracks from video and
annotated images.
"""
import os
import argparse
import glog as log
from lxml import etree
import requests
def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Convert CVAT XML annotations to YOLO format'
)
parser.add_argument(
'--cvat-xml', metavar='FILE', required=True,
help='input file with CVAT annotation in xml format'
)
parser.add_argument(
'--image-dir', metavar='DIRECTORY', required=False,
help='directory which contains original images'
)
parser.add_argument(
'--output-dir', metavar='DIRECTORY', required=True,
help='directory for output annotations in YOLO format'
)
parser.add_argument(
'--username', metavar='USERNAME', required=False,
help='Username from CVAT Login page, required to download images'
)
parser.add_argument(
'--password', metavar='PASSWORD', required=False,
help='Password from CVAT Login page, required to download images'
)
parser.add_argument(
'--labels', metavar='ILABELS', required=False,
help='Labels (separated by comma) to extract. Example: car,truck,motorcycle'
)
return parser.parse_args()
def process_cvat_xml(xml_file, image_dir, output_dir,username,password,ilabels):
"""
Transforms a single XML in CVAT format to YOLO TXT files and downloads images when they are not in IMAGE_DIR
:param xml_file: CVAT format XML
:param image_dir: image directory of the dataset
:param output_dir: directory of annotations with YOLO format
:param username: Username used to login CVAT. Required to download images
:param password: Password used to login CVAT. Required to download images
:param ilabels: Comma separated ordered labels
:return:
"""
KNOWN_TAGS = {'box', 'image', 'attribute'}
if (image_dir is None):
image_dir=os.path.join(output_dir,"data/obj")
os.makedirs(image_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
cvat_xml = etree.parse(xml_file)
basename = os.path.splitext( os.path.basename( xml_file ) )[0]
current_labels = {}
traintxt = ""
if (ilabels is not None):
vlabels=ilabels.split(',')
current_labels = {label: idx for idx, label in enumerate(vlabels)}
else:
current_labels = {label.text: idx for idx, label in enumerate(cvat_xml.findall('meta/task/labels/label/name'))}
tracks= cvat_xml.findall( './/track' )
if (tracks is not None) and (len(tracks) > 0):
frames = {}
for track in tracks:
trackid = int(track.get("id"))
label = track.get("label")
boxes = track.findall( './box' )
for box in boxes:
frameid = int(box.get('frame'))
outside = int(box.get('outside'))
#occluded = int(box.get('occluded')) #currently unused
#keyframe = int(box.get('keyframe')) #currently unused
xtl = float(box.get('xtl'))
ytl = float(box.get('ytl'))
xbr = float(box.get('xbr'))
ybr = float(box.get('ybr'))
frame = frames.get( frameid, {} )
if outside == 0:
frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
frames[ frameid ] = frame
width = int(cvat_xml.find('.//original_size/width').text)
height = int(cvat_xml.find('.//original_size/height').text)
taskid = int(cvat_xml.find('.//task/id').text)
urlsegment = cvat_xml.find(".//segments/segment/url").text
urlbase = urlsegment.split("?")[0]
httpclient = requests.session()
httpclient.get(urlbase)
csrftoken = "none"
sessionid = "none"
# Spit out a list of each object for each frame
for frameid in sorted(frames.keys()):
image_name = "%s_%08d.jpg" % (basename, frameid)
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
if username is None:
log.warn('{} image cannot be found. Is `{}` image directory correct?\n'.format(image_path, image_dir))
else:
log.info('{} image cannot be found. Downloading from task ID {}\n'.format(image_path, taskid))
if sessionid == "none":
if "csrftoken" in httpclient.cookies:
csrftoken = httpclient.cookies["csrftoken"]
elif "csrf" in httpclient.cookies:
csrftoken = httpclient.cookies["csrf"]
login_data = dict(username=username, password=password,
csrfmiddlewaretoken=csrftoken, next='/dashboard')
urllogin = urlbase+"/auth/login"
httpclient.post(urllogin, data=login_data,
headers=dict(Referer=urllogin))
if ("sessionid" in httpclient.cookies):
sessionid = httpclient.cookies["sessionid"]
url = urlbase+"/api/v1/tasks/"+str(taskid)+"/frames/"+ str(frameid)
req = httpclient.get(url, headers=dict(
csrftoken=csrftoken, sessionid=sessionid))
with open(image_path, 'wb') as fo:
fo.write(req.content)
print('Url saved as %s\n' % image_path)
frame = frames[frameid]
_yoloAnnotationContent=""
objids = sorted(frame.keys())
for objid in objids:
box = frame[objid]
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
if label not in current_labels:
raise Exception('Unexpected label name {}'.format(label))
labelid=current_labels[label]
yolo_x= (xmin + ((xmax-xmin)/2))/width
yolo_y= (ymin + ((ymax-ymin)/2))/height
yolo_w = (xmax - xmin) / width
yolo_h = (ymax - ymin) / height
if len(_yoloAnnotationContent) != 0:
_yoloAnnotationContent += "\n"
_yoloAnnotationContent+=str(labelid)+" "+"{:.6f}".format(yolo_x) +" "+"{:.6f}".format(yolo_y) +" "+"{:.6f}".format(yolo_w) +" "+"{:.6f}".format(yolo_h)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt')
anno_path = os.path.join(image_dir, anno_name)
_yoloFile = open(anno_path, "w", newline="\n")
_yoloFile.write(_yoloAnnotationContent)
_yoloFile.close()
if len(traintxt)!=0:
traintxt+="\n"
traintxt+=image_path
else:
for img_tag in cvat_xml.findall('image'):
image_name = img_tag.get('name')
width = int(img_tag.get('width'))
height = int(img_tag.get('height'))
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
if unknown_tags:
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
_yoloAnnotationContent = ""
for box in img_tag.findall('box'):
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))
if label not in current_labels:
raise Exception('Unexpected label name {}'.format(label))
labelid = current_labels[label]
yolo_x = (xmin + ((xmax-xmin)/2))/width
yolo_y = (ymin + ((ymax-ymin)/2))/height
yolo_w = (xmax - xmin) / width
yolo_h = (ymax - ymin) / height
if len(_yoloAnnotationContent) != 0:
_yoloAnnotationContent += "\n"
_yoloAnnotationContent += str(labelid)+" "+"{:.6f}".format(yolo_x) + " "+"{:.6f}".format(
yolo_y) + " "+"{:.6f}".format(yolo_w) + " "+"{:.6f}".format(yolo_h)
anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt')
anno_path = os.path.join(image_dir, anno_name)
_yoloFile = open(anno_path, "w", newline="\n")
_yoloFile.write(_yoloAnnotationContent)
_yoloFile.close()
traintxt_file=open(output_dir+"/train.txt","w",newline="\n")
traintxt_file.write(traintxt)
traintxt_file.close()
def main():
args = parse_args()
process_cvat_xml(args.cvat_xml, args.image_dir, args.output_dir, args.username,args.password,args.labels)
if __name__ == "__main__":
main()

@@ -1,4 +0,0 @@
argparse>=1.1
lxml>=3.5.0
glog>=0.3.1
requests==2.22.0