import argparse
import cv2
import glog
import json
import numpy as np
import os
from tqdm import tqdm
from pycocotools import coco as coco_loader


def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(
        description='Merge annotations in COCO representation into one'
    )
    parser.add_argument(
        '--input-dir', required=True,
        help='directory with input annotations in *.json format'
    )
    parser.add_argument(
        '--output', required=True,
        help='output annotation file'
    )
    parser.add_argument(
        '--images-map', required=True,
        help='file that maps each dataset to its images path (JSON format)'
    )
    parser.add_argument(
        '--draw', default=None,
        help='base directory with images; drawn copies are saved to a \'draw\' '
             'subdirectory next to each image. Disabled by default'
    )
    return parser.parse_args()
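

# Example usage (the script name and all paths below are illustrative placeholders,
# not taken from the repository):
#
#   python merge_coco_annotations.py \
#       --input-dir /data/annotations \
#       --output /data/merged.json \
#       --images-map /data/images_map.json
#
# where images_map.json maps every annotation file to its image directory, e.g.:
#
#   {
#       "dataset1.json": "/data/images/dataset1",
#       "dataset2.json": "/data/images/dataset2"
#   }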


def draw_bboxes_and_masks(img, annotations, input_dir):
    """ Draw bounding boxes and contours of masks on an image and save the result.
    :param img: image record from the annotation (its 'file_name' field is used)
    :param annotations: list of bounding boxes and segments on the image
    :param input_dir: base directory with images; the drawn copy is saved to a
                      'draw' subdirectory next to the original image
    """
    input_file = os.path.join(input_dir, img['file_name'])
    save_path = os.path.join(os.path.dirname(input_file), 'draw')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    output_file = os.path.join(save_path, os.path.basename(input_file))
    img = cv2.imread(input_file)
    yellow = (0, 255, 255)
    red = (0, 0, 255)
    for ann in annotations:
        cat_id = str(ann['category_id'])
        # COCO stores a bbox as [x, y, width, height]; convert it to corner points
        bbox = [int(ann['bbox'][0]), int(ann['bbox'][1]),
                int(ann['bbox'][0] + ann['bbox'][2]), int(ann['bbox'][1] + ann['bbox'][3])]
        masks = ann['segmentation']
        for mask in masks:
            # Each polygon is a flat list [x1, y1, x2, y2, ...]; group it into points
            i = 0
            points = []
            while i < len(mask):
                x = int(mask[i])
                y = int(mask[i + 1])
                points.append([x, y])
                i += 2
            img = cv2.polylines(img, np.int32([points]), True, yellow, 1)
        img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), red, 1)
        x = bbox[0] + (bbox[2] - bbox[0]) // 4
        y = bbox[1] + (bbox[3] - bbox[1]) // 2
        cv2.putText(img, cat_id, (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, red, 1)
    cv2.imwrite(output_file, img)


def is_json_file(filename):
    """ Check whether a file has the *.json extension (only the extension is checked)
    :param filename: name of the file
    :return: True if the file has the *.json extension
    """
    return filename.lower().endswith('.json')


def get_anno_list(directory):
    """ Get the list of *.json files in a directory
    :param directory: directory to parse
    :return: list of files in the directory in format [name1.json, name2.json, ...]
    """
    files = []
    for file in os.listdir(directory):
        if is_json_file(file):
            files.append(file)
    return files


def pretty_string(name_list):
    """ Make a printable string from a list of names
    :param name_list: list of names [name#0, name#1, ...]
    :return: string in format:
        -name#0
        -name#1
    """
    output_string = ''
    for s in name_list:
        output_string += '\n -' + s
    return output_string


def common_path_images(images_map):
    """ Find the part of the image paths that is common to all of them
    :param images_map: dictionary that maps datasets to their image paths. Format:
        {
            'dataset1.json': '/path/to/images/for/dataset1',
            'dataset2.json': '/path/to/images/for/dataset2',
            ...
        }
    :return: string with the common part of the image paths
    """
    paths = [path for _, path in images_map.items()]
    return os.path.commonpath(paths)
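

# For example (hypothetical paths), os.path.commonpath(
#     ['/data/images/for/dataset1', '/data/images/for/dataset2'])
# returns '/data/images/for', so the per-dataset prefix computed in
# merge_annotations below becomes 'dataset1', 'dataset2', etc.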


def merge_annotations(directory, anno_list, images_map):
    """ Merge several annotations in COCO representation into one
    :param directory: base directory that contains all annotation files to merge
    :param anno_list: list of annotations to merge: [dataset1.json, dataset2.json, ...]
    :param images_map: dictionary that maps datasets to their image paths
    :return: merged annotation, list of used annotations and list of skipped annotations
    """
    merged_anno = None
    first_step = True
    reference_classes = None
    common_path = common_path_images(images_map)
    valid_annos = []
    skipped_annos = []
    for anno_file in tqdm(anno_list, 'Parsing annotations...'):
        if anno_file not in images_map:
            glog.warning('Dataset <{}> is absent in \'images-map\' file and will be ignored!'.format(anno_file))
            skipped_annos.append(anno_file)
            continue
        # Make image paths relative to the common root of all datasets
        img_prefix = images_map[anno_file].replace(common_path, '')
        if img_prefix.startswith('/'):
            img_prefix = img_prefix.replace('/', '', 1)
        with open(os.path.join(directory, anno_file)) as f:
            data = json.load(f)
        for img in data['images']:
            img['file_name'] = os.path.join(img_prefix, img['file_name'])
        if first_step:
            # The first valid dataset defines the reference set of categories
            merged_anno = data
            reference_classes = data['categories']
            first_step = False
        else:
            classes = data['categories']
            if classes != reference_classes:
                glog.warning('Categories field in dataset <{}> contains different classes, so the dataset will be ignored!'
                             .format(anno_file))
                skipped_annos.append(anno_file)
                continue
            # Offset image and annotation ids by the current counts
            # so they stay unique in the merged annotation
            add_img_id = len(merged_anno['images'])
            add_obj_id = len(merged_anno['annotations'])
            for img in data['images']:
                img['id'] += add_img_id
            for ann in data['annotations']:
                ann['id'] += add_obj_id
                ann['image_id'] += add_img_id
            merged_anno['images'].extend(data['images'])
            merged_anno['annotations'].extend(data['annotations'])
        valid_annos.append(anno_file)
    return merged_anno, valid_annos, skipped_annos
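

# Illustration of the id-offset step in merge_annotations (numbers are hypothetical):
# if the merged annotation already holds 100 images and 250 annotations, the next
# dataset's image ids are shifted by +100 and its annotation ids by +250 (with each
# 'image_id' updated accordingly), so ids stay unique after the two extend() calls.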


def main():
    args = parse_args()
    anno_list = get_anno_list(args.input_dir)
    with open(args.images_map) as f:
        images_map = json.load(f)
    result_annotation, valid_annos, skipped_annos = merge_annotations(args.input_dir, anno_list, images_map)
    assert len(valid_annos) > 0, 'The result annotation is empty! Please check the parameters and your \'images-map\' file.'
    # Save the created annotation
    glog.info('Saving annotation...')
    with open(args.output, 'w') as outfile:
        json.dump(result_annotation, outfile)
    glog.info('Annotation was saved to <{}> successfully'.format(args.output))
    # Try to load the created annotation via cocoapi to make sure it is valid
    glog.info('Trying to load annotation <{}> via cocoapi...'.format(args.output))
    coco_loader.COCO(args.output)
    glog.info('Annotation in COCO representation <{}> was successfully created from: {}'
              .format(args.output, pretty_string(valid_annos)))
    if len(skipped_annos) > 0:
        glog.info('The following annotations were skipped: {}'.format(pretty_string(skipped_annos)))
    if args.draw:
        for img in tqdm(result_annotation['images'], 'Drawing and saving images...'):
            # Collect all annotations that belong to the current image
            ann_for_img = []
            for ann in result_annotation['annotations']:
                if ann['image_id'] == img['id']:
                    ann_for_img.append(ann)
            draw_bboxes_and_masks(img, ann_for_img, args.draw)


if __name__ == "__main__":
    main()