[Datumaro] Add merge command with segment intersection (#1695)
* Add multi source merge
* Update changelog
* CLI update
* Linter
* Fixes and tests
* Fix test
* Fix test
* Relax type requirements in annotations
* Fix polylines
* Make groups more stable
* Add group checks
* Add group check test
parent 90cc36eb1c
commit 17a5554cd5
@@ -0,0 +1,124 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import json
import logging as log
import os
import os.path as osp
from collections import OrderedDict

from datumaro.components.project import Project
from datumaro.components.operations import (IntersectMerge,
    QualityError, MergeError)

from ..util import at_least, MultilineFormatter, CliException
from ..util.project import generate_next_file_name, load_project


def build_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Merge a few projects",
        description="""
        Merges multiple datasets into one. This can be useful if you
        have several sets of annotations and wish to merge them,
        taking into consideration potential overlaps and conflicts.
        This command can try to find a common ground by voting or
        return a list of conflicts.|n
        |n
        Examples:|n
        - Merge annotations from 3 (or more) annotators:|n
        |s|smerge project1/ project2/ project3/|n
        - Check groups of the merged dataset for consistency:|n
        |s|s|slook for groups consisting of 'person', 'hand', 'head', 'foot'|n
        |s|smerge project1/ project2/ -g 'person,hand?,head,foot?'
        """,
        formatter_class=MultilineFormatter)

    def _group(s):
        return s.split(',')

    parser.add_argument('project', nargs='+', action=at_least(2),
        help="Path to a project (repeatable)")
    parser.add_argument('-iou', '--iou-thresh', default=0.25, type=float,
        help="IoU match threshold for segments (default: %(default)s)")
    parser.add_argument('-oconf', '--output-conf-thresh',
        default=0.0, type=float,
        help="Confidence threshold for output "
            "annotations (default: %(default)s)")
    parser.add_argument('--quorum', default=0, type=int,
        help="Minimum number of votes a label or attribute value "
            "needs to be accepted (default: %(default)s)")
    parser.add_argument('-g', '--groups', action='append', type=_group,
        default=[],
        help="A comma-separated list of labels in "
            "annotation groups to check. A '?' postfix can be added to a label "
            "to make it optional in the group (repeatable)")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.set_defaults(command=merge_command)

    return parser

def merge_command(args):
    source_projects = [load_project(p) for p in args.project]

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')

    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups,
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'", dst_dir)
    log.info("Report has been saved to '%s'", report_path)

    return 0

def save_merge_report(merger, path):
    item_errors = OrderedDict()
    source_errors = OrderedDict()
    all_errors = []

    for e in merger.errors:
        if isinstance(e, QualityError):
            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1
        elif isinstance(e, MergeError):
            for s in e.sources:
                source_errors[s] = source_errors.get(s, 0) + 1
            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1

        all_errors.append(str(e))

    errors = OrderedDict([
        ('Item errors', item_errors),
        ('Source errors', source_errors),
        ('All errors', all_errors),
    ])

    with open(path, 'w') as f:
        json.dump(errors, f, indent=4)
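For reference, a minimal sketch of driving IntersectMerge programmatically, mirroring merge_command above. This is an editor's illustration, not part of the commit; the Dataset.from_iterable, DatasetItem, and Bbox import paths are assumptions based on datumaro's test utilities of this period:

    # Editor's sketch: merge two in-memory sources with IntersectMerge.
    from datumaro.components.extractor import DatasetItem, Bbox  # assumed imports
    from datumaro.components.project import Dataset              # assumed import
    from datumaro.components.operations import IntersectMerge

    a = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[Bbox(0, 0, 4, 4, label=0)]),
    ], categories=['dog'])
    b = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[Bbox(0, 1, 4, 4, label=0)]),
    ], categories=['dog'])

    merger = IntersectMerge(conf=IntersectMerge.Conf(pairwise_dist=0.5))
    merged = merger([a, b])  # the overlapping boxes vote and fuse into one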
@@ -1,34 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

from itertools import groupby

import numpy as np


def find_instances(instance_anns):
    instance_anns = sorted(instance_anns, key=lambda a: a.group)
    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:
            ann_groups.extend(([a] for a in group))
        else:
            ann_groups.append(list(group))

    return ann_groups

def find_group_leader(group):
    return max(group, key=lambda x: x.get_area())

def compute_bbox(annotations):
    boxes = [ann.get_bbox() for ann in annotations]
    x0 = min((b[0] for b in boxes), default=0)
    y0 = min((b[1] for b in boxes), default=0)
    x1 = max((b[0] + b[2] for b in boxes), default=0)
    y1 = max((b[1] + b[3] for b in boxes), default=0)
    return [x0, y0, x1 - x0, y1 - y0]

def softmax(x):
    return np.exp(x) / sum(np.exp(x))
@@ -0,0 +1,213 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

from itertools import groupby

import numpy as np

from datumaro.components.extractor import _Shape, Mask, AnnotationType, RleMask
from datumaro.util.mask_tools import mask_to_rle


def find_instances(instance_anns):
    # Annotations with group == 0 are singletons; the rest are grouped by id.
    instance_anns = sorted(instance_anns, key=lambda a: a.group)
    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:
            ann_groups.extend(([a] for a in group))
        else:
            ann_groups.append(list(group))

    return ann_groups

def find_group_leader(group):
    return max(group, key=lambda x: x.get_area())

def _get_bbox(ann):
    # Accepts either an annotation or a raw [x, y, w, h] sequence.
    if isinstance(ann, (_Shape, Mask)):
        return ann.get_bbox()
    else:
        return ann

def max_bbox(annotations):
    # The tightest box covering all the input boxes.
    boxes = [_get_bbox(ann) for ann in annotations]
    x0 = min((b[0] for b in boxes), default=0)
    y0 = min((b[1] for b in boxes), default=0)
    x1 = max((b[0] + b[2] for b in boxes), default=0)
    y1 = max((b[1] + b[3] for b in boxes), default=0)
    return [x0, y0, x1 - x0, y1 - y0]

def mean_bbox(annotations):
    # The coordinate-wise average of the input boxes.
    le = len(annotations)
    boxes = [_get_bbox(ann) for ann in annotations]
    mlb = sum(b[0] for b in boxes) / le
    mtb = sum(b[1] for b in boxes) / le
    mrb = sum(b[0] + b[2] for b in boxes) / le
    mbb = sum(b[1] + b[3] for b in boxes) / le
    return [mlb, mtb, mrb - mlb, mbb - mtb]
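A quick check of the two box helpers (editor's example, not part of the commit); raw [x, y, w, h] lists are accepted here because _get_bbox passes non-annotation inputs through:

    boxes = [[0, 0, 4, 4], [2, 2, 4, 4]]
    assert max_bbox(boxes) == [0, 0, 6, 6]   # the tight cover of both boxes
    assert mean_bbox(boxes) == [1, 1, 4, 4]  # the coordinate-wise average box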
def softmax(x):
    return np.exp(x) / sum(np.exp(x))
def nms(segments, iou_thresh=0.5):
    """
    Non-maxima suppression algorithm.
    """

    indices = np.argsort([b.attributes['score'] for b in segments])
    ious = np.array([[segment_iou(a, b) for b in segments] for a in segments])

    predictions = []
    while len(indices) != 0:
        i = len(indices) - 1
        pred_idx = indices[i]
        to_remove = [i]
        predictions.append(segments[pred_idx])
        # Suppress the remaining candidates that overlap the kept
        # prediction above the threshold.
        for j, box_idx in enumerate(indices[:i]):
            if iou_thresh < ious[pred_idx, box_idx]:
                to_remove.append(j)
        indices = np.delete(indices, to_remove)

    return predictions
def bbox_iou(a, b):
    """
    IoU computation for the simple case of two bounding boxes.

    Returns -1 if there is no intersection, a value in [0; 1] otherwise.
    """
    bbox_a = _get_bbox(a)
    bbox_b = _get_bbox(b)

    aX, aY, aW, aH = bbox_a
    bX, bY, bW, bH = bbox_b
    in_right = min(aX + aW, bX + bW)
    in_left = max(aX, bX)
    in_top = max(aY, bY)
    in_bottom = min(aY + aH, bY + bH)

    in_w = max(0, in_right - in_left)
    in_h = max(0, in_bottom - in_top)
    intersection = in_w * in_h
    if not intersection:
        return -1

    a_area = aW * aH
    b_area = bW * bH
    union = a_area + b_area - intersection
    return intersection / union
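A quick numeric check (editor's example, not part of the commit): two 4x4 boxes offset by one pixel vertically intersect in a 4x3 region, so IoU = 12 / (16 + 16 - 12) = 0.6:

    assert abs(bbox_iou([0, 0, 4, 4], [0, 1, 4, 4]) - 0.6) < 1e-9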
def segment_iou(a, b):
    """
    Generic IoU computation with masks, polygons, and boxes.

    Returns -1 if no intersection, [0; 1] otherwise
    """
    from pycocotools import mask as mask_utils

    a_bbox = a.get_bbox()
    b_bbox = b.get_bbox()

    is_bbox = AnnotationType.bbox in [a.type, b.type]
    if is_bbox:
        a = [a_bbox]
        b = [b_bbox]
    else:
        # A common raster size covering both shapes.
        w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2])
        h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3])

        def _to_rle(ann):
            if ann.type == AnnotationType.polygon:
                return mask_utils.frPyObjects([ann.points], h, w)
            elif isinstance(ann, RleMask):
                return [ann._rle]
            elif ann.type == AnnotationType.mask:
                return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w)
            else:
                raise TypeError("Unexpected arguments: %s, %s" % (a, b))
        a = _to_rle(a)
        b = _to_rle(b)
    return float(mask_utils.iou(a, b, [not is_bbox]))
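An editor's smoke test for the generic path (not part of the commit; requires pycocotools, and assumes Polygon is importable from datumaro.components.extractor):

    from datumaro.components.extractor import Polygon  # assumed import

    square = Polygon([0, 0, 2, 0, 2, 2, 0, 2])
    assert segment_iou(square, square) == 1.0  # identical shapes fully overlap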
def PDJ(a, b, eps=None, ratio=0.05, bbox=None):
    """
    Percentage of Detected Joints metric.

    Counts the number of matching points.
    """

    assert eps is not None or ratio is not None

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not eps:
        # Default threshold: a fraction of the bbox diagonal.
        if bbox is None:
            bbox = mean_bbox([a, b])

        diag = (bbox[2] ** 2 + bbox[3] ** 2) ** 0.5
        eps = ratio * diag

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(dists < eps) / len(p1)
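In formula form (editor's summary of the code above, for N point pairs and a threshold derived from the bbox diagonal when eps is not given):

    \mathrm{PDJ}(a, b) = \frac{1}{N} \sum_{i=1}^{N}
        \mathbf{1}\left[ \lVert p_i^{a} - p_i^{b} \rVert_2 < \varepsilon \right],
    \qquad
    \varepsilon = \mathrm{ratio} \cdot \sqrt{w_{\mathrm{bbox}}^2 + h_{\mathrm{bbox}}^2}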
def OKS(a, b, sigma=None, bbox=None, scale=None):
    """
    Object Keypoint Similarity metric.
    https://cocodataset.org/#keypoints-eval
    """

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not sigma:
        sigma = 0.1
    else:
        assert len(sigma) == len(p1)

    if not scale:
        # Scale defaults to the area of the mean bbox of the two point sets.
        if bbox is None:
            bbox = mean_bbox([a, b])
        scale = bbox[2] * bbox[3]

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(np.exp(-(dists ** 2) / (2 * scale * (2 * sigma) ** 2)))
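For comparison, the COCO definition referenced in the docstring averages the per-keypoint terms over the N labeled points (with object scale s^2 and per-keypoint constant k_i; the code above corresponds to s^2 = scale and k_i = 2*sigma_i, returns the sum without the 1/N normalization, and does not model COCO's visibility mask):

    \mathrm{OKS} = \frac{1}{N} \sum_{i=1}^{N}
        \exp\left( -\frac{d_i^2}{2 s^2 k_i^2} \right),
    \qquad
    d_i = \lVert p_i^{a} - p_i^{b} \rVert_2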
def smooth_line(points, segments):
    # Resamples a polyline, given as a flat [x0, y0, x1, y1, ...] sequence,
    # into the requested number of equal-length segments.
    assert 2 <= len(points) // 2 and len(points) % 2 == 0

    if len(points) // 2 == segments:
        # NB: returns the input unchanged (no step) when the point count
        # already matches.
        return points

    points = list(points)
    if len(points) == 2:
        points.extend(points)
    points = np.array(points).reshape((-1, 2))

    lengths = np.linalg.norm(points[1:] - points[:-1], axis=1)
    dists = [0]
    for length in lengths:
        dists.append(dists[-1] + length)

    step = dists[-1] / segments

    new_points = np.zeros((segments + 1, 2))
    new_points[0] = points[0]

    old_segment = 0
    for new_segment in range(1, segments + 1):
        pos = new_segment * step
        # Advance to the source segment containing the target position.
        while dists[old_segment + 1] < pos and old_segment + 2 < len(dists):
            old_segment += 1

        segment_start = dists[old_segment]
        segment_len = lengths[old_segment]
        prev_p = points[old_segment]
        next_p = points[old_segment + 1]
        r = (pos - segment_start) / segment_len

        # Linear interpolation between the segment endpoints.
        new_points[new_segment] = prev_p * (1 - r) + next_p * r

    return new_points, step
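A short usage sketch (editor's example, not part of the commit): resampling a 3-point horizontal polyline into 4 equal segments:

    new_points, step = smooth_line([0, 0, 2, 0, 4, 0], segments=4)
    # new_points is a (5, 2) array of evenly spaced points; step == 1.0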