[Datumaro] Add merge command with segment intersection (#1695)
* Add multi source merge
* update changelog
* cli update
* linter
* fixes and tests
* fix test
* fix test
* relax type requirements in annotations
* fix polylines
* Make groups more stable
* Add group checks
* add group check test
parent 90cc36eb1c
commit 17a5554cd5
@@ -0,0 +1,124 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import json
import logging as log
import os
import os.path as osp
from collections import OrderedDict

from datumaro.components.project import Project
from datumaro.components.operations import (IntersectMerge,
    QualityError, MergeError)

from ..util import at_least, MultilineFormatter, CliException
from ..util.project import generate_next_file_name, load_project


def build_parser(parser_ctor=argparse.ArgumentParser):
    parser = parser_ctor(help="Merge multiple projects",
        description="""
        Merges multiple datasets into one. This can be useful if you
        have several sets of annotations and wish to merge them,
        taking into consideration potential overlaps and conflicts.
        This command can try to find a common ground by voting or
        return a list of conflicts.|n
        |n
        Examples:|n
        - Merge annotations from 3 (or more) annotators:|n
        |s|smerge project1/ project2/ project3/|n
        - Check groups of the merged dataset for consistency:|n
        |s|s|slook for groups consisting of 'person', 'hand', 'head', 'foot'|n
        |s|smerge project1/ project2/ -g 'person,hand?,head,foot?'
        """,
        formatter_class=MultilineFormatter)

    def _group(s):
        return s.split(',')

    parser.add_argument('project', nargs='+', action=at_least(2),
        help="Path to a project (repeatable)")
    parser.add_argument('-iou', '--iou-thresh', default=0.25, type=float,
        help="IoU match threshold for segments (default: %(default)s)")
    parser.add_argument('-oconf', '--output-conf-thresh',
        default=0.0, type=float,
        help="Confidence threshold for output "
            "annotations (default: %(default)s)")
    parser.add_argument('--quorum', default=0, type=int,
        help="Minimum count for a label and attribute voting "
            "results to be counted (default: %(default)s)")
    parser.add_argument('-g', '--groups', action='append', type=_group,
        default=[],
        help="A comma-separated list of labels in "
            "annotation groups to check. A '?' postfix can be added to a "
            "label to make it optional in the group (repeatable)")
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Output directory (default: current project's dir)")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.set_defaults(command=merge_command)

    return parser

def merge_command(args):
    source_projects = [load_project(p) for p in args.project]

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')

    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups,
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0

def save_merge_report(merger, path):
    item_errors = OrderedDict()
    source_errors = OrderedDict()
    all_errors = []

    for e in merger.errors:
        if isinstance(e, QualityError):
            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1
        elif isinstance(e, MergeError):
            for s in e.sources:
                source_errors[s] = source_errors.get(s, 0) + 1
            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1

        all_errors.append(str(e))

    errors = OrderedDict([
        ('Item errors', item_errors),
        ('Source errors', source_errors),
        ('All errors', all_errors),
    ])

    with open(path, 'w') as f:
        json.dump(errors, f, indent=4)
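For reference, a minimal sketch of the same flow driven through the library API rather than the CLI. The paths 'proj_a/', 'proj_b/' and 'merged/' are hypothetical, and Project.load is assumed to be the library-side equivalent of the CLI's load_project helper; the IntersectMerge configuration mirrors merge_command above at its default values.

from datumaro.components.project import Project
from datumaro.components.operations import IntersectMerge

# Load the source projects and extract their datasets (hypothetical paths)
source_datasets = [Project.load(p).make_dataset()
    for p in ('proj_a/', 'proj_b/')]

# The same knobs the CLI exposes, at their default values
merger = IntersectMerge(conf=IntersectMerge.Conf(
    pairwise_dist=0.25, output_conf_thresh=0.0, quorum=0))
merged_dataset = merger(source_datasets)

# Wrap the result in a fresh project before saving, as merge_command does
output_dataset = Project().make_dataset()
output_dataset.define_categories(merged_dataset.categories())
merged_dataset = output_dataset.update(merged_dataset)
merged_dataset.save(save_dir='merged/')

# merger.errors now holds the QualityError / MergeError records that
# save_merge_report() summarizes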
@@ -1,34 +0,0 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

from itertools import groupby

import numpy as np


def find_instances(instance_anns):
    instance_anns = sorted(instance_anns, key=lambda a: a.group)
    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:
            ann_groups.extend(([a] for a in group))
        else:
            ann_groups.append(list(group))

    return ann_groups

def find_group_leader(group):
    return max(group, key=lambda x: x.get_area())

def compute_bbox(annotations):
    boxes = [ann.get_bbox() for ann in annotations]
    x0 = min((b[0] for b in boxes), default=0)
    y0 = min((b[1] for b in boxes), default=0)
    x1 = max((b[0] + b[2] for b in boxes), default=0)
    y1 = max((b[1] + b[3] for b in boxes), default=0)
    return [x0, y0, x1 - x0, y1 - y0]

def softmax(x):
    return np.exp(x) / sum(np.exp(x))
@@ -0,0 +1,213 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

from itertools import groupby

import numpy as np

from datumaro.components.extractor import _Shape, Mask, AnnotationType, RleMask
from datumaro.util.mask_tools import mask_to_rle


def find_instances(instance_anns):
    instance_anns = sorted(instance_anns, key=lambda a: a.group)
    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:
            # Annotations with group id 0 are ungrouped: each is its own instance
            ann_groups.extend(([a] for a in group))
        else:
            ann_groups.append(list(group))

    return ann_groups

def find_group_leader(group):
    return max(group, key=lambda x: x.get_area())

def _get_bbox(ann):
    if isinstance(ann, (_Shape, Mask)):
        return ann.get_bbox()
    else:
        # Assume a raw [x, y, w, h] sequence was passed
        return ann

def max_bbox(annotations):
    boxes = [_get_bbox(ann) for ann in annotations]
    x0 = min((b[0] for b in boxes), default=0)
    y0 = min((b[1] for b in boxes), default=0)
    x1 = max((b[0] + b[2] for b in boxes), default=0)
    y1 = max((b[1] + b[3] for b in boxes), default=0)
    return [x0, y0, x1 - x0, y1 - y0]

def mean_bbox(annotations):
    le = len(annotations)
    boxes = [_get_bbox(ann) for ann in annotations]
    mlb = sum(b[0] for b in boxes) / le
    mtb = sum(b[1] for b in boxes) / le
    mrb = sum(b[0] + b[2] for b in boxes) / le
    mbb = sum(b[1] + b[3] for b in boxes) / le
    return [mlb, mtb, mrb - mlb, mbb - mtb]

def softmax(x):
    return np.exp(x) / sum(np.exp(x))

def nms(segments, iou_thresh=0.5):
    """
    Non-maximum suppression algorithm.
    """

    indices = np.argsort([b.attributes['score'] for b in segments])
    ious = np.array([[segment_iou(a, b) for b in segments] for a in segments])

    predictions = []
    while len(indices) != 0:
        i = len(indices) - 1
        pred_idx = indices[i]  # the remaining segment with the highest score
        to_remove = [i]
        predictions.append(segments[pred_idx])
        for i, box_idx in enumerate(indices[:i]):
            if iou_thresh < ious[pred_idx, box_idx]:
                to_remove.append(i)
        indices = np.delete(indices, to_remove)

    return predictions
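# A hedged usage sketch for nms(), assuming datumaro's Bbox annotation class
# and an installed pycocotools (used by segment_iou); scores live in the
# 'score' attribute, as nms() expects:
from datumaro.components.extractor import Bbox

candidates = [
    Bbox(0, 0, 10, 10, attributes={'score': 0.9}),
    Bbox(1, 1, 10, 10, attributes={'score': 0.8}),    # IoU ~0.68 with the first
    Bbox(50, 50, 10, 10, attributes={'score': 0.7}),  # disjoint, survives
]
kept = nms(candidates, iou_thresh=0.5)  # the 0.8 box is suppressed; 2 remain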
def bbox_iou(a, b):
    """
    IoU computation for simple cases with bounding boxes.
    Returns -1 if there is no intersection, IoU in (0; 1] otherwise.
    """
    bbox_a = _get_bbox(a)
    bbox_b = _get_bbox(b)

    aX, aY, aW, aH = bbox_a
    bX, bY, bW, bH = bbox_b
    in_right = min(aX + aW, bX + bW)
    in_left = max(aX, bX)
    in_top = max(aY, bY)
    in_bottom = min(aY + aH, bY + bH)

    in_w = max(0, in_right - in_left)
    in_h = max(0, in_bottom - in_top)
    intersection = in_w * in_h
    if not intersection:
        return -1

    a_area = aW * aH
    b_area = bW * bH
    union = a_area + b_area - intersection
    return intersection / union
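# A quick worked example for bbox_iou(); plain [x, y, w, h] lists are accepted
# because _get_bbox() passes non-annotation values through unchanged:
#   intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175
assert abs(bbox_iou([0, 0, 10, 10], [5, 5, 10, 10]) - 25 / 175) < 1e-9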
def segment_iou(a, b):
    """
    Generic IoU computation with masks, polygons, and boxes.
    Returns -1 if no intersection, [0; 1] otherwise
    """
    from pycocotools import mask as mask_utils

    a_bbox = a.get_bbox()
    b_bbox = b.get_bbox()

    is_bbox = AnnotationType.bbox in [a.type, b.type]
    if is_bbox:
        a = [a_bbox]
        b = [b_bbox]
    else:
        w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2])
        h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3])

        def _to_rle(ann):
            if ann.type == AnnotationType.polygon:
                return mask_utils.frPyObjects([ann.points], h, w)
            elif isinstance(ann, RleMask):
                return [ann._rle]
            elif ann.type == AnnotationType.mask:
                return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w)
            else:
                raise TypeError("Unexpected arguments: %s, %s" % (a, b))
        a = _to_rle(a)
        b = _to_rle(b)
    return float(mask_utils.iou(a, b, [not is_bbox]))
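# A hedged sketch for segment_iou() with polygons, assuming datumaro's Polygon
# class and an installed pycocotools. Note that for non-box shapes the iscrowd
# flag is set, so pycocotools relates the intersection to the first shape's
# area rather than the union (~25 / 100 = 0.25 for these rasterized squares):
from datumaro.components.extractor import Polygon

sq_a = Polygon([0, 0, 10, 0, 10, 10, 0, 10])  # 10x10 square at the origin
sq_b = Polygon([5, 5, 15, 5, 15, 15, 5, 15])  # same square shifted by (5, 5)
print(segment_iou(sq_a, sq_b))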
def PDJ(a, b, eps=None, ratio=0.05, bbox=None):
    """
    Percentage of Detected Joints metric.
    Returns the fraction of point pairs lying closer than a threshold.
    """

    assert eps is not None or ratio is not None

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not eps:
        # Derive the threshold from the diagonal of the mean bounding box
        if bbox is None:
            bbox = mean_bbox([a, b])

        diag = (bbox[2] ** 2 + bbox[3] ** 2) ** 0.5
        eps = ratio * diag

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(dists < eps) / len(p1)
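# A hedged sketch for PDJ(), assuming datumaro's Points annotation class;
# with the default ratio, eps is 5% of the mean-bbox diagonal:
from datumaro.components.extractor import Points

gt = Points([0, 0, 10, 0, 10, 10])
pred = Points([0, 1, 10, 0, 14, 14])
print(PDJ(gt, pred))  # 1/3: only the middle point pair falls within eps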
def OKS(a, b, sigma=None, bbox=None, scale=None):
    """
    Object Keypoint Similarity metric.
    https://cocodataset.org/#keypoints-eval
    """

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not sigma:
        sigma = 0.1
    else:
        assert len(sigma) == len(p1)

    if not scale:
        if bbox is None:
            bbox = mean_bbox([a, b])
        scale = bbox[2] * bbox[3]

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(np.exp(-(dists ** 2) / (2 * scale * (2 * sigma) ** 2)))
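# A hedged sketch for OKS(), assuming datumaro's Points annotation class;
# note this implementation sums per-keypoint similarities, each in (0; 1]:
from datumaro.components.extractor import Points

kp_gt = Points([2, 2, 8, 8])
kp_pred = Points([2, 3, 8, 8])  # first keypoint is off by 1 pixel
print(OKS(kp_gt, kp_pred))  # ~1.7: ~0.7 for the shifted point + 1.0 for the exact one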
def smooth_line(points, segments):
    assert 2 <= len(points) // 2 and len(points) % 2 == 0

    if len(points) // 2 == segments:
        # NB: this shortcut returns the input flat list as-is, while the
        # general path below returns a (points, step) pair
        return points

    points = list(points)
    if len(points) == 2:
        points.extend(points)
    points = np.array(points).reshape((-1, 2))

    lengths = np.linalg.norm(points[1:] - points[:-1], axis=1)
    dists = [0]
    for l in lengths:
        dists.append(dists[-1] + l)

    step = dists[-1] / segments

    new_points = np.zeros((segments + 1, 2))
    new_points[0] = points[0]

    old_segment = 0
    for new_segment in range(1, segments + 1):
        pos = new_segment * step
        # Advance to the source segment containing this position
        while dists[old_segment + 1] < pos and old_segment + 2 < len(dists):
            old_segment += 1

        segment_start = dists[old_segment]
        segment_len = lengths[old_segment]
        prev_p = points[old_segment]
        next_p = points[old_segment + 1]
        r = (pos - segment_start) / segment_len

        # Linear interpolation between the segment endpoints
        new_points[new_segment] = prev_p * (1 - r) + next_p * r

    return new_points, step
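# A quick worked example for smooth_line(): resample a 2-point polyline
# (flat [x0, y0, x1, y1] list) into 4 equal segments:
line_points, step = smooth_line([0, 0, 10, 0], segments=4)
print(step)         # 2.5: each resampled segment covers 1/4 of the length
print(line_points)  # 5 points: (0,0), (2.5,0), (5,0), (7.5,0), (10,0)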