[Datumaro] CLI updates (#1824)

* updates

* diff fix
main
zhiltsov-max 6 years ago committed by GitHub
parent 575c93ff2c
commit c6b3c797ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -550,11 +550,17 @@ def diff_command(args):
dst_dir = osp.abspath(dst_dir)
log.info("Saving diff to '%s'" % dst_dir)
visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
output_format=args.format)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
dst_dir_existed = osp.exists(dst_dir)
try:
visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
output_format=args.format)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
except BaseException:
if not dst_dir_existed and osp.isdir(dst_dir):
shutil.rmtree(dst_dir, ignore_errors=True)
raise
return 0

@ -83,11 +83,20 @@ class DiffVisualizer:
if self.output_format is Format.tensorboard:
self.file_writer.reopen()
for i, (item_a, item_b) in enumerate(zip(extractor_a, extractor_b)):
if item_a.id != item_b.id or not item_a.id or not item_b.id:
print("Dataset items #%s '%s' '%s' do not match" % \
(i + 1, item_a.id, item_b.id))
continue
ids_a = set((item.id, item.subset) for item in extractor_a)
ids_b = set((item.id, item.subset) for item in extractor_b)
ids = ids_a & ids_b
if len(ids) != len(ids_a):
print("Unmatched items in the first dataset: ")
print(ids_a - ids)
if len(ids) != len(ids_b):
print("Unmatched items in the second dataset: ")
print(ids_b - ids)
for item_id, item_subset in ids:
item_a = extractor_a.get(item_id, item_subset)
item_b = extractor_a.get(item_id, item_subset)
label_diff = self.comparator.compare_item_labels(item_a, item_b)
self.update_label_confusion(label_diff)

@ -611,33 +611,39 @@ class ProjectDataset(Dataset):
project.config.remove('sources')
save_dir = osp.abspath(save_dir)
os.makedirs(save_dir, exist_ok=True)
dataset_save_dir = osp.join(save_dir, project.config.dataset_dir)
os.makedirs(dataset_save_dir, exist_ok=True)
converter_kwargs = {
'save_images': save_images,
}
if merge:
# merge and save the resulting dataset
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self, dataset_save_dir)
else:
if recursive:
# children items should already be updated
# so we just save them recursively
for source in self._sources.values():
if isinstance(source, ProjectDataset):
source.save(**converter_kwargs)
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self.iterate_own(), dataset_save_dir)
save_dir_existed = osp.exists(save_dir)
try:
os.makedirs(save_dir, exist_ok=True)
os.makedirs(dataset_save_dir, exist_ok=True)
project.save(save_dir)
if merge:
# merge and save the resulting dataset
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self, dataset_save_dir)
else:
if recursive:
# children items should already be updated
# so we just save them recursively
for source in self._sources.values():
if isinstance(source, ProjectDataset):
source.save(**converter_kwargs)
converter = self.env.make_converter(
DEFAULT_FORMAT, **converter_kwargs)
converter(self.iterate_own(), dataset_save_dir)
project.save(save_dir)
except BaseException:
if not save_dir_existed and osp.isdir(save_dir):
shutil.rmtree(save_dir, ignore_errors=True)
raise
@property
def env(self):
@ -705,7 +711,7 @@ class ProjectDataset(Dataset):
try:
os.makedirs(save_dir, exist_ok=True)
converter(dataset, save_dir)
except Exception:
except BaseException:
if not save_dir_existed:
shutil.rmtree(save_dir)
raise
@ -750,7 +756,7 @@ class Project:
config_path = osp.join(save_dir, config.project_filename)
config.dump(config_path)
except Exception:
except BaseException:
if not env_dir_existed:
shutil.rmtree(save_dir, ignore_errors=True)
if not project_dir_existed:

@ -7,6 +7,7 @@ from enum import Enum
import logging as log
import os.path as osp
import random
import re
import pycocotools.mask as mask_utils
@ -372,6 +373,40 @@ class IdFromImageName(Transform, CliPlugin):
"item has no image info" % item.id)
return item
class Rename(Transform, CliPlugin):
    """
    Renames items in the dataset. Supports regular expressions.
    The first character in the expression is a delimiter for
    the pattern and replacement parts. Replacement part can also
    contain string.format tokens with 'item' object available.|n
    |n
    Examples:|n
    - Replace 'pattern' with 'replacement':|n
    |s|srename -e '|pattern|replacement|'|n
    - Remove 'frame_' from item ids:|n
    |s|srename -e '|frame_(\\d+)|\\1|'
    """
    # NOTE: backslashes in the docstring above are escaped ('\\d', '\\1') so
    # that the non-raw string contains no invalid escape sequences; the
    # resulting text is "|frame_(\d+)|\1|".
    # NOTE(review): the '|n' / '|s' tokens are presumably line-break / space
    # markers consumed by the CLI help formatter - confirm against CliPlugin.

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        # Extend the parser built by the base classes with the single
        # option this transform needs.
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-e', '--regex',
            help="Regex for renaming.")
        return parser

    def __init__(self, extractor, regex):
        # 'regex' is a sed-like expression: <d>pattern<d>replacement<d>,
        # where <d> is whatever character the expression starts with.
        super().__init__(extractor)

        # Explicit validation instead of 'assert': asserts are stripped
        # when Python runs with -O, and this value comes from the user.
        if not isinstance(regex, str) or not regex:
            raise ValueError("Renaming expression must be a non-empty "
                "string, got %r" % (regex, ))

        delimiter = regex[0]
        parts = regex.split(delimiter, maxsplit=3)
        # parts[0] is the empty string before the leading delimiter,
        # parts[1] is the pattern and parts[2] is the replacement.
        if len(parts) < 3:
            raise ValueError("Renaming expression %r must have the form "
                "'%spattern%sreplacement%s'" % \
                (regex, delimiter, delimiter, delimiter))

        pattern, sub = parts[1:3]
        self._re = re.compile(pattern)
        self._sub = sub

    def transform_item(self, item):
        # The regex substitution is applied to the item id first, and
        # str.format is then applied to the whole resulting string, so
        # '{item...}' tokens in the replacement are expanded here.
        # NOTE(review): because format() runs on the whole result, literal
        # '{' or '}' characters remaining in the id are also interpreted
        # as format fields.
        new_id = self._re.sub(self._sub, item.id).format(item=item)
        return self.wrap_item(item, id=new_id)
class RemapLabels(Transform, CliPlugin):
DefaultAction = Enum('DefaultAction', ['keep', 'delete'])

Loading…
Cancel
Save