You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
56 lines
1.4 KiB
Python
56 lines
1.4 KiB
Python
|
|
# Copyright (C) 2018 Intel Corporation
|
|
#
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
from collections import OrderedDict
|
|
import os
|
|
import os.path as osp
|
|
|
|
from datumaro.components.extractor import DatasetItem, Extractor
|
|
from datumaro.util.image import lazy_image
|
|
|
|
|
|
class ImageDirExtractor(Extractor):
    """Extractor exposing the image files of one directory as dataset items.

    Only direct children of the directory are considered (no recursion).
    Each file whose extension is listed in ``_SUPPORTED_FORMATS`` becomes a
    ``DatasetItem`` whose id is the file name without its extension and
    whose image is wrapped with ``lazy_image``. Items are kept ordered by id.
    """

    # File extensions (with the leading dot) treated as images.
    # Matching is case-insensitive, see _is_image().
    _SUPPORTED_FORMATS = ['.png', '.jpg']

    def __init__(self, url):
        """Scan the directory ``url`` and index the images found in it.

        Args:
            url: path to an existing directory.

        Raises:
            AssertionError: if ``url`` is not a directory.
        """
        super().__init__()

        # The offending path is attached so a failure is diagnosable.
        assert osp.isdir(url), url

        items = []
        # os.listdir() returns entries in arbitrary order. Sorting the names
        # first makes the result reproducible when several files share the
        # same stem (e.g. "a.jpg" and "a.png"): the last one in name order
        # is the one kept by the OrderedDict below.
        for name in sorted(os.listdir(url)):
            path = osp.join(url, name)
            if self._is_image(path):
                item_id = osp.splitext(name)[0]
                item = DatasetItem(id=item_id, image=lazy_image(path))
                items.append((item.id, item))

        # Stable sort by item id; duplicates keep their name order.
        items.sort(key=lambda e: e[0])
        self._items = OrderedDict(items)

        # A plain image directory has no subset information.
        self._subsets = None

    def __iter__(self):
        """Yield the items in ascending id order."""
        yield from self._items.values()

    def __len__(self):
        """Return the number of indexed items."""
        return len(self._items)

    def subsets(self):
        """Return the subsets of this extractor (always ``None`` here)."""
        return self._subsets

    def get(self, item_id, subset=None, path=None):
        """Return the item registered under ``item_id``.

        Args:
            item_id: id of the requested item.
            subset: must be falsy; this extractor has no subsets.
            path: must be falsy; this extractor has no nested paths.

        Raises:
            KeyError: if ``subset`` or ``path`` is given, or if no item
                with ``item_id`` exists.
        """
        if path or subset:
            raise KeyError(
                "no subsets or nested paths are available "
                "(subset=%r, path=%r)" % (subset, path))
        return self._items[item_id]

    def _is_image(self, path):
        """Tell whether ``path`` is a regular file with a supported extension.

        The extension comparison ignores case, so ``.JPG`` and ``.Png``
        files are accepted as well.
        """
        extensions = tuple(ext.lower() for ext in self._SUPPORTED_FORMATS)
        # Cheap string test first; touch the filesystem once, and only for
        # names that look like images.
        return path.lower().endswith(extensions) and osp.isfile(path)
|