Added sorting methods (#3937)

main
Maria Khrustaleva 4 years ago committed by GitHub
parent 2ed9f9cdb4
commit 68fbcdec43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>) - Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>)
- Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>) - Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>)
- Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>) - Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>)
- Data sorting option (<https://github.com/openvinotoolkit/cvat/pull/3937>)
- Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>) - Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>)
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>) - Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)

@ -1,12 +1,12 @@
{ {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^0.21.4", "axios": "^0.21.4",

@ -1,6 +1,6 @@
{ {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration", "description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js", "main": "babel.config.js",
"scripts": { "scripts": {

@ -367,6 +367,24 @@
KEY_FILE_PATH: 'KEY_FILE_PATH', KEY_FILE_PATH: 'KEY_FILE_PATH',
}); });
/**
* Sorting methods
* @enum {string}
* @name SortingMethod
* @memberof module:API.cvat.enums
* @property {string} LEXICOGRAPHICAL 'lexicographical'
* @property {string} NATURAL 'natural'
* @property {string} PREDEFINED 'predefined'
* @property {string} RANDOM 'random'
* @readonly
*/
const SortingMethod = Object.freeze({
LEXICOGRAPHICAL: 'lexicographical',
NATURAL: 'natural',
PREDEFINED: 'predefined',
RANDOM: 'random',
});
module.exports = { module.exports = {
ShareFileType, ShareFileType,
TaskStatus, TaskStatus,
@ -384,5 +402,6 @@
DimensionType, DimensionType,
CloudStorageProviderType, CloudStorageProviderType,
CloudStorageCredentialsType, CloudStorageCredentialsType,
SortingMethod,
}; };
})(); })();

@ -1017,6 +1017,7 @@
copy_data: undefined, copy_data: undefined,
dimension: undefined, dimension: undefined,
cloud_storage_id: undefined, cloud_storage_id: undefined,
sorting_method: undefined,
}; };
const updatedFields = new FieldUpdateTrigger({ const updatedFields = new FieldUpdateTrigger({
@ -1549,6 +1550,16 @@
cloudStorageId: { cloudStorageId: {
get: () => data.cloud_storage_id, get: () => data.cloud_storage_id,
}, },
sortingMethod: {
/**
* @name sortingMethod
* @type {module:API.cvat.enums.SortingMethod}
* @memberof module:API.cvat.classes.Task
* @instance
* @readonly
*/
get: () => data.sorting_method,
},
_internalData: { _internalData: {
get: () => data, get: () => data,
}, },
@ -2061,6 +2072,7 @@
image_quality: this.imageQuality, image_quality: this.imageQuality,
use_zip_chunks: this.useZipChunks, use_zip_chunks: this.useZipChunks,
use_cache: this.useCache, use_cache: this.useCache,
sorting_method: this.sortingMethod,
}; };
if (typeof this.startFrame !== 'undefined') { if (typeof this.startFrame !== 'undefined') {

@ -1,12 +1,12 @@
{ {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@ant-design/icons": "^4.6.3", "@ant-design/icons": "^4.6.3",

@ -1,6 +1,6 @@
{ {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"description": "CVAT single-page application", "description": "CVAT single-page application",
"main": "src/index.tsx", "main": "src/index.tsx",
"scripts": { "scripts": {

@ -353,6 +353,7 @@ export function createTaskAsync(data: any): ThunkAction<Promise<void>, {}, {}, A
image_quality: 70, image_quality: 70,
use_zip_chunks: data.advanced.useZipChunks, use_zip_chunks: data.advanced.useZipChunks,
use_cache: data.advanced.useCache, use_cache: data.advanced.useCache,
sorting_method: data.advanced.sortingMethod,
}; };
if (data.projectId) { if (data.projectId) {

@ -7,6 +7,7 @@ import { Row, Col } from 'antd/lib/grid';
import { PercentageOutlined } from '@ant-design/icons'; import { PercentageOutlined } from '@ant-design/icons';
import Input from 'antd/lib/input'; import Input from 'antd/lib/input';
import Select from 'antd/lib/select'; import Select from 'antd/lib/select';
import Radio from 'antd/lib/radio';
import Checkbox from 'antd/lib/checkbox'; import Checkbox from 'antd/lib/checkbox';
import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form'; import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form';
import Text from 'antd/lib/typography/Text'; import Text from 'antd/lib/typography/Text';
@ -16,6 +17,13 @@ import patterns from 'utils/validation-patterns';
const { Option } = Select; const { Option } = Select;
export enum SortingMethod {
LEXICOGRAPHICAL = 'lexicographical',
NATURAL = 'natural',
PREDEFINED = 'predefined',
RANDOM = 'random',
}
export interface AdvancedConfiguration { export interface AdvancedConfiguration {
bugTracker?: string; bugTracker?: string;
imageQuality?: number; imageQuality?: number;
@ -31,6 +39,7 @@ export interface AdvancedConfiguration {
dataChunkSize?: number; dataChunkSize?: number;
useCache: boolean; useCache: boolean;
copyData?: boolean; copyData?: boolean;
sortingMethod: SortingMethod;
} }
const initialValues: AdvancedConfiguration = { const initialValues: AdvancedConfiguration = {
@ -39,6 +48,7 @@ const initialValues: AdvancedConfiguration = {
useZipChunks: true, useZipChunks: true,
useCache: true, useCache: true,
copyData: false, copyData: false,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
}; };
interface Props { interface Props {
@ -178,6 +188,33 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
); );
} }
private renderSortingMethodRadio(): JSX.Element {
return (
<Form.Item
label='Sorting method'
name='sortingMethod'
rules={[
{
required: true,
message: 'The field is required.',
},
]}
help='Specify how to sort images. It is not relevant for videos.'
>
<Radio.Group>
<Radio value={SortingMethod.LEXICOGRAPHICAL} key={SortingMethod.LEXICOGRAPHICAL}>
Lexicographical
</Radio>
<Radio value={SortingMethod.NATURAL} key={SortingMethod.NATURAL}>Natural</Radio>
<Radio value={SortingMethod.PREDEFINED} key={SortingMethod.PREDEFINED}>
Predefined
</Radio>
<Radio value={SortingMethod.RANDOM} key={SortingMethod.RANDOM}>Random</Radio>
</Radio.Group>
</Form.Item>
);
}
private renderImageQuality(): JSX.Element { private renderImageQuality(): JSX.Element {
return ( return (
<CVATTooltip title='Defines images compression level'> <CVATTooltip title='Defines images compression level'>
@ -290,8 +327,7 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
> >
<Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'> <Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'>
{ {
dumpers.map((dumper: any) => dumpers.map((dumper: any) => <Option value={dumper.name}>{dumper.name}</Option>)
<Option value={dumper.name}>{dumper.name}</Option>)
} }
</Select> </Select>
</Form.Item> </Form.Item>
@ -384,6 +420,9 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
const { installedGit, activeFileManagerTab } = this.props; const { installedGit, activeFileManagerTab } = this.props;
return ( return (
<Form initialValues={initialValues} ref={this.formRef} layout='vertical'> <Form initialValues={initialValues} ref={this.formRef} layout='vertical'>
<Row>
<Col>{this.renderSortingMethodRadio()}</Col>
</Row>
{activeFileManagerTab === 'share' ? ( {activeFileManagerTab === 'share' ? (
<Row> <Row>
<Col>{this.renderCopyDataChechbox()}</Col> <Col>{this.renderCopyDataChechbox()}</Col>

@ -20,7 +20,7 @@ import { Files } from 'components/file-manager/file-manager';
import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form'; import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form';
import ProjectSearchField from './project-search-field'; import ProjectSearchField from './project-search-field';
import ProjectSubsetField from './project-subset-field'; import ProjectSubsetField from './project-subset-field';
import AdvancedConfigurationForm, { AdvancedConfiguration } from './advanced-configuration-form'; import AdvancedConfigurationForm, { AdvancedConfiguration, SortingMethod } from './advanced-configuration-form';
export interface CreateTaskData { export interface CreateTaskData {
projectId: number | null; projectId: number | null;
@ -54,6 +54,7 @@ const defaultState = {
lfs: false, lfs: false,
useZipChunks: true, useZipChunks: true,
useCache: true, useCache: true,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
}, },
labels: [], labels: [],
files: { files: {

@ -65,6 +65,7 @@ class _TaskBackupBase():
'chunk_type', 'chunk_type',
'storage_method', 'storage_method',
'storage', 'storage',
'sorting_method',
} }
self._prepare_meta(allowed_fields, data) self._prepare_meta(allowed_fields, data)

@ -14,11 +14,13 @@ from contextlib import closing
import av import av
import numpy as np import numpy as np
from natsort import os_sorted
from pyunpack import Archive from pyunpack import Archive
from PIL import Image, ImageFile from PIL import Image, ImageFile
from random import shuffle
import open3d as o3d import open3d as o3d
from cvat.apps.engine.utils import rotate_image from cvat.apps.engine.utils import rotate_image
from cvat.apps.engine.models import DimensionType from cvat.apps.engine.models import DimensionType, SortingMethod
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web # fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file # see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@ -47,9 +49,22 @@ def files_to_ignore(directory):
return True return True
return False return False
def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
if sorting_method == SortingMethod.LEXICOGRAPHICAL:
return sorted(images, key=func)
elif sorting_method == SortingMethod.NATURAL:
return os_sorted(images, key=func)
elif sorting_method == SortingMethod.PREDEFINED:
return images
elif sorting_method == SortingMethod.RANDOM:
shuffle(images)
return images
else:
raise NotImplementedError()
class IMediaReader(ABC): class IMediaReader(ABC):
def __init__(self, source_path, step, start, stop, dimension): def __init__(self, source_path, step, start, stop, dimension):
self._source_path = sorted(source_path) self._source_path = source_path
self._step = step self._step = step
self._start = start self._start = start
self._stop = stop self._stop = stop
@ -90,7 +105,13 @@ class IMediaReader(ABC):
return range(self._start, self._stop, self._step) return range(self._start, self._stop, self._step)
class ImageListReader(IMediaReader): class ImageListReader(IMediaReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path: if not source_path:
raise Exception('No image found') raise Exception('No image found')
@ -102,13 +123,15 @@ class ImageListReader(IMediaReader):
assert stop > start assert stop > start
super().__init__( super().__init__(
source_path=source_path, source_path=sort(source_path, sorting_method),
step=step, step=step,
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension dimension=dimension
) )
self._sorting_method = sorting_method
def __iter__(self): def __iter__(self):
for i in range(self._start, self._stop, self._step): for i in range(self._start, self._stop, self._step):
yield (self.get_image(i), self.get_path(i), i) yield (self.get_image(i), self.get_path(i), i)
@ -121,7 +144,8 @@ class ImageListReader(IMediaReader):
step=self._step, step=self._step,
start=self._start, start=self._start,
stop=self._stop, stop=self._stop,
dimension=self._dimension dimension=self._dimension,
sorting_method=self._sorting_method
) )
def get_path(self, i): def get_path(self, i):
@ -154,7 +178,8 @@ class ImageListReader(IMediaReader):
source_path=source_files, source_path=source_files,
step=step, step=step,
start=start, start=start,
stop=stop stop=stop,
sorting_method=self._sorting_method,
) )
self._dimension = dimension self._dimension = dimension
@ -163,7 +188,13 @@ class ImageListReader(IMediaReader):
return [self.get_path(idx) for idx, _ in enumerate(self._source_path)] return [self.get_path(idx) for idx, _ in enumerate(self._source_path)]
class DirectoryReader(ImageListReader): class DirectoryReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
image_paths = [] image_paths = []
for source in source_path: for source in source_path:
for root, _, files in os.walk(source): for root, _, files in os.walk(source):
@ -176,10 +207,17 @@ class DirectoryReader(ImageListReader):
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension, dimension=dimension,
sorting_method=sorting_method,
) )
class ArchiveReader(DirectoryReader): class ArchiveReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._archive_source = source_path[0] self._archive_source = source_path[0]
extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0]) extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(extract_dir) Archive(self._archive_source).extractall(extract_dir)
@ -190,11 +228,18 @@ class ArchiveReader(DirectoryReader):
step=step, step=step,
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension dimension=dimension,
sorting_method=sorting_method,
) )
class PdfReader(ImageListReader): class PdfReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path: if not source_path:
raise Exception('No PDF found') raise Exception('No PDF found')
@ -223,14 +268,26 @@ class PdfReader(ImageListReader):
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension, dimension=dimension,
sorting_method=sorting_method,
) )
class ZipReader(ImageListReader): class ZipReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._zip_source = zipfile.ZipFile(source_path[0], mode='r') self._zip_source = zipfile.ZipFile(source_path[0], mode='r')
self.extract_dir = source_path[1] if len(source_path) > 1 else None self.extract_dir = source_path[1] if len(source_path) > 1 else None
file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image'] file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image']
super().__init__(file_list, step=step, start=start, stop=stop, dimension=dimension) super().__init__(file_list,
step=step,
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method)
def __del__(self): def __del__(self):
self._zip_source.close() self._zip_source.close()

@ -0,0 +1,19 @@
# Generated by Django 3.1.13 on 2021-12-03 08:06
import cvat.apps.engine.models
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('engine', '0044_auto_20211123_0824'),
]
operations = [
migrations.AddField(
model_name='data',
name='sorting_method',
field=models.CharField(choices=[('lexicographical', 'LEXICOGRAPHICAL'), ('natural', 'NATURAL'), ('predefined', 'PREDEFINED'), ('random', 'RANDOM')], default=cvat.apps.engine.models.SortingMethod['LEXICOGRAPHICAL'], max_length=15),
),
]

@ -81,6 +81,19 @@ class StorageChoice(str, Enum):
def __str__(self): def __str__(self):
return self.value return self.value
class SortingMethod(str, Enum):
LEXICOGRAPHICAL = 'lexicographical'
NATURAL = 'natural'
PREDEFINED = 'predefined'
RANDOM = 'random'
@classmethod
def choices(cls):
return tuple((x.value, x.name) for x in cls)
def __str__(self):
return self.value
class Data(models.Model): class Data(models.Model):
chunk_size = models.PositiveIntegerField(null=True) chunk_size = models.PositiveIntegerField(null=True)
size = models.PositiveIntegerField(default=0) size = models.PositiveIntegerField(default=0)
@ -95,6 +108,7 @@ class Data(models.Model):
storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM) storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)
storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL) storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL)
cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data') cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data')
sorting_method = models.CharField(max_length=15, choices=SortingMethod.choices(), default=SortingMethod.LEXICOGRAPHICAL)
class Meta: class Meta:
default_permissions = () default_permissions = ()

@ -281,7 +281,7 @@ class DataSerializer(serializers.ModelSerializer):
model = models.Data model = models.Data
fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter', fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks', 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage') 'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage', 'sorting_method')
# pylint: disable=no-self-use # pylint: disable=no-self-use
def validate_frame_filter(self, value): def validate_frame_filter(self, value):
@ -308,9 +308,9 @@ class DataSerializer(serializers.ModelSerializer):
client_files = validated_data.pop('client_files') client_files = validated_data.pop('client_files')
server_files = validated_data.pop('server_files') server_files = validated_data.pop('server_files')
remote_files = validated_data.pop('remote_files') remote_files = validated_data.pop('remote_files')
validated_data.pop('use_zip_chunks') for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
validated_data.pop('use_cache') validated_data.pop(extra_key)
validated_data.pop('copy_data')
db_data = models.Data.objects.create(**validated_data) db_data = models.Data.objects.create(**validated_data)
data_path = db_data.get_data_dirname() data_path = db_data.get_data_dirname()

@ -22,7 +22,7 @@ from django.db import transaction
from cvat.apps.engine import models from cvat.apps.engine import models
from cvat.apps.engine.log import slogger from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime) ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest.core import VideoManifestValidator from utils.dataset_manifest.core import VideoManifestValidator
@ -123,15 +123,18 @@ def _count_files(data, manifest_file=None):
raise ValueError("Bad file path: " + path) raise ValueError("Bad file path: " + path)
server_files.append(path) server_files.append(path)
server_files.sort(reverse=True) sorted_server_files = sorted(server_files, reverse=True)
# The idea of the code is trivial. After sort we will have files in the # The idea of the code is trivial. After sort we will have files in the
# following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c' # following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c'
# Let's keep all items which aren't substrings of the previous item. In # Let's keep all items which aren't substrings of the previous item. In
# the example above only 2.txt and 1.txt files will be in the final list. # the example above only 2.txt and 1.txt files will be in the final list.
# Also need to correctly handle 'a/b/c0', 'a/b/c' case. # Also need to correctly handle 'a/b/c0', 'a/b/c' case.
data['server_files'] = [v[1] for v in zip([""] + server_files, server_files) without_extra_dirs = [v[1] for v in zip([""] + sorted_server_files, sorted_server_files)
if not os.path.dirname(v[0]).startswith(v[1])] if not os.path.dirname(v[0]).startswith(v[1])]
# we need to keep the original sequence of files
data['server_files'] = [f for f in server_files if f in without_extra_dirs]
def count_files(file_mapping, counter): def count_files(file_mapping, counter):
for rel_path, full_path in file_mapping.items(): for rel_path, full_path in file_mapping.items():
mime = get_mime(full_path) mime = get_mime(full_path)
@ -141,7 +144,7 @@ def _count_files(data, manifest_file=None):
manifest_file.append(rel_path) manifest_file.append(rel_path)
else: else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't " slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to a video or an image file)".format(full_path)) "correspond to supported MIME file type)".format(full_path))
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
@ -213,6 +216,7 @@ def _download_data(urls, upload_dir):
def _get_manifest_frame_indexer(start_frame=0, frame_step=1): def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
return lambda frame_id: start_frame + frame_id * frame_step return lambda frame_id: start_frame + frame_id * frame_step
@transaction.atomic @transaction.atomic
def _create_thread(tid, data, isImport=False): def _create_thread(tid, data, isImport=False):
slogger.glob.info("create task #{}".format(tid)) slogger.glob.info("create task #{}".format(tid))
@ -222,15 +226,13 @@ def _create_thread(tid, data, isImport=False):
upload_dir = db_data.get_upload_dirname() upload_dir = db_data.get_upload_dirname()
if data['remote_files']: if data['remote_files']:
if db_data.storage != models.StorageChoice.CLOUD_STORAGE: data['remote_files'] = _download_data(data['remote_files'], upload_dir)
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
manifest_file = [] manifest_file = []
media = _count_files(data, manifest_file) media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file) media, task_mode = _validate_data(media, manifest_file)
if manifest_file: if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \ raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
"File with meta information can be uploaded if 'Use cache' option is also selected"
if data['server_files']: if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL: if db_data.storage == models.StorageChoice.LOCAL:
@ -252,19 +254,22 @@ def _create_thread(tid, data, isImport=False):
'specific_attributes': db_cloud_storage.get_specific_attributes() 'specific_attributes': db_cloud_storage.get_specific_attributes()
} }
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
first_sorted_media_image = sorted(media['image'])[0] sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file # prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path()) manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager( cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]), os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname() db_data.cloud_storage.get_storage_dirname()
) )
cloud_storage_manifest.set_index() cloud_storage_manifest.set_index()
media_files = sorted(media['image']) sequence, content = cloud_storage_manifest.get_subset(sorted_media)
content = cloud_storage_manifest.get_subset(media_files) sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(content) manifest.create(sorted_content)
av_scan_paths(upload_dir) av_scan_paths(upload_dir)
@ -292,24 +297,48 @@ def _create_thread(tid, data, isImport=False):
if media_files: if media_files:
if extractor is not None: if extractor is not None:
raise Exception('Combined data types are not supported') raise Exception('Combined data types are not supported')
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE: if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step()) manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
db_data.start_frame = 0 db_data.start_frame = 0
data['stop_frame'] = None data['stop_frame'] = None
db_data.frame_filter = '' db_data.frame_filter = ''
if isImport and media_type != 'video' and db_data.storage_method == models.StorageMethodChoice.CACHE:
# we should sort media_files according to the manifest content sequence
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.set_index()
sorted_media_files = []
for idx in range(len(media_files)):
properties = manifest[manifest_index(idx)]
image_name = properties.get('name', None)
image_extension = properties.get('extension', None)
full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None
if full_image_path and full_image_path in media_files:
sorted_media_files.append(full_image_path)
media_files = sorted_media_files.copy()
del sorted_media_files
data['sorting_method'] = models.SortingMethod.PREDEFINED
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if manifest_file and not isImport and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to upload manifest file and use random sorting")
if isImport and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")
extractor = MEDIA_TYPES[media_type]['extractor']( if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_path=source_paths, source_paths.append(db_data.get_upload_dirname())
step=db_data.get_frame_step(), upload_dir = db_data.get_upload_dirname()
start=db_data.start_frame, db_data.storage = models.StorageChoice.LOCAL
stop=data['stop_frame'],
)
details = {
'source_path': source_paths,
'step': db_data.get_frame_step(),
'start': db_data.start_frame,
'stop': data['stop_frame'],
}
if media_type != 'video':
details['sorting_method'] = data['sorting_method']
extractor = MEDIA_TYPES[media_type]['extractor'](**details)
validate_dimension = ValidateDimension() validate_dimension = ValidateDimension()
if isinstance(extractor, MEDIA_TYPES['zip']['extractor']): if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
@ -474,8 +503,12 @@ def _create_thread(tid, data, isImport=False):
chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames] chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
img_sizes = [] img_sizes = []
for _, frame_id in chunk_paths: for chunk_path, frame_id in chunk_paths:
properties = manifest[manifest_index(frame_id)] properties = manifest[manifest_index(frame_id)]
# check mapping
if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
raise Exception('Incorrect file mapping to manifest content')
if db_task.dimension == models.DimensionType.DIM_2D: if db_task.dimension == models.DimensionType.DIM_2D:
resolution = (properties['width'], properties['height']) resolution = (properties['width'], properties['height'])
else: else:

@ -30,9 +30,9 @@ from rest_framework.test import APIClient, APITestCase
from datumaro.util.test_utils import TestDir from datumaro.util.test_utils import TestDir
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project, from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project,
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice) Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice, DimensionType,
from cvat.apps.engine.media_extractors import ValidateDimension SortingMethod)
from cvat.apps.engine.models import DimensionType from cvat.apps.engine.media_extractors import ValidateDimension, sort
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
def create_db_users(cls): def create_db_users(cls):
@ -2169,17 +2169,29 @@ class TaskImportExportAPITestCase(APITestCase):
with open(path, "wb") as image: with open(path, "wb") as image:
image.write(data.read()) image.write(data.read())
cls.media_data.append( data = {
{ "image_quality": 75,
**{"image_quality": 75, "copy_data": True,
"copy_data": True, "start_frame": 2,
"start_frame": 2, "stop_frame": 9,
"stop_frame": 9, "frame_filter": "step=2",
"frame_filter": "step=2", **{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}, }
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)}, use_cache_data = {
} **data,
) 'use_cache': True,
}
cls.media_data.append(data)
data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(data)
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.RANDOM
cls.media_data.append(use_cache_data)
filename = "test_video_1.mp4" filename = "test_video_1.mp4"
path = os.path.join(settings.SHARE_ROOT, filename) path = os.path.join(settings.SHARE_ROOT, filename)
@ -2267,13 +2279,47 @@ class TaskImportExportAPITestCase(APITestCase):
} }
) )
data = {
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_10.jpg")[1],
"client_files[3]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.extend([ cls.media_data.extend([
# image list local # image list local
# sorted data
# natural: test_1.jpg, test_2.jpg, test_3.jpg, test_10.jpg
{ {
"client_files[0]": generate_image_file("test_1.jpg")[1], **use_cache_data,
"client_files[1]": generate_image_file("test_2.jpg")[1], 'sorting_method': SortingMethod.NATURAL,
"client_files[2]": generate_image_file("test_3.jpg")[1], },
"image_quality": 75, {
**data,
'sorting_method': SortingMethod.NATURAL,
},
# random
{
**use_cache_data,
'sorting_method': SortingMethod.RANDOM,
},
# predefined: test_1.jpg, test_2.jpg, test_10.jpg, test_2.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.PREDEFINED,
},
# lexicographical: test_1.jpg, test_10.jpg, test_2.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
{
**data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
}, },
# video local # video local
{ {
@ -2576,7 +2622,7 @@ def generate_manifest_file(data_type, manifest_path, sources):
kwargs = { kwargs = {
'images': { 'images': {
'sources': sources, 'sources': sources,
'is_sorted': False, 'sorting_method': SortingMethod.LEXICOGRAPHICAL,
}, },
'video': { 'video': {
'media_file': sources[0], 'media_file': sources[0],
@ -2633,6 +2679,13 @@ class TaskDataAPITestCase(APITestCase):
image.write(data.read()) image.write(data.read())
cls._image_sizes[filename] = img_size cls._image_sizes[filename] = img_size
filename = "test_10.jpg"
path = os.path.join(settings.SHARE_ROOT, filename)
img_size, data = generate_image_file(filename)
with open(path, "wb") as image:
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = os.path.join("data", "test_3.jpg") filename = os.path.join("data", "test_3.jpg")
path = os.path.join(settings.SHARE_ROOT, filename) path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path)) os.makedirs(os.path.dirname(path))
@ -2732,6 +2785,9 @@ class TaskDataAPITestCase(APITestCase):
path = os.path.join(settings.SHARE_ROOT, "test_3.jpg") path = os.path.join(settings.SHARE_ROOT, "test_3.jpg")
os.remove(path) os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "test_10.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg") path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg")
os.remove(path) os.remove(path)
@ -2892,9 +2948,9 @@ class TaskDataAPITestCase(APITestCase):
client_files = [img for key, img in data.items() if key.startswith("client_files")] client_files = [img for key, img in data.items() if key.startswith("client_files")]
if server_files: if server_files:
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sorted(server_files)] source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sort(server_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL))]
else: else:
source_files = [f for f in sorted(client_files, key=lambda e: e.name)] source_files = [f for f in sort(client_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL), func=lambda e: e.name)]
source_images = [] source_images = []
for f in source_files: for f in source_files:
@ -3128,7 +3184,7 @@ class TaskDataAPITestCase(APITestCase):
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
task_spec = { task_spec = {
"name": "cached images task without copying #16", "name": "cached images task with default sorting data and without copying #16",
"overlap": 0, "overlap": 0,
"segment_size": 0, "segment_size": 0,
"labels": [ "labels": [
@ -3140,14 +3196,14 @@ class TaskDataAPITestCase(APITestCase):
task_data = { task_data = {
"server_files[0]": "test_1.jpg", "server_files[0]": "test_1.jpg",
"server_files[1]": "test_2.jpg", "server_files[1]": "test_2.jpg",
"server_files[2]": "test_3.jpg", "server_files[2]": "test_10.jpg",
"image_quality": 70, "image_quality": 70,
"use_cache": True, "use_cache": True,
} }
image_sizes = [ image_sizes = [
self._image_sizes[task_data["server_files[0]"]], self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]], self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
] ]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
@ -3381,6 +3437,44 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
# test predefined sorting
task_spec.update([('name', 'task custom data sequence #28')])
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_3.jpg",
"server_files[2]": "test_2.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.PREDEFINED
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
# test a natural data sequence
task_spec.update([('name', 'task native data sequence #29')])
task_data = {
"server_files[0]": "test_10.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_1.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.NATURAL
}
image_sizes = [
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[0]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
def test_api_v1_tasks_id_data_admin(self): def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin) self._test_api_v1_tasks_id_data(self.admin)

@ -637,9 +637,8 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
db_task.data = db_data db_task.data = db_data
db_task.save() db_task.save()
data = {k:v for k, v in serializer.data.items()} data = {k:v for k, v in serializer.data.items()}
data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks'] for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
data['use_cache'] = serializer.validated_data['use_cache'] data[extra_key] = serializer.validated_data[extra_key]
data['copy_data'] = serializer.validated_data['copy_data']
if data['use_cache']: if data['use_cache']:
db_task.data.storage_method = StorageMethodChoice.CACHE db_task.data.storage_method = StorageMethodChoice.CACHE
db_task.data.save(update_fields=['storage_method']) db_task.data.save(update_fields=['storage_method'])

@ -55,3 +55,4 @@ google-cloud-storage==1.42.0
# when pycocotools is installed by wheel in python 3.8+ # when pycocotools is installed by wheel in python 3.8+
datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0

@ -125,7 +125,15 @@ To create a 3D task, you need to use the following directory structures:
## Advanced configuration ## Advanced configuration
![](/images/image128_use_cache.jpg) ![](/images/image128.jpg)
### Sorting method
An option that defines the order in which the uploaded files are arranged. It is not applicable to video files.
For example, the sequence `2.jpeg, 10.jpeg, 1.jpeg` after sorting will be:
- `lexicographical`: 1.jpeg, 10.jpeg, 2.jpeg
- `natural`: 1.jpeg, 2.jpeg, 10.jpeg
- `predefined`: 2.jpeg, 10.jpeg, 1.jpeg (the order in which the files were attached)
- `random`: a random permutation of the uploaded files
### Use zip chunks ### Use zip chunks

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

@ -45,6 +45,8 @@ class CLI():
data['copy_data'] = kwargs.get('copy_data') data['copy_data'] = kwargs.get('copy_data')
if 'use_cache' in kwargs: if 'use_cache' in kwargs:
data['use_cache'] = kwargs.get('use_cache') data['use_cache'] = kwargs.get('use_cache')
if 'sorting_method' in kwargs:
data['sorting_method'] = kwargs.get('sorting_method')
response = self.session.post(url, data=data, files=files) response = self.session.post(url, data=data, files=files)
response.raise_for_status() response.raise_for_status()

@ -208,6 +208,13 @@ task_create_parser.add_argument(
action='store_false', action='store_false',
help='''set the option to use the cache (default: %(default)s)''' help='''set the option to use the cache (default: %(default)s)'''
) )
# Data ordering for the created task; mirrors the server-side
# SortingMethod choices. Fix: help text said "soring" instead of "sorting".
task_create_parser.add_argument(
    '--sorting-method',
    default='lexicographical',
    choices=['lexicographical', 'natural', 'predefined', 'random'],
    help='''data sorting method (default: %(default)s)'''
)
####################################################################### #######################################################################
# Delete # Delete
####################################################################### #######################################################################

@ -10,7 +10,7 @@ from contextlib import closing
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from PIL import Image from PIL import Image
from .utils import md5_hash, rotate_image from .utils import md5_hash, rotate_image, sort, SortingMethod
class VideoStreamReader: class VideoStreamReader:
def __init__(self, source_path, chunk_size, force): def __init__(self, source_path, chunk_size, force):
@ -146,14 +146,14 @@ class DatasetImagesReader:
def __init__(self, def __init__(self,
sources, sources,
meta=None, meta=None,
is_sorted=True, sorting_method=SortingMethod.PREDEFINED,
use_image_hash=False, use_image_hash=False,
start = 0, start = 0,
step = 1, step = 1,
stop = None, stop = None,
*args, *args,
**kwargs): **kwargs):
self._sources = sources if is_sorted else sorted(sources) self._sources = sort(sources, sorting_method)
self._meta = meta self._meta = meta
self._data_dir = kwargs.get('data_dir', None) self._data_dir = kwargs.get('data_dir', None)
self._use_image_hash = use_image_hash self._use_image_hash = use_image_hash
@ -601,11 +601,18 @@ class ImageManifestManager(_ManifestManager):
return (f"{image['name']}{image['extension']}" for _, image in self) return (f"{image['name']}{image['extension']}" for _, image in self)
def get_subset(self, subset_names): def get_subset(self, subset_names):
return ({ index_list = []
'name': f"{image['name']}", subset = []
'extension': f"{image['extension']}", for _, image in self:
'width': image['width'], image_name = f"{image['name']}{image['extension']}"
'height': image['height'], if image_name in subset_names:
'meta': image['meta'], index_list.append(subset_names.index(image_name))
'checksum': f"{image['checksum']}" subset.append({
} for _, image in self if f"{image['name']}{image['extension']}" in subset_names) 'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
})
return index_list, subset

@ -17,6 +17,8 @@ def get_args():
'if by default the video does not meet the requirements and a manifest file is not prepared') 'if by default the video does not meet the requirements and a manifest file is not prepared')
parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved', parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved',
default=os.getcwd()) default=os.getcwd())
parser.add_argument('--sorting', choices=['lexicographical', 'natural', 'predefined', 'random'],
type=str, default='lexicographical')
parser.add_argument('source', type=str, help='Source paths') parser.add_argument('source', type=str, help='Source paths')
return parser.parse_args() return parser.parse_args()
@ -63,7 +65,7 @@ def main():
try: try:
assert len(sources), 'A images was not found' assert len(sources), 'A images was not found'
manifest = ImageManifestManager(manifest_path=manifest_directory) manifest = ImageManifestManager(manifest_path=manifest_directory)
manifest.link(sources=sources, meta=meta, is_sorted=False, manifest.link(sources=sources, meta=meta, sorting_method=args.sorting,
use_image_hash=True, data_dir=data_dir) use_image_hash=True, data_dir=data_dir)
manifest.create(_tqdm=tqdm) manifest.create(_tqdm=tqdm)
except Exception as ex: except Exception as ex:

@ -2,3 +2,4 @@ av==8.0.2 --no-binary=av
opencv-python-headless==4.4.0.42 opencv-python-headless==4.4.0.42
Pillow==7.2.0 Pillow==7.2.0
tqdm==4.58.0 tqdm==4.58.0
natsort==8.0.0

@ -1,12 +1,16 @@
# Copyright (C) 2021 Intel Corporation # Copyright (C) 2021 Intel Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import os import os
import re import re
import hashlib import hashlib
import mimetypes import mimetypes
import cv2 as cv import cv2 as cv
from av import VideoFrame from av import VideoFrame
from enum import Enum
from natsort import os_sorted
from random import shuffle
def rotate_image(image, angle): def rotate_image(image, angle):
height, width = image.shape[:2] height, width = image.shape[:2]
@ -187,3 +191,29 @@ def detect_related_images(image_paths, root_path):
elif data_are_3d: elif data_are_3d:
return _detect_related_images_3D(image_paths, root_path) return _detect_related_images_3D(image_paths, root_path)
return {} return {}
class SortingMethod(str, Enum):
    """Available orderings for uploaded task data.

    Inherits from ``str`` so members compare equal to their plain string
    values (e.g. ``SortingMethod.NATURAL == 'natural'``), which lets
    request parameters be matched without explicit conversion.
    """
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (value, name) pairs, the form Django model fields expect.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        return self.value

def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return ``images`` ordered according to ``sorting_method``.

    Args:
        images: Sequence of items to order (paths or file-like objects).
        sorting_method: One of the ``SortingMethod`` values.
        func: Optional key function forwarded to the underlying sort
            (ignored for ``PREDEFINED``).

    Returns:
        The ordered sequence. ``PREDEFINED`` returns ``images`` as-is;
        the other methods return a new list.

    Raises:
        NotImplementedError: If ``sorting_method`` is not recognized.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        # natsort's os_sorted mimics the numeric-aware ordering used by
        # the host OS file browser.
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: the previous implementation shuffled the
        # caller's list in place, unlike the other (non-mutating) branches.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()

Loading…
Cancel
Save