Added sorting methods (#3937)

main
Maria Khrustaleva 4 years ago committed by GitHub
parent 2ed9f9cdb4
commit 68fbcdec43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>) - Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>)
- Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>) - Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>)
- Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>) - Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>)
- Data sorting option (<https://github.com/openvinotoolkit/cvat/pull/3937>)
- Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>) - Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>)
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>) - Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)

@ -1,12 +1,12 @@
{ {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^0.21.4", "axios": "^0.21.4",

@ -1,6 +1,6 @@
{ {
"name": "cvat-core", "name": "cvat-core",
"version": "3.20.1", "version": "3.21.0",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration", "description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js", "main": "babel.config.js",
"scripts": { "scripts": {

@ -367,6 +367,24 @@
KEY_FILE_PATH: 'KEY_FILE_PATH', KEY_FILE_PATH: 'KEY_FILE_PATH',
}); });
/**
* Sorting methods
* @enum {string}
* @name SortingMethod
* @memberof module:API.cvat.enums
* @property {string} LEXICOGRAPHICAL 'lexicographical'
* @property {string} NATURAL 'natural'
* @property {string} PREDEFINED 'predefined'
* @property {string} RANDOM 'random'
* @readonly
*/
const SortingMethod = Object.freeze({
LEXICOGRAPHICAL: 'lexicographical',
NATURAL: 'natural',
PREDEFINED: 'predefined',
RANDOM: 'random',
});
module.exports = { module.exports = {
ShareFileType, ShareFileType,
TaskStatus, TaskStatus,
@ -384,5 +402,6 @@
DimensionType, DimensionType,
CloudStorageProviderType, CloudStorageProviderType,
CloudStorageCredentialsType, CloudStorageCredentialsType,
SortingMethod,
}; };
})(); })();

@ -1017,6 +1017,7 @@
copy_data: undefined, copy_data: undefined,
dimension: undefined, dimension: undefined,
cloud_storage_id: undefined, cloud_storage_id: undefined,
sorting_method: undefined,
}; };
const updatedFields = new FieldUpdateTrigger({ const updatedFields = new FieldUpdateTrigger({
@ -1549,6 +1550,16 @@
cloudStorageId: { cloudStorageId: {
get: () => data.cloud_storage_id, get: () => data.cloud_storage_id,
}, },
sortingMethod: {
/**
* @name sortingMethod
* @type {module:API.cvat.enums.SortingMethod}
* @memberof module:API.cvat.classes.Task
* @instance
* @readonly
*/
get: () => data.sorting_method,
},
_internalData: { _internalData: {
get: () => data, get: () => data,
}, },
@ -2061,6 +2072,7 @@
image_quality: this.imageQuality, image_quality: this.imageQuality,
use_zip_chunks: this.useZipChunks, use_zip_chunks: this.useZipChunks,
use_cache: this.useCache, use_cache: this.useCache,
sorting_method: this.sortingMethod,
}; };
if (typeof this.startFrame !== 'undefined') { if (typeof this.startFrame !== 'undefined') {

@ -1,12 +1,12 @@
{ {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@ant-design/icons": "^4.6.3", "@ant-design/icons": "^4.6.3",

@ -1,6 +1,6 @@
{ {
"name": "cvat-ui", "name": "cvat-ui",
"version": "1.28.1", "version": "1.28.2",
"description": "CVAT single-page application", "description": "CVAT single-page application",
"main": "src/index.tsx", "main": "src/index.tsx",
"scripts": { "scripts": {

@ -353,6 +353,7 @@ export function createTaskAsync(data: any): ThunkAction<Promise<void>, {}, {}, A
image_quality: 70, image_quality: 70,
use_zip_chunks: data.advanced.useZipChunks, use_zip_chunks: data.advanced.useZipChunks,
use_cache: data.advanced.useCache, use_cache: data.advanced.useCache,
sorting_method: data.advanced.sortingMethod,
}; };
if (data.projectId) { if (data.projectId) {

@ -7,6 +7,7 @@ import { Row, Col } from 'antd/lib/grid';
import { PercentageOutlined } from '@ant-design/icons'; import { PercentageOutlined } from '@ant-design/icons';
import Input from 'antd/lib/input'; import Input from 'antd/lib/input';
import Select from 'antd/lib/select'; import Select from 'antd/lib/select';
import Radio from 'antd/lib/radio';
import Checkbox from 'antd/lib/checkbox'; import Checkbox from 'antd/lib/checkbox';
import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form'; import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form';
import Text from 'antd/lib/typography/Text'; import Text from 'antd/lib/typography/Text';
@ -16,6 +17,13 @@ import patterns from 'utils/validation-patterns';
const { Option } = Select; const { Option } = Select;
export enum SortingMethod {
LEXICOGRAPHICAL = 'lexicographical',
NATURAL = 'natural',
PREDEFINED = 'predefined',
RANDOM = 'random',
}
export interface AdvancedConfiguration { export interface AdvancedConfiguration {
bugTracker?: string; bugTracker?: string;
imageQuality?: number; imageQuality?: number;
@ -31,6 +39,7 @@ export interface AdvancedConfiguration {
dataChunkSize?: number; dataChunkSize?: number;
useCache: boolean; useCache: boolean;
copyData?: boolean; copyData?: boolean;
sortingMethod: SortingMethod;
} }
const initialValues: AdvancedConfiguration = { const initialValues: AdvancedConfiguration = {
@ -39,6 +48,7 @@ const initialValues: AdvancedConfiguration = {
useZipChunks: true, useZipChunks: true,
useCache: true, useCache: true,
copyData: false, copyData: false,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
}; };
interface Props { interface Props {
@ -178,6 +188,33 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
); );
} }
private renderSortingMethodRadio(): JSX.Element {
return (
<Form.Item
label='Sorting method'
name='sortingMethod'
rules={[
{
required: true,
message: 'The field is required.',
},
]}
help='Specify how to sort images. It is not relevant for videos.'
>
<Radio.Group>
<Radio value={SortingMethod.LEXICOGRAPHICAL} key={SortingMethod.LEXICOGRAPHICAL}>
Lexicographical
</Radio>
<Radio value={SortingMethod.NATURAL} key={SortingMethod.NATURAL}>Natural</Radio>
<Radio value={SortingMethod.PREDEFINED} key={SortingMethod.PREDEFINED}>
Predefined
</Radio>
<Radio value={SortingMethod.RANDOM} key={SortingMethod.RANDOM}>Random</Radio>
</Radio.Group>
</Form.Item>
);
}
private renderImageQuality(): JSX.Element { private renderImageQuality(): JSX.Element {
return ( return (
<CVATTooltip title='Defines images compression level'> <CVATTooltip title='Defines images compression level'>
@ -290,8 +327,7 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
> >
<Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'> <Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'>
{ {
dumpers.map((dumper: any) => dumpers.map((dumper: any) => <Option value={dumper.name}>{dumper.name}</Option>)
<Option value={dumper.name}>{dumper.name}</Option>)
} }
</Select> </Select>
</Form.Item> </Form.Item>
@ -384,6 +420,9 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
const { installedGit, activeFileManagerTab } = this.props; const { installedGit, activeFileManagerTab } = this.props;
return ( return (
<Form initialValues={initialValues} ref={this.formRef} layout='vertical'> <Form initialValues={initialValues} ref={this.formRef} layout='vertical'>
<Row>
<Col>{this.renderSortingMethodRadio()}</Col>
</Row>
{activeFileManagerTab === 'share' ? ( {activeFileManagerTab === 'share' ? (
<Row> <Row>
<Col>{this.renderCopyDataChechbox()}</Col> <Col>{this.renderCopyDataChechbox()}</Col>

@ -20,7 +20,7 @@ import { Files } from 'components/file-manager/file-manager';
import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form'; import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form';
import ProjectSearchField from './project-search-field'; import ProjectSearchField from './project-search-field';
import ProjectSubsetField from './project-subset-field'; import ProjectSubsetField from './project-subset-field';
import AdvancedConfigurationForm, { AdvancedConfiguration } from './advanced-configuration-form'; import AdvancedConfigurationForm, { AdvancedConfiguration, SortingMethod } from './advanced-configuration-form';
export interface CreateTaskData { export interface CreateTaskData {
projectId: number | null; projectId: number | null;
@ -54,6 +54,7 @@ const defaultState = {
lfs: false, lfs: false,
useZipChunks: true, useZipChunks: true,
useCache: true, useCache: true,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
}, },
labels: [], labels: [],
files: { files: {

@ -65,6 +65,7 @@ class _TaskBackupBase():
'chunk_type', 'chunk_type',
'storage_method', 'storage_method',
'storage', 'storage',
'sorting_method',
} }
self._prepare_meta(allowed_fields, data) self._prepare_meta(allowed_fields, data)

@ -14,11 +14,13 @@ from contextlib import closing
import av import av
import numpy as np import numpy as np
from natsort import os_sorted
from pyunpack import Archive from pyunpack import Archive
from PIL import Image, ImageFile from PIL import Image, ImageFile
from random import shuffle
import open3d as o3d import open3d as o3d
from cvat.apps.engine.utils import rotate_image from cvat.apps.engine.utils import rotate_image
from cvat.apps.engine.models import DimensionType from cvat.apps.engine.models import DimensionType, SortingMethod
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web # fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file # see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@ -47,9 +49,22 @@ def files_to_ignore(directory):
return True return True
return False return False
def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
if sorting_method == SortingMethod.LEXICOGRAPHICAL:
return sorted(images, key=func)
elif sorting_method == SortingMethod.NATURAL:
return os_sorted(images, key=func)
elif sorting_method == SortingMethod.PREDEFINED:
return images
elif sorting_method == SortingMethod.RANDOM:
shuffle(images)
return images
else:
raise NotImplementedError()
class IMediaReader(ABC): class IMediaReader(ABC):
def __init__(self, source_path, step, start, stop, dimension): def __init__(self, source_path, step, start, stop, dimension):
self._source_path = sorted(source_path) self._source_path = source_path
self._step = step self._step = step
self._start = start self._start = start
self._stop = stop self._stop = stop
@ -90,7 +105,13 @@ class IMediaReader(ABC):
return range(self._start, self._stop, self._step) return range(self._start, self._stop, self._step)
class ImageListReader(IMediaReader): class ImageListReader(IMediaReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path: if not source_path:
raise Exception('No image found') raise Exception('No image found')
@ -102,13 +123,15 @@ class ImageListReader(IMediaReader):
assert stop > start assert stop > start
super().__init__( super().__init__(
source_path=source_path, source_path=sort(source_path, sorting_method),
step=step, step=step,
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension dimension=dimension
) )
self._sorting_method = sorting_method
def __iter__(self): def __iter__(self):
for i in range(self._start, self._stop, self._step): for i in range(self._start, self._stop, self._step):
yield (self.get_image(i), self.get_path(i), i) yield (self.get_image(i), self.get_path(i), i)
@ -121,7 +144,8 @@ class ImageListReader(IMediaReader):
step=self._step, step=self._step,
start=self._start, start=self._start,
stop=self._stop, stop=self._stop,
dimension=self._dimension dimension=self._dimension,
sorting_method=self._sorting_method
) )
def get_path(self, i): def get_path(self, i):
@ -154,7 +178,8 @@ class ImageListReader(IMediaReader):
source_path=source_files, source_path=source_files,
step=step, step=step,
start=start, start=start,
stop=stop stop=stop,
sorting_method=self._sorting_method,
) )
self._dimension = dimension self._dimension = dimension
@ -163,7 +188,13 @@ class ImageListReader(IMediaReader):
return [self.get_path(idx) for idx, _ in enumerate(self._source_path)] return [self.get_path(idx) for idx, _ in enumerate(self._source_path)]
class DirectoryReader(ImageListReader): class DirectoryReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
image_paths = [] image_paths = []
for source in source_path: for source in source_path:
for root, _, files in os.walk(source): for root, _, files in os.walk(source):
@ -176,10 +207,17 @@ class DirectoryReader(ImageListReader):
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension, dimension=dimension,
sorting_method=sorting_method,
) )
class ArchiveReader(DirectoryReader): class ArchiveReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._archive_source = source_path[0] self._archive_source = source_path[0]
extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0]) extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(extract_dir) Archive(self._archive_source).extractall(extract_dir)
@ -190,11 +228,18 @@ class ArchiveReader(DirectoryReader):
step=step, step=step,
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension dimension=dimension,
sorting_method=sorting_method,
) )
class PdfReader(ImageListReader): class PdfReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path: if not source_path:
raise Exception('No PDF found') raise Exception('No PDF found')
@ -223,14 +268,26 @@ class PdfReader(ImageListReader):
start=start, start=start,
stop=stop, stop=stop,
dimension=dimension, dimension=dimension,
sorting_method=sorting_method,
) )
class ZipReader(ImageListReader): class ZipReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._zip_source = zipfile.ZipFile(source_path[0], mode='r') self._zip_source = zipfile.ZipFile(source_path[0], mode='r')
self.extract_dir = source_path[1] if len(source_path) > 1 else None self.extract_dir = source_path[1] if len(source_path) > 1 else None
file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image'] file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image']
super().__init__(file_list, step=step, start=start, stop=stop, dimension=dimension) super().__init__(file_list,
step=step,
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method)
def __del__(self): def __del__(self):
self._zip_source.close() self._zip_source.close()

@ -0,0 +1,19 @@
# Generated by Django 3.1.13 on 2021-12-03 08:06
import cvat.apps.engine.models
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('engine', '0044_auto_20211123_0824'),
]
operations = [
migrations.AddField(
model_name='data',
name='sorting_method',
field=models.CharField(choices=[('lexicographical', 'LEXICOGRAPHICAL'), ('natural', 'NATURAL'), ('predefined', 'PREDEFINED'), ('random', 'RANDOM')], default=cvat.apps.engine.models.SortingMethod['LEXICOGRAPHICAL'], max_length=15),
),
]

@ -81,6 +81,19 @@ class StorageChoice(str, Enum):
def __str__(self): def __str__(self):
return self.value return self.value
class SortingMethod(str, Enum):
LEXICOGRAPHICAL = 'lexicographical'
NATURAL = 'natural'
PREDEFINED = 'predefined'
RANDOM = 'random'
@classmethod
def choices(cls):
return tuple((x.value, x.name) for x in cls)
def __str__(self):
return self.value
class Data(models.Model): class Data(models.Model):
chunk_size = models.PositiveIntegerField(null=True) chunk_size = models.PositiveIntegerField(null=True)
size = models.PositiveIntegerField(default=0) size = models.PositiveIntegerField(default=0)
@ -95,6 +108,7 @@ class Data(models.Model):
storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM) storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)
storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL) storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL)
cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data') cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data')
sorting_method = models.CharField(max_length=15, choices=SortingMethod.choices(), default=SortingMethod.LEXICOGRAPHICAL)
class Meta: class Meta:
default_permissions = () default_permissions = ()

@ -281,7 +281,7 @@ class DataSerializer(serializers.ModelSerializer):
model = models.Data model = models.Data
fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter', fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks', 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage') 'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage', 'sorting_method')
# pylint: disable=no-self-use # pylint: disable=no-self-use
def validate_frame_filter(self, value): def validate_frame_filter(self, value):
@ -308,9 +308,9 @@ class DataSerializer(serializers.ModelSerializer):
client_files = validated_data.pop('client_files') client_files = validated_data.pop('client_files')
server_files = validated_data.pop('server_files') server_files = validated_data.pop('server_files')
remote_files = validated_data.pop('remote_files') remote_files = validated_data.pop('remote_files')
validated_data.pop('use_zip_chunks') for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
validated_data.pop('use_cache') validated_data.pop(extra_key)
validated_data.pop('copy_data')
db_data = models.Data.objects.create(**validated_data) db_data = models.Data.objects.create(**validated_data)
data_path = db_data.get_data_dirname() data_path = db_data.get_data_dirname()

@ -22,7 +22,7 @@ from django.db import transaction
from cvat.apps.engine import models from cvat.apps.engine import models
from cvat.apps.engine.log import slogger from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime) ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest.core import VideoManifestValidator from utils.dataset_manifest.core import VideoManifestValidator
@ -123,15 +123,18 @@ def _count_files(data, manifest_file=None):
raise ValueError("Bad file path: " + path) raise ValueError("Bad file path: " + path)
server_files.append(path) server_files.append(path)
server_files.sort(reverse=True) sorted_server_files = sorted(server_files, reverse=True)
# The idea of the code is trivial. After sort we will have files in the # The idea of the code is trivial. After sort we will have files in the
# following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c' # following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c'
# Let's keep all items which aren't substrings of the previous item. In # Let's keep all items which aren't substrings of the previous item. In
# the example above only 2.txt and 1.txt files will be in the final list. # the example above only 2.txt and 1.txt files will be in the final list.
# Also need to correctly handle 'a/b/c0', 'a/b/c' case. # Also need to correctly handle 'a/b/c0', 'a/b/c' case.
data['server_files'] = [v[1] for v in zip([""] + server_files, server_files) without_extra_dirs = [v[1] for v in zip([""] + sorted_server_files, sorted_server_files)
if not os.path.dirname(v[0]).startswith(v[1])] if not os.path.dirname(v[0]).startswith(v[1])]
# we need to keep the original sequence of files
data['server_files'] = [f for f in server_files if f in without_extra_dirs]
def count_files(file_mapping, counter): def count_files(file_mapping, counter):
for rel_path, full_path in file_mapping.items(): for rel_path, full_path in file_mapping.items():
mime = get_mime(full_path) mime = get_mime(full_path)
@ -141,7 +144,7 @@ def _count_files(data, manifest_file=None):
manifest_file.append(rel_path) manifest_file.append(rel_path)
else: else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't " slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to a video or an image file)".format(full_path)) "correspond to supported MIME file type)".format(full_path))
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
@ -213,6 +216,7 @@ def _download_data(urls, upload_dir):
def _get_manifest_frame_indexer(start_frame=0, frame_step=1): def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
return lambda frame_id: start_frame + frame_id * frame_step return lambda frame_id: start_frame + frame_id * frame_step
@transaction.atomic @transaction.atomic
def _create_thread(tid, data, isImport=False): def _create_thread(tid, data, isImport=False):
slogger.glob.info("create task #{}".format(tid)) slogger.glob.info("create task #{}".format(tid))
@ -222,15 +226,13 @@ def _create_thread(tid, data, isImport=False):
upload_dir = db_data.get_upload_dirname() upload_dir = db_data.get_upload_dirname()
if data['remote_files']: if data['remote_files']:
if db_data.storage != models.StorageChoice.CLOUD_STORAGE: data['remote_files'] = _download_data(data['remote_files'], upload_dir)
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
manifest_file = [] manifest_file = []
media = _count_files(data, manifest_file) media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file) media, task_mode = _validate_data(media, manifest_file)
if manifest_file: if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \ raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
"File with meta information can be uploaded if 'Use cache' option is also selected"
if data['server_files']: if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL: if db_data.storage == models.StorageChoice.LOCAL:
@ -252,19 +254,22 @@ def _create_thread(tid, data, isImport=False):
'specific_attributes': db_cloud_storage.get_specific_attributes() 'specific_attributes': db_cloud_storage.get_specific_attributes()
} }
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
first_sorted_media_image = sorted(media['image'])[0] sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file # prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path()) manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager( cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]), os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname() db_data.cloud_storage.get_storage_dirname()
) )
cloud_storage_manifest.set_index() cloud_storage_manifest.set_index()
media_files = sorted(media['image']) sequence, content = cloud_storage_manifest.get_subset(sorted_media)
content = cloud_storage_manifest.get_subset(media_files) sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(content) manifest.create(sorted_content)
av_scan_paths(upload_dir) av_scan_paths(upload_dir)
@ -292,24 +297,48 @@ def _create_thread(tid, data, isImport=False):
if media_files: if media_files:
if extractor is not None: if extractor is not None:
raise Exception('Combined data types are not supported') raise Exception('Combined data types are not supported')
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE: if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step()) manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
db_data.start_frame = 0 db_data.start_frame = 0
data['stop_frame'] = None data['stop_frame'] = None
db_data.frame_filter = '' db_data.frame_filter = ''
if isImport and media_type != 'video' and db_data.storage_method == models.StorageMethodChoice.CACHE:
# we should sort media_files according to the manifest content sequence
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.set_index()
sorted_media_files = []
for idx in range(len(media_files)):
properties = manifest[manifest_index(idx)]
image_name = properties.get('name', None)
image_extension = properties.get('extension', None)
full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None
if full_image_path and full_image_path in media_files:
sorted_media_files.append(full_image_path)
media_files = sorted_media_files.copy()
del sorted_media_files
data['sorting_method'] = models.SortingMethod.PREDEFINED
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if manifest_file and not isImport and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to upload manifest file and use random sorting")
if isImport and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")
extractor = MEDIA_TYPES[media_type]['extractor']( if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_path=source_paths, source_paths.append(db_data.get_upload_dirname())
step=db_data.get_frame_step(), upload_dir = db_data.get_upload_dirname()
start=db_data.start_frame, db_data.storage = models.StorageChoice.LOCAL
stop=data['stop_frame'],
)
details = {
'source_path': source_paths,
'step': db_data.get_frame_step(),
'start': db_data.start_frame,
'stop': data['stop_frame'],
}
if media_type != 'video':
details['sorting_method'] = data['sorting_method']
extractor = MEDIA_TYPES[media_type]['extractor'](**details)
validate_dimension = ValidateDimension() validate_dimension = ValidateDimension()
if isinstance(extractor, MEDIA_TYPES['zip']['extractor']): if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
@ -474,8 +503,12 @@ def _create_thread(tid, data, isImport=False):
chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames] chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
img_sizes = [] img_sizes = []
for _, frame_id in chunk_paths: for chunk_path, frame_id in chunk_paths:
properties = manifest[manifest_index(frame_id)] properties = manifest[manifest_index(frame_id)]
# check mapping
if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
raise Exception('Incorrect file mapping to manifest content')
if db_task.dimension == models.DimensionType.DIM_2D: if db_task.dimension == models.DimensionType.DIM_2D:
resolution = (properties['width'], properties['height']) resolution = (properties['width'], properties['height'])
else: else:

@ -30,9 +30,9 @@ from rest_framework.test import APIClient, APITestCase
from datumaro.util.test_utils import TestDir from datumaro.util.test_utils import TestDir
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project, from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project,
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice) Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice, DimensionType,
from cvat.apps.engine.media_extractors import ValidateDimension SortingMethod)
from cvat.apps.engine.models import DimensionType from cvat.apps.engine.media_extractors import ValidateDimension, sort
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
def create_db_users(cls): def create_db_users(cls):
@ -2169,17 +2169,29 @@ class TaskImportExportAPITestCase(APITestCase):
with open(path, "wb") as image: with open(path, "wb") as image:
image.write(data.read()) image.write(data.read())
cls.media_data.append( data = {
{ "image_quality": 75,
**{"image_quality": 75, "copy_data": True,
"copy_data": True, "start_frame": 2,
"start_frame": 2, "stop_frame": 9,
"stop_frame": 9, "frame_filter": "step=2",
"frame_filter": "step=2", **{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}, }
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)}, use_cache_data = {
} **data,
) 'use_cache': True,
}
cls.media_data.append(data)
data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(data)
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.RANDOM
cls.media_data.append(use_cache_data)
filename = "test_video_1.mp4" filename = "test_video_1.mp4"
path = os.path.join(settings.SHARE_ROOT, filename) path = os.path.join(settings.SHARE_ROOT, filename)
@ -2267,13 +2279,47 @@ class TaskImportExportAPITestCase(APITestCase):
} }
) )
data = {
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_10.jpg")[1],
"client_files[3]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.extend([ cls.media_data.extend([
# image list local # image list local
# sorted data
# natural: test_1.jpg, test_2.jpg, test_3.jpg, test_10.jpg
{ {
"client_files[0]": generate_image_file("test_1.jpg")[1], **use_cache_data,
"client_files[1]": generate_image_file("test_2.jpg")[1], 'sorting_method': SortingMethod.NATURAL,
"client_files[2]": generate_image_file("test_3.jpg")[1], },
"image_quality": 75, {
**data,
'sorting_method': SortingMethod.NATURAL,
},
# random
{
**use_cache_data,
'sorting_method': SortingMethod.RANDOM,
},
# predefined: test_1.jpg, test_2.jpg, test_10.jpg, test_2.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.PREDEFINED,
},
# lexicographical: test_1.jpg, test_10.jpg, test_2.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
{
**data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
}, },
# video local # video local
{ {
@ -2576,7 +2622,7 @@ def generate_manifest_file(data_type, manifest_path, sources):
kwargs = { kwargs = {
'images': { 'images': {
'sources': sources, 'sources': sources,
'is_sorted': False, 'sorting_method': SortingMethod.LEXICOGRAPHICAL,
}, },
'video': { 'video': {
'media_file': sources[0], 'media_file': sources[0],
@ -2633,6 +2679,13 @@ class TaskDataAPITestCase(APITestCase):
image.write(data.read()) image.write(data.read())
cls._image_sizes[filename] = img_size cls._image_sizes[filename] = img_size
filename = "test_10.jpg"
path = os.path.join(settings.SHARE_ROOT, filename)
img_size, data = generate_image_file(filename)
with open(path, "wb") as image:
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = os.path.join("data", "test_3.jpg") filename = os.path.join("data", "test_3.jpg")
path = os.path.join(settings.SHARE_ROOT, filename) path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path)) os.makedirs(os.path.dirname(path))
@ -2732,6 +2785,9 @@ class TaskDataAPITestCase(APITestCase):
path = os.path.join(settings.SHARE_ROOT, "test_3.jpg") path = os.path.join(settings.SHARE_ROOT, "test_3.jpg")
os.remove(path) os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "test_10.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg") path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg")
os.remove(path) os.remove(path)
@ -2892,9 +2948,9 @@ class TaskDataAPITestCase(APITestCase):
client_files = [img for key, img in data.items() if key.startswith("client_files")] client_files = [img for key, img in data.items() if key.startswith("client_files")]
if server_files: if server_files:
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sorted(server_files)] source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sort(server_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL))]
else: else:
source_files = [f for f in sorted(client_files, key=lambda e: e.name)] source_files = [f for f in sort(client_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL), func=lambda e: e.name)]
source_images = [] source_images = []
for f in source_files: for f in source_files:
@ -3128,7 +3184,7 @@ class TaskDataAPITestCase(APITestCase):
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
task_spec = { task_spec = {
"name": "cached images task without copying #16", "name": "cached images task with default sorting data and without copying #16",
"overlap": 0, "overlap": 0,
"segment_size": 0, "segment_size": 0,
"labels": [ "labels": [
@ -3140,14 +3196,14 @@ class TaskDataAPITestCase(APITestCase):
task_data = { task_data = {
"server_files[0]": "test_1.jpg", "server_files[0]": "test_1.jpg",
"server_files[1]": "test_2.jpg", "server_files[1]": "test_2.jpg",
"server_files[2]": "test_3.jpg", "server_files[2]": "test_10.jpg",
"image_quality": 70, "image_quality": 70,
"use_cache": True, "use_cache": True,
} }
image_sizes = [ image_sizes = [
self._image_sizes[task_data["server_files[0]"]], self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]], self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
] ]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
@ -3381,6 +3437,44 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
# test predefined sorting
task_spec.update([('name', 'task custom data sequence #28')])
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_3.jpg",
"server_files[2]": "test_2.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.PREDEFINED
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
# test a natural data sequence
task_spec.update([('name', 'task native data sequence #29')])
task_data = {
"server_files[0]": "test_10.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_1.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.NATURAL
}
image_sizes = [
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[0]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
def test_api_v1_tasks_id_data_admin(self): def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin) self._test_api_v1_tasks_id_data(self.admin)

@ -637,9 +637,8 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
db_task.data = db_data db_task.data = db_data
db_task.save() db_task.save()
data = {k:v for k, v in serializer.data.items()} data = {k:v for k, v in serializer.data.items()}
data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks'] for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
data['use_cache'] = serializer.validated_data['use_cache'] data[extra_key] = serializer.validated_data[extra_key]
data['copy_data'] = serializer.validated_data['copy_data']
if data['use_cache']: if data['use_cache']:
db_task.data.storage_method = StorageMethodChoice.CACHE db_task.data.storage_method = StorageMethodChoice.CACHE
db_task.data.save(update_fields=['storage_method']) db_task.data.save(update_fields=['storage_method'])

@ -55,3 +55,4 @@ google-cloud-storage==1.42.0
# when pycocotools is installed by wheel in python 3.8+ # when pycocotools is installed by wheel in python 3.8+
datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0

@ -125,7 +125,15 @@ To create a 3D task, you need to use the following directory structures:
## Advanced configuration ## Advanced configuration
![](/images/image128_use_cache.jpg) ![](/images/image128.jpg)
### Sorting method
An option that defines the order in which the uploaded files are arranged. It is not applicable to video files.
For example, the sequence `2.jpeg, 10.jpeg, 1.jpeg` after sorting will be:
- `lexicographical`: 1.jpeg, 10.jpeg, 2.jpeg
- `natural`: 1.jpeg, 2.jpeg, 10.jpeg
- `predefined`: 2.jpeg, 10.jpeg, 1.jpeg (the order in which the files were attached)
- `random`: a random permutation of the uploaded files
### Use zip chunks ### Use zip chunks

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

@ -45,6 +45,8 @@ class CLI():
data['copy_data'] = kwargs.get('copy_data') data['copy_data'] = kwargs.get('copy_data')
if 'use_cache' in kwargs: if 'use_cache' in kwargs:
data['use_cache'] = kwargs.get('use_cache') data['use_cache'] = kwargs.get('use_cache')
if 'sorting_method' in kwargs:
data['sorting_method'] = kwargs.get('sorting_method')
response = self.session.post(url, data=data, files=files) response = self.session.post(url, data=data, files=files)
response.raise_for_status() response.raise_for_status()

@ -208,6 +208,13 @@ task_create_parser.add_argument(
action='store_false', action='store_false',
help='''set the option to use the cache (default: %(default)s)''' help='''set the option to use the cache (default: %(default)s)'''
) )
# Data ordering for the created task; mirrors the server-side
# SortingMethod choices. Fix: help text said "soring" instead of "sorting".
task_create_parser.add_argument(
    '--sorting-method',
    default='lexicographical',
    choices=['lexicographical', 'natural', 'predefined', 'random'],
    help='''data sorting method (default: %(default)s)'''
)
####################################################################### #######################################################################
# Delete # Delete
####################################################################### #######################################################################

@ -10,7 +10,7 @@ from contextlib import closing
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from PIL import Image from PIL import Image
from .utils import md5_hash, rotate_image from .utils import md5_hash, rotate_image, sort, SortingMethod
class VideoStreamReader: class VideoStreamReader:
def __init__(self, source_path, chunk_size, force): def __init__(self, source_path, chunk_size, force):
@ -146,14 +146,14 @@ class DatasetImagesReader:
def __init__(self, def __init__(self,
sources, sources,
meta=None, meta=None,
is_sorted=True, sorting_method=SortingMethod.PREDEFINED,
use_image_hash=False, use_image_hash=False,
start = 0, start = 0,
step = 1, step = 1,
stop = None, stop = None,
*args, *args,
**kwargs): **kwargs):
self._sources = sources if is_sorted else sorted(sources) self._sources = sort(sources, sorting_method)
self._meta = meta self._meta = meta
self._data_dir = kwargs.get('data_dir', None) self._data_dir = kwargs.get('data_dir', None)
self._use_image_hash = use_image_hash self._use_image_hash = use_image_hash
@ -601,11 +601,18 @@ class ImageManifestManager(_ManifestManager):
return (f"{image['name']}{image['extension']}" for _, image in self) return (f"{image['name']}{image['extension']}" for _, image in self)
def get_subset(self, subset_names): def get_subset(self, subset_names):
return ({ index_list = []
'name': f"{image['name']}", subset = []
'extension': f"{image['extension']}", for _, image in self:
'width': image['width'], image_name = f"{image['name']}{image['extension']}"
'height': image['height'], if image_name in subset_names:
'meta': image['meta'], index_list.append(subset_names.index(image_name))
'checksum': f"{image['checksum']}" subset.append({
} for _, image in self if f"{image['name']}{image['extension']}" in subset_names) 'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
})
return index_list, subset

@ -17,6 +17,8 @@ def get_args():
'if by default the video does not meet the requirements and a manifest file is not prepared') 'if by default the video does not meet the requirements and a manifest file is not prepared')
parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved', parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved',
default=os.getcwd()) default=os.getcwd())
parser.add_argument('--sorting', choices=['lexicographical', 'natural', 'predefined', 'random'],
type=str, default='lexicographical')
parser.add_argument('source', type=str, help='Source paths') parser.add_argument('source', type=str, help='Source paths')
return parser.parse_args() return parser.parse_args()
@ -63,7 +65,7 @@ def main():
try: try:
assert len(sources), 'A images was not found' assert len(sources), 'A images was not found'
manifest = ImageManifestManager(manifest_path=manifest_directory) manifest = ImageManifestManager(manifest_path=manifest_directory)
manifest.link(sources=sources, meta=meta, is_sorted=False, manifest.link(sources=sources, meta=meta, sorting_method=args.sorting,
use_image_hash=True, data_dir=data_dir) use_image_hash=True, data_dir=data_dir)
manifest.create(_tqdm=tqdm) manifest.create(_tqdm=tqdm)
except Exception as ex: except Exception as ex:

@ -2,3 +2,4 @@ av==8.0.2 --no-binary=av
opencv-python-headless==4.4.0.42 opencv-python-headless==4.4.0.42
Pillow==7.2.0 Pillow==7.2.0
tqdm==4.58.0 tqdm==4.58.0
natsort==8.0.0

@ -1,12 +1,16 @@
# Copyright (C) 2021 Intel Corporation # Copyright (C) 2021 Intel Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import os import os
import re import re
import hashlib import hashlib
import mimetypes import mimetypes
import cv2 as cv import cv2 as cv
from av import VideoFrame from av import VideoFrame
from enum import Enum
from natsort import os_sorted
from random import shuffle
def rotate_image(image, angle): def rotate_image(image, angle):
height, width = image.shape[:2] height, width = image.shape[:2]
@ -187,3 +191,29 @@ def detect_related_images(image_paths, root_path):
elif data_are_3d: elif data_are_3d:
return _detect_related_images_3D(image_paths, root_path) return _detect_related_images_3D(image_paths, root_path)
return {} return {}
class SortingMethod(str, Enum):
    """Available orderings for uploaded task data.

    Inherits from ``str`` so members compare equal to their plain string
    values (e.g. ``SortingMethod.NATURAL == 'natural'``), which lets
    request parameters be matched without explicit conversion.
    """
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (value, name) pairs, the form Django model fields expect.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        return self.value

def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return ``images`` ordered according to ``sorting_method``.

    Args:
        images: Sequence of items to order (paths or file-like objects).
        sorting_method: One of the ``SortingMethod`` values.
        func: Optional key function forwarded to the underlying sort
            (ignored for ``PREDEFINED``).

    Returns:
        The ordered sequence. ``PREDEFINED`` returns ``images`` as-is;
        the other methods return a new list.

    Raises:
        NotImplementedError: If ``sorting_method`` is not recognized.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        # natsort's os_sorted mimics the numeric-aware ordering used by
        # the host OS file browser.
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: the previous implementation shuffled the
        # caller's list in place, unlike the other (non-mutating) branches.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()

Loading…
Cancel
Save