Add progressbar to task import and create in CLI (#51)

main
Maxim Zhiltsov 4 years ago committed by GitHub
parent e49741848e
commit b50b275e0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -3,8 +3,8 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from __future__ import annotations from __future__ import annotations
from contextlib import closing from contextlib import ExitStack, closing
from typing import Optional, Tuple from typing import Dict, List, Optional, Sequence, Tuple
import tqdm import tqdm
import json import json
@ -21,8 +21,7 @@ from tusclient import client
from tusclient import uploader from tusclient import uploader
from tusclient.request import TusRequest, TusUploadFailed from tusclient.request import TusRequest, TusUploadFailed
from utils.cli.core.utils import StreamWithProgress from .utils import StreamWithProgress, expect_status
from .definition import ResourceType from .definition import ResourceType
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -33,17 +32,47 @@ class CLI:
self.session = session self.session = session
self.login(credentials) self.login(credentials)
def tasks_data(self, task_id, resource_type, resources, **kwargs): def tasks_data(self, task_id: int, resource_type: ResourceType,
resources: Sequence[str], *, pbar: tqdm.tqdm = None, **kwargs) -> None:
""" Add local, remote, or shared files to an existing task. """ """ Add local, remote, or shared files to an existing task. """
url = self.api.tasks_id_data(task_id) url = self.api.tasks_id_data(task_id)
data = {} data = {}
files = None
if resource_type == ResourceType.LOCAL: if resource_type == ResourceType.LOCAL:
files = {'client_files[{}]'.format(i): open(f, 'rb') for i, f in enumerate(resources)} bulk_files: Dict[str, int] = {}
separate_files: Dict[str, int] = {}
MAX_REQUEST_SIZE = 100 * 2**20
for filename in resources:
filename = os.path.abspath(filename)
file_size = os.stat(filename).st_size
if MAX_REQUEST_SIZE < file_size:
separate_files[filename] = file_size
else:
bulk_files[filename] = file_size
total_size = sum(bulk_files.values()) + sum(separate_files.values())
# split files by requests
bulk_file_groups = []
current_group_size = 0
current_group = []
for filename, file_size in bulk_files.items():
if MAX_REQUEST_SIZE < current_group_size + file_size:
bulk_file_groups.append((current_group, current_group_size))
current_group_size = 0
current_group = []
current_group.append(filename)
current_group_size += file_size
if current_group:
bulk_file_groups.append((current_group, current_group_size))
elif resource_type == ResourceType.REMOTE: elif resource_type == ResourceType.REMOTE:
data = {'remote_files[{}]'.format(i): f for i, f in enumerate(resources)} data = {'remote_files[{}]'.format(i): f for i, f in enumerate(resources)}
elif resource_type == ResourceType.SHARE: elif resource_type == ResourceType.SHARE:
data = {'server_files[{}]'.format(i): f for i, f in enumerate(resources)} data = {'server_files[{}]'.format(i): f for i, f in enumerate(resources)}
data['image_quality'] = 70 data['image_quality'] = 70
## capture additional kwargs ## capture additional kwargs
@ -54,8 +83,38 @@ class CLI:
if kwargs.get('frame_step') is not None: if kwargs.get('frame_step') is not None:
data['frame_filter'] = f"step={kwargs.get('frame_step')}" data['frame_filter'] = f"step={kwargs.get('frame_step')}"
response = self.session.post(url, data=data, files=files) if resource_type in [ResourceType.REMOTE, ResourceType.SHARE]:
response.raise_for_status() response = self.session.post(url, data=data)
response.raise_for_status()
elif resource_type == ResourceType.LOCAL:
if pbar is None:
pbar = self._make_pbar("Uploading files...")
if pbar is not None:
pbar.reset(total_size)
self._tus_start_upload(url)
for group, group_size in bulk_file_groups:
with ExitStack() as es:
group_files = {}
for i, filename in enumerate(group):
group_files[f'client_files[{i}]'] = (
filename,
es.enter_context(closing(open(filename, 'rb')))
)
response = self.session.post(url, data=data,
files=group_files, headers={'Upload-Multiple': ''})
expect_status(200, response)
if pbar is not None:
pbar.update(group_size)
for filename in separate_files:
self._upload_file_with_tus(url, filename,
pbar=pbar, logger=log.debug)
self._tus_finish_upload(url, data=data)
def tasks_list(self, use_json_output, **kwargs): def tasks_list(self, use_json_output, **kwargs):
""" List all tasks in either basic or JSON format. """ """ List all tasks in either basic or JSON format. """
@ -82,18 +141,20 @@ class CLI:
response.raise_for_status() response.raise_for_status()
return output return output
def tasks_create(self, name, labels, resource_type, resources, def tasks_create(self, name: str, labels: List[Dict[str, str]],
annotation_path='', annotation_format='CVAT XML 1.1', resource_type: ResourceType, resources: Sequence[str], *,
completion_verification_period=20, annotation_path='', annotation_format='CVAT XML 1.1',
git_completion_verification_period=2, completion_verification_period=20,
dataset_repository_url='', git_completion_verification_period=2,
lfs=False, **kwargs) -> int: dataset_repository_url='',
lfs=False, pbar: tqdm.tqdm = None, **kwargs) -> int:
""" """
Create a new task with the given name and labels JSON and Create a new task with the given name and labels JSON and
add the files to it. add the files to it.
Returns: id of the created task Returns: id of the created task
""" """
url = self.api.tasks url = self.api.tasks
labels = [] if kwargs.get('project_id') is not None else labels labels = [] if kwargs.get('project_id') is not None else labels
data = {'name': name, data = {'name': name,
@ -111,7 +172,7 @@ class CLI:
task_id = response_json['id'] task_id = response_json['id']
assert isinstance(task_id, int) assert isinstance(task_id, int)
self.tasks_data(task_id, resource_type, resources, **kwargs) self.tasks_data(task_id, resource_type, resources, pbar=pbar, **kwargs)
if annotation_path != '': if annotation_path != '':
url = self.api.tasks_id_status(task_id) url = self.api.tasks_id_status(task_id)
@ -129,7 +190,7 @@ class CLI:
response_json['message']) response_json['message'])
log.info(logger_string) log.info(logger_string)
self.tasks_upload(task_id, annotation_format, annotation_path, **kwargs) self.tasks_upload(task_id, annotation_format, annotation_path, pbar=pbar, **kwargs)
if dataset_repository_url: if dataset_repository_url:
response = self.session.post( response = self.session.post(
@ -277,7 +338,7 @@ class CLI:
return MyTusUploader(client=tus_client, session=cli.session, **kwargs) return MyTusUploader(client=tus_client, session=cli.session, **kwargs)
def _upload_file_data_with_tus(self, url, filename, *, params=None, pbar=None, logger=None): def _upload_file_data_with_tus(self, url, filename, *, params=None, pbar=None, logger=None):
CHUNK_SIZE = 2**20 CHUNK_SIZE = 10 * 2**20
file_size = os.stat(filename).st_size file_size = os.stat(filename).st_size
@ -291,20 +352,20 @@ class CLI:
def _upload_file_with_tus(self, url, filename, *, params=None, pbar=None, logger=None):
    """Upload *filename* to *url* via the "CVAT-TUS" exchange.

    The exchange wraps a plain TUS transfer in two extra messages:
    an Upload-Start request before the data and an Upload-Finish
    request after it.  Returns the Upload-Finish response.
    """
    # "CVAT-TUS" protocol has 2 extra messages
    self._tus_start_upload(url, params=params)
    self._upload_file_data_with_tus(
        url=url, filename=filename, params=params, pbar=pbar, logger=logger)
    return self._tus_finish_upload(url, params=params)
def _tus_start_upload(self, url, *, params=None):
    """Open a "CVAT-TUS" exchange by sending the Upload-Start message."""
    headers = {'Upload-Start': ''}
    response = self.session.post(url, headers=headers, params=params)
    # The server must acknowledge the start of the exchange with 202.
    expect_status(202, response)
    return response
def _tus_finish_upload(self, url, *, params=None, data=None):
    """Close a "CVAT-TUS" exchange by sending the Upload-Finish message.

    *data* lets the caller attach form fields (e.g. task parameters)
    to the finishing request.
    """
    headers = {'Upload-Finish': ''}
    response = self.session.post(url, headers=headers, params=params, data=data)
    # The server must acknowledge the end of the exchange with 202.
    expect_status(202, response)
    return response
def tasks_upload(self, task_id, fileformat, filename, *, def tasks_upload(self, task_id, fileformat, filename, *,
completion_check_period=2, pbar=None, **kwargs): completion_check_period=2, pbar=None, **kwargs):
@ -358,37 +419,32 @@ class CLI:
""" Import a task from a backup file""" """ Import a task from a backup file"""
url = self.api.tasks_backup() url = self.api.tasks_backup()
params = {
'filename': os.path.basename(filename)
}
if pbar is None: if pbar is None:
pbar = self._make_pbar("Uploading...") pbar = self._make_pbar("Uploading...")
file_size = os.stat(filename).st_size response = self._upload_file_with_tus(url, filename,
params=params, pbar=pbar, logger=log.debug)
with open(filename, 'rb') as input_file:
input_stream = StreamWithProgress(input_file, pbar=pbar, length=file_size)
response = self.session.post(
url,
files={'task_file': input_stream}
)
response.raise_for_status()
response_json = response.json() response_json = response.json()
rq_id = response_json['rq_id'] rq_id = response_json['rq_id']
# check task status # check task status
while True: while True:
sleep(completion_check_period) sleep(completion_check_period)
response = self.session.post( response = self.session.post(
url, url,
data={'rq_id': rq_id} data={'rq_id': rq_id}
) )
response.raise_for_status()
if response.status_code == 201: if response.status_code == 201:
break break
expect_status(202, response)
task_id = response.json()['id'] task_id = response.json()['id']
logger_string = "Task has been imported sucessfully. Task ID: {}".format(task_id) log.info(f"Task has been imported sucessfully. Task ID: {task_id}")
log.info(logger_string)
def login(self, credentials): def login(self, credentials):
url = self.api.login url = self.api.login
@ -408,7 +464,7 @@ class CLI:
to the requested name. to the requested name.
""" """
CHUNK_SIZE = 2**20 CHUNK_SIZE = 10 * 2**20
assert not osp.exists(output_path) assert not osp.exists(output_path)

@ -5,6 +5,8 @@
from __future__ import annotations from __future__ import annotations
import io import io
import requests
import tqdm import tqdm
@ -34,3 +36,9 @@ class StreamWithProgress:
def tell(self): def tell(self):
return self.stream.tell() return self.stream.tell()
def expect_status(code: int, response: requests.Response) -> None:
    """Ensure *response* completed with exactly the HTTP status *code*.

    First raises for any 4xx/5xx status (via ``raise_for_status``),
    then rejects any remaining status other than *code*.

    Raises:
        requests.HTTPError: for an error-class status code.
        Exception: for a non-error status different from *code*.
    """
    response.raise_for_status()

    actual = response.status_code
    if actual != code:
        raise Exception("Failed to upload file: "
            f"unexpected status code received ({actual})")

@ -8,6 +8,7 @@ import logging
import os import os
import sys import sys
import unittest import unittest
from unittest.mock import MagicMock
from django.conf import settings from django.conf import settings
from PIL import Image from PIL import Image
@ -22,6 +23,53 @@ from tqdm import tqdm
class TestCLI(APITestCase):
    """Smoke test for creating a task through the CLI facade."""

    @unittest.mock.patch('sys.stdout', new_callable=io.StringIO)
    def setUp(self, mock_stdout):
        # Mirror CLI log output onto the mocked stdout so the test
        # can assert on program messages.
        log = logging.getLogger('utils.cli.core')
        log.setLevel(logging.INFO)
        log.addHandler(logging.StreamHandler(sys.stdout))
        self.mock_stdout = mock_stdout

        self.client = RequestsClient()
        self.credentials = ('admin', 'admin')
        self.api = CVAT_API_V2('testserver')
        self.cli = CLI(self.client, self.api, self.credentials)

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Create a shared test image once for the whole class.
        cls.img_file = os.path.join(settings.SHARE_ROOT, 'test_cli.jpg')
        _, data = generate_image_file(cls.img_file)
        with open(cls.img_file, 'wb') as image:
            image.write(data.read())

    @classmethod
    def tearDownClass(cls):
        super().tearDownClass()
        os.remove(cls.img_file)

    def tearDown(self):
        super().tearDown()
        # NOTE(review): 'log' is not the local from setUp — presumably a
        # module-level name; confirm it resolves and that close_all exists.
        log.close_all()  # Release logging resources correctly

    @classmethod
    def setUpTestData(cls):
        create_db_users(cls)

    def test_tasks_create(self):
        # Drive the progress bar into a capture buffer so its final
        # state can be checked.
        with closing(io.StringIO()) as pbar_out:
            pbar = tqdm(file=pbar_out, mininterval=0)
            task_id = self.cli.tasks_create('test_task',
                [{'name' : 'car'}, {'name': 'person'}],
                ResourceType.LOCAL, [self.img_file], pbar=pbar)
        pbar_out = pbar_out.getvalue().strip('\r').split('\r')

        self.assertEqual(1, task_id)
        self.assertRegex(pbar_out[-1], '100%')
class TestTaskOperations(APITestCase):
@unittest.mock.patch('sys.stdout', new_callable=io.StringIO) @unittest.mock.patch('sys.stdout', new_callable=io.StringIO)
def setUp(self, mock_stdout): def setUp(self, mock_stdout):
self.client = RequestsClient() self.client = RequestsClient()
@ -32,7 +80,7 @@ class TestCLI(APITestCase):
self.task_id = self.cli.tasks_create(self.taskname, self.task_id = self.cli.tasks_create(self.taskname,
[{'name' : 'car'}, {'name': 'person'}], [{'name' : 'car'}, {'name': 'person'}],
ResourceType.LOCAL, ResourceType.LOCAL,
[self.img_file]) [self.img_file], pbar=MagicMock())
# redirect logging to mocked stdout to test program output # redirect logging to mocked stdout to test program output
self.mock_stdout = mock_stdout self.mock_stdout = mock_stdout
log = logging.getLogger('utils.cli.core') log = logging.getLogger('utils.cli.core')
@ -72,11 +120,12 @@ class TestCLI(APITestCase):
@scoped @scoped
def test_tasks_dump(self): def test_tasks_dump(self):
path = os.path.join(settings.SHARE_ROOT, 'test_cli.zip') path = os.path.join(settings.SHARE_ROOT, 'test_cli.zip')
on_exit_do(os.remove, path)
with closing(io.StringIO()) as pbar_out: with closing(io.StringIO()) as pbar_out:
pbar = tqdm(file=pbar_out, mininterval=0) pbar = tqdm(file=pbar_out, mininterval=0)
self.cli.tasks_dump(self.task_id, 'CVAT for images 1.1', path, pbar=pbar) self.cli.tasks_dump(self.task_id, 'CVAT for images 1.1', path, pbar=pbar)
on_exit_do(os.remove, path)
pbar_out = pbar_out.getvalue().strip('\r').split('\r') pbar_out = pbar_out.getvalue().strip('\r').split('\r')
@ -86,11 +135,12 @@ class TestCLI(APITestCase):
@scoped @scoped
def test_tasks_export(self): def test_tasks_export(self):
path = os.path.join(settings.SHARE_ROOT, 'test_cli.zip') path = os.path.join(settings.SHARE_ROOT, 'test_cli.zip')
on_exit_do(os.remove, path)
with closing(io.StringIO()) as pbar_out: with closing(io.StringIO()) as pbar_out:
pbar = tqdm(file=pbar_out, mininterval=0) pbar = tqdm(file=pbar_out, mininterval=0)
self.cli.tasks_export(self.task_id, path, pbar=pbar) self.cli.tasks_export(self.task_id, path, pbar=pbar)
on_exit_do(os.remove, path)
pbar_out = pbar_out.getvalue().strip('\r').split('\r') pbar_out = pbar_out.getvalue().strip('\r').split('\r')
@ -100,85 +150,119 @@ class TestCLI(APITestCase):
@scoped @scoped
def test_tasks_frame_original(self): def test_tasks_frame_original(self):
path = os.path.join(settings.SHARE_ROOT, 'task_1_frame_000000.jpg') path = os.path.join(settings.SHARE_ROOT, 'task_1_frame_000000.jpg')
on_exit_do(os.remove, path)
self.cli.tasks_frame(self.task_id, [0], self.cli.tasks_frame(self.task_id, [0],
outdir=settings.SHARE_ROOT, quality='original') outdir=settings.SHARE_ROOT, quality='original')
on_exit_do(os.remove, path)
self.assertTrue(os.path.exists(path)) self.assertTrue(os.path.exists(path))
@scoped @scoped
def test_tasks_frame(self): def test_tasks_frame(self):
path = os.path.join(settings.SHARE_ROOT, 'task_1_frame_000000.jpg') path = os.path.join(settings.SHARE_ROOT, 'task_1_frame_000000.jpg')
on_exit_do(os.remove, path)
self.cli.tasks_frame(self.task_id, [0], self.cli.tasks_frame(self.task_id, [0],
outdir=settings.SHARE_ROOT, quality='compressed') outdir=settings.SHARE_ROOT, quality='compressed')
on_exit_do(os.remove, path)
self.assertTrue(os.path.exists(path)) self.assertTrue(os.path.exists(path))
@scoped @scoped
def test_tasks_upload(self): def test_tasks_upload(self):
test_image = Image.open(self.img_file)
width, height = test_image.size
# Using generate_coco_anno() from:
# https://github.com/opencv/cvat/blob/develop/cvat/apps/engine/tests/test_rest_api.py
def generate_coco_anno():
return b"""{
"categories": [
{
"id": 1,
"name": "car",
"supercategory": ""
},
{
"id": 2,
"name": "person",
"supercategory": ""
}
],
"images": [
{
"coco_url": "",
"date_captured": "",
"flickr_url": "",
"license": 0,
"id": 0,
"file_name": "test_cli.jpg",
"height": %d,
"width": %d
}
],
"annotations": [
{
"category_id": 1,
"id": 1,
"image_id": 0,
"iscrowd": 0,
"segmentation": [
[]
],
"area": 17702.0,
"bbox": [
574.0,
407.0,
167.0,
106.0
]
}
]
}"""
content = generate_coco_anno() % (height, width)
path = os.path.join(settings.SHARE_ROOT, 'test_cli.json') path = os.path.join(settings.SHARE_ROOT, 'test_cli.json')
with open(path, "wb") as coco: self._generate_coco_file(path)
coco.write(content)
on_exit_do(os.remove, path) on_exit_do(os.remove, path)
with closing(io.StringIO()) as pbar_out: with closing(io.StringIO()) as pbar_out:
pbar = tqdm(file=pbar_out, mininterval=0) pbar = tqdm(file=pbar_out, mininterval=0)
self.cli.tasks_upload(self.task_id, 'COCO 1.0', path, pbar=pbar) self.cli.tasks_upload(self.task_id, 'COCO 1.0', path, pbar=pbar)
pbar_out = pbar_out.getvalue().strip('\r').split('\r') pbar_out = pbar_out.getvalue().strip('\r').split('\r')
self.assertRegex(self.mock_stdout.getvalue(), '.*{}.*'.format("annotation file")) self.assertRegex(self.mock_stdout.getvalue(), '.*{}.*'.format("annotation file"))
self.assertRegex(pbar_out[-1], '100%') self.assertRegex(pbar_out[-1], '100%')
@scoped
def test_tasks_import(self):
    """Round-trip: annotate, export a task backup, then import it."""
    # Give the task some annotations so the backup is non-trivial.
    anno_path = os.path.join(settings.SHARE_ROOT, 'test_cli.json')
    self._generate_coco_file(anno_path)
    on_exit_do(os.remove, anno_path)

    backup_path = os.path.join(settings.SHARE_ROOT, 'task_backup.zip')

    with closing(io.StringIO()) as pbar_out:
        pbar = tqdm(file=pbar_out, mininterval=0)
        self.cli.tasks_upload(self.task_id, 'COCO 1.0', anno_path, pbar=pbar)
        self.cli.tasks_export(self.task_id, backup_path, pbar=pbar)
    on_exit_do(os.remove, backup_path)

    with closing(io.StringIO()) as pbar_out:
        pbar = tqdm(file=pbar_out, mininterval=0)
        self.cli.tasks_import(backup_path, pbar=pbar)
    pbar_out = pbar_out.getvalue().strip('\r').split('\r')

    # 'sucessfully' intentionally matches the misspelled CLI log message.
    self.assertRegex(self.mock_stdout.getvalue(), '.*{}.*'.format("exported sucessfully"))
    self.assertRegex(pbar_out[-1], '100%')
def _generate_coco_file(self, path):
    """Write a COCO annotation document for self.img_file to *path*.

    The annotation references the test image by basename and uses its
    real pixel dimensions.
    """
    # Fix: close the image handle.  Image.open() is lazy and keeps the
    # underlying file open; the original leaked the file descriptor.
    with Image.open(self.img_file) as test_image:
        image_width, image_height = test_image.size

    content = self._generate_coco_anno(os.path.basename(self.img_file),
        image_width=image_width, image_height=image_height)

    with open(path, "w") as coco:
        coco.write(content)
@staticmethod
def _generate_coco_anno(image_path, image_width, image_height):
    """Return a minimal COCO-format annotation document as a string.

    The document declares two categories (car, person), a single image
    entry for *image_path* with the given dimensions, and one
    bounding-box annotation on that image.
    """
    template = """{
"categories": [
{
"id": 1,
"name": "car",
"supercategory": ""
},
{
"id": 2,
"name": "person",
"supercategory": ""
}
],
"images": [
{
"coco_url": "",
"date_captured": "",
"flickr_url": "",
"license": 0,
"id": 0,
"file_name": "{image_path}",
"height": {image_height},
"width": {image_width}
}
],
"annotations": [
{
"category_id": 1,
"id": 1,
"image_id": 0,
"iscrowd": 0,
"segmentation": [
[]
],
"area": 17702.0,
"bbox": [
574.0,
407.0,
167.0,
106.0
]
}
]
}
"""
    # The template is JSON, so literal braces abound: substitute the
    # three placeholders with plain string replacement rather than a
    # format-string mechanism that would require escaping every brace.
    return (template
        .replace('{image_path}', str(image_path))
        .replace('{image_height}', '%d' % image_height)
        .replace('{image_width}', '%d' % image_width))

Loading…
Cancel
Save