diff --git a/CHANGELOG.md b/CHANGELOG.md index c891694c..610bdc35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] +### Added +- A converter to YOLO format +- Installation guide +- Linear interpolation for a single point +- Video frame filter + +### Changed +- Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before) + +### Deprecated +- + +### Removed +- + +### Fixed +- Fixed incorrect width of shapes borders in some cases +- Fixed annotation parser for tracks with a start frame less than the first segment frame +- Fixed interpolation on the server near outside frames + +### Security +- + ## [0.4.2] - 2019-06-03 ### Fixed - Fixed interaction with the server share in the auto annotation plugin @@ -37,9 +61,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Django 2.1.5 (security fix, https://nvd.nist.gov/vuln/detail/CVE-2019-3498) - Several scenarious which cause code 400 after undo/redo/save have been fixed (#315) -### Security -- - ## [0.3.0] - 2018-12-29 ### Added - Ability to copy Object URL and Frame URL via object context menu and player context menu respectively. diff --git a/Dockerfile b/Dockerfile index 3f9f3e97..6badfd6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -102,7 +102,7 @@ RUN if [ "$WITH_TESTS" = "yes" ]; then \ # Install and initialize CVAT, copy all necessary files COPY cvat/requirements/ /tmp/requirements/ COPY supervisord.conf mod_wsgi.conf wait-for-it.sh manage.py ${HOME}/ -RUN pip3 install --no-cache-dir -r /tmp/requirements/${DJANGO_CONFIGURATION}.txt +RUN pip3 install --no-cache-dir -r /tmp/requirements/${DJANGO_CONFIGURATION}.txt # Install git application dependencies RUN apt-get update && \ @@ -138,7 +138,9 @@ RUN if [ "$WITH_DEXTR" = "yes" ]; then \ COPY ssh ${HOME}/.ssh COPY cvat/ ${HOME}/cvat COPY tests ${HOME}/tests -RUN patch -p1 < ${HOME}/cvat/apps/engine/static/engine/js/3rdparty.patch +# Binary option is necessary to correctly apply the patch on Windows platform. +# https://unix.stackexchange.com/questions/239364/how-to-fix-hunk-1-failed-at-1-different-line-endings-message +RUN patch --binary -p1 < ${HOME}/cvat/apps/engine/static/engine/js/3rdparty.patch RUN chown -R ${USER}:${USER} . 
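# Note: --binary above makes patch read and write files verbatim, so the patch
# also applies cleanly when the sources were checked out on Windows with CRLF
# line endings (e.g. with Git's core.autocrlf enabled).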
# RUN all commands below as 'django' user diff --git a/README.md b/README.md index 19face9f..8f643e80 100644 --- a/README.md +++ b/README.md @@ -10,25 +10,26 @@ CVAT is completely re-designed and re-implemented version of [Video Annotation T ## Documentation -- [User's guide](cvat/apps/documentation/user_guide.md) -- [XML annotation format](cvat/apps/documentation/xml_format.md) -- [AWS Deployment Guide](cvat/apps/documentation/AWS-Deployment-Guide.md) -- [Questions](#questions) +- [Installation guide](cvat/apps/documentation/installation.md) +- [User's guide](cvat/apps/documentation/user_guide.md) +- [XML annotation format](cvat/apps/documentation/xml_format.md) +- [AWS Deployment Guide](cvat/apps/documentation/AWS-Deployment-Guide.md) +- [Questions](#questions) ## Screencasts -- [Introduction](https://youtu.be/L9_IvUIHGwM) -- [Annotation mode](https://youtu.be/6h7HxGL6Ct4) -- [Interpolation mode](https://youtu.be/U3MYDhESHo4) -- [Attribute mode](https://youtu.be/UPNfWl8Egd8) -- [Segmentation mode](https://youtu.be/Fh8oKuSUIPs) -- [Tutorial for polygons](https://www.youtube.com/watch?v=XTwfXDh4clI) -- [Semi-automatic segmentation](https://www.youtube.com/watch?v=vnqXZ-Z-VTQ) +- [Introduction](https://youtu.be/L9_IvUIHGwM) +- [Annotation mode](https://youtu.be/6h7HxGL6Ct4) +- [Interpolation mode](https://youtu.be/U3MYDhESHo4) +- [Attribute mode](https://youtu.be/UPNfWl8Egd8) +- [Segmentation mode](https://youtu.be/Fh8oKuSUIPs) +- [Tutorial for polygons](https://www.youtube.com/watch?v=XTwfXDh4clI) +- [Semi-automatic segmentation](https://www.youtube.com/watch?v=vnqXZ-Z-VTQ) ## Links -- [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat) -- [Intel Software: Computer Vision Annotation Tool: A Universal Approach to Data Annotation](https://software.intel.com/en-us/articles/computer-vision-annotation-tool-a-universal-approach-to-data-annotation) -- [VentureBeat: Intel open-sources CVAT, a toolkit for data labeling](https://venturebeat.com/2019/03/05/intel-open-sources-cvat-a-toolkit-for-data-labeling/) +- [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat) +- [Intel Software: Computer Vision Annotation Tool: A Universal Approach to Data Annotation](https://software.intel.com/en-us/articles/computer-vision-annotation-tool-a-universal-approach-to-data-annotation) +- [VentureBeat: Intel open-sources CVAT, a toolkit for data labeling](https://venturebeat.com/2019/03/05/intel-open-sources-cvat-a-toolkit-for-data-labeling/) ## Online Demo @@ -36,8 +37,8 @@ CVAT is completely re-designed and re-implemented version of [Video Annotation T After you click the link above: -- Click on "GO TO WORKSPACE" and the CVAT environment will load up -- The environment is backed by a K80 GPU +- Click on "GO TO WORKSPACE" and the CVAT environment will load up +- The environment is backed by a K80 GPU If you have any questions, please contact Onepanel directly at support@onepanel.io. If you are in the Onepanel application, you can also use the chat icon in the bottom right corner. @@ -45,115 +46,15 @@ If you have any questions, please contact Onepanel directly at support@onepanel. Code released under the [MIT License](https://opensource.org/licenses/MIT). -## INSTALLATION - -The instructions below should work for `Ubuntu 16.04`. It will probably work on other Operating Systems such as `Windows` and `macOS`, but may require minor modifications. 
- -### Install [Docker CE](https://www.docker.com/community-edition) or [Docker EE](https://www.docker.com/enterprise-edition) from official site - -Please read official manual [here](https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/). - -### Install docker-compose (1.19.0 or newer) - -```bash -sudo pip install docker-compose -``` - -### Build docker images - -To build all necessary docker images run `docker-compose build` command. By default, in production mode the tool uses PostgreSQL as database, Redis for caching. - -### Run docker containers - -To start default container run `docker-compose up -d` command. Go to [localhost:8080](http://localhost:8080/). You should see a login page. - -### You can include any additional components. Just add corresponding docker-compose file to build or run command: - -```bash -# Build image with CUDA and OpenVINO support -docker-compose -f docker-compose.yml -f components/cuda/docker-compose.cuda.yml -f components/openvino/docker-compose.openvino.yml build - -# Run containers with CUDA and OpenVINO support -docker-compose -f docker-compose.yml -f components/cuda/docker-compose.cuda.yml -f components/openvino/docker-compose.openvino.yml up -d -``` - -### Additional optional components - -- [Auto annotation using DL models in OpenVINO toolkit format](cvat/apps/auto_annotation/README.md) -- [Analytics: management and monitoring of data annotation team](components/analytics/README.md) -- [TF Object Detection API: auto annotation](components/tf_annotation/README.md) -- [Support for NVIDIA GPUs](components/cuda/README.md) -- [Semi-automatic segmentation with Deep Extreme Cut](cvat/apps/dextr_segmentation/README.md) - -### Create superuser account - -You can [register a user](http://localhost:8080/auth/register) but by default it will not have rights even to view list of tasks. Thus you should create a superuser. The superuser can use admin panel to assign correct groups to the user. Please use the command below: - -```bash -docker exec -it cvat bash -ic '/usr/bin/python3 ~/manage.py createsuperuser' -``` - -Type your login/password for the superuser [on the login page](http://localhost:8080/auth/login) and press **Login** button. Now you should be able to create a new annotation task. Please read documentation for more details. - -### Stop all containers - -The command below will stop and remove containers, networks, volumes, and images -created by `up`. - -```bash -docker-compose down -``` - -### Advanced settings - -If you want to access you instance of CVAT outside of your localhost you should specify [ALLOWED_HOSTS](https://docs.djangoproject.com/en/2.0/ref/settings/#allowed-hosts) environment variable. The best way to do that is to create [docker-compose.override.yml](https://docs.docker.com/compose/extends/) and put all your extra settings here. - -```yml -version: "2.3" - -services: - cvat: - environment: - ALLOWED_HOSTS: .example.com - ports: - - "80:8080" -``` -### Annotation logs - -It is possible to proxy annotation logs from client to ELK. To do that run the following command below: - -```bash -docker-compose -f docker-compose.yml -f components/analytics/docker-compose.analytics.yml up -d --build -``` - - -### Share path - -You can use a share storage for data uploading during you are creating a task. To do that you can mount it to CVAT docker container. 
Example of docker-compose.override.yml for this purpose:
-
-```yml
-version: "2.3"
-
-services:
-  cvat:
-    environment:
-      CVAT_SHARE_URL: "Mounted from /mnt/share host directory"
-    volumes:
-      - cvat_share:/home/django/share:ro
-
-volumes:
-  cvat_share:
-    driver_opts:
-      type: none
-      device: /mnt/share
-      o: bind
-```
-You can change the share device path to your actual share. For user convenience we have defined the enviroment variable $CVAT_SHARE_URL. This variable contains a text (url for example) which will be being shown in the client-share browser.
-
## Questions

-CVAT usage related questions or unclear concepts can be posted in our [Gitter chat](https://gitter.im/opencv-cvat) for **quick replies** from contributors and other users.
+CVAT usage-related questions or unclear concepts can be posted in our
+[Gitter chat](https://gitter.im/opencv-cvat) for **quick replies** from
+contributors and other users.

-However, if you have a feature request or a bug report that can reproduced, feel free to open an issue (with steps to reproduce the bug if it's a bug report).
+However, if you have a feature request or a bug report that can be reproduced,
+feel free to open an issue (with steps to reproduce the bug if it's a bug
+report).

-If you are not sure or just want to browse other users common questions, [Gitter chat](https://gitter.im/opencv-cvat) is the way to go.
+If you are not sure or just want to browse other users' common questions,
+[Gitter chat](https://gitter.im/opencv-cvat) is the way to go.
diff --git a/cvat/__init__.py b/cvat/__init__.py
index 94a6cc8c..35efa70d 100644
--- a/cvat/__init__.py
+++ b/cvat/__init__.py
@@ -5,6 +5,6 @@
from cvat.utils.version import get_version

-VERSION = (0, 4, 0, 'final')
+VERSION = (0, 5, 0, 'alpha', 0)

__version__ = get_version(VERSION)
diff --git a/cvat/apps/dashboard/static/dashboard/js/dashboard.js b/cvat/apps/dashboard/static/dashboard/js/dashboard.js
index 8bc302b6..8a0840c2 100644
--- a/cvat/apps/dashboard/static/dashboard/js/dashboard.js
+++ b/cvat/apps/dashboard/static/dashboard/js/dashboard.js
@@ -464,6 +464,10 @@ class DashboardView {
        return (overlapSize >= 0 && overlapSize <= segmentSize - 1);
    }

+    function validateStopFrame(stopFrame, startFrame) {
+        return !customStopFrame.prop('checked') || stopFrame >= startFrame;
+    }
+
    function requestCreatingStatus(tid, onUpdateStatus, onSuccess, onError) {
        function checkCallback() {
            $.get(`/api/v1/tasks/${tid}/status`).done((data) => {
@@ -516,6 +520,12 @@ class DashboardView {
        const customOverlapSize = $('#dashboardCustomOverlap');
        const imageQualityInput = $('#dashboardImageQuality');
        const customCompressQuality = $('#dashboardCustomQuality');
+        const startFrameInput = $('#dashboardStartFrame');
+        const customStartFrame = $('#dashboardCustomStart');
+        const stopFrameInput = $('#dashboardStopFrame');
+        const customStopFrame = $('#dashboardCustomStop');
+        const frameFilterInput = $('#dashboardFrameFilter');
+        const customFrameFilter = $('#dashboardCustomFilter');

        const taskMessage = $('#dashboardCreateTaskMessage');
        const submitCreate = $('#dashboardSubmitTask');
@@ -529,6 +539,9 @@ class DashboardView {
        let segmentSize = 5000;
        let overlapSize = 0;
        let compressQuality = 50;
+        let startFrame = 0;
+        let stopFrame = 0;
+        let frameFilter = '';
        let files = [];

        dashboardCreateTaskButton.on('click', () => {
@@ -612,6 +625,9 @@ class DashboardView {
        customSegmentSize.on('change', (e) => segmentSizeInput.prop('disabled', !e.target.checked));
        customOverlapSize.on('change', (e) => overlapSizeInput.prop('disabled', !e.target.checked));
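        // validateStopFrame above only applies while the "custom stop frame"
        // checkbox is checked: e.g. startFrame = 10, stopFrame = 5 is rejected,
        // while leaving the checkbox unchecked keeps stop_frame at the server
        // default of 0, which means "up to the last frame".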
        customCompressQuality.on('change', (e) => imageQualityInput.prop('disabled', !e.target.checked));
+        customStartFrame.on('change', (e) => startFrameInput.prop('disabled', !e.target.checked));
+        customStopFrame.on('change', (e) => stopFrameInput.prop('disabled', !e.target.checked));
+        customFrameFilter.on('change', (e) => frameFilterInput.prop('disabled', !e.target.checked));

        segmentSizeInput.on('change', () => {
            const value = Math.clamp(
@@ -646,6 +662,28 @@ class DashboardView {
            compressQuality = value;
        });

+        startFrameInput.on('change', function() {
+            let value = Math.max(
+                +startFrameInput.prop('value'),
+                +startFrameInput.prop('min')
+            );
+
+            startFrameInput.prop('value', value);
+            startFrame = value;
+        });
+        stopFrameInput.on('change', function() {
+            let value = Math.max(
+                +stopFrameInput.prop('value'),
+                +stopFrameInput.prop('min')
+            );
+
+            stopFrameInput.prop('value', value);
+            stopFrame = value;
+        });
+        frameFilterInput.on('change', function() {
+            frameFilter = frameFilterInput.prop('value');
+        });
+
        submitCreate.on('click', () => {
            if (!validateName(name)) {
                taskMessage.css('color', 'red');
@@ -677,6 +715,12 @@ class DashboardView {
                return;
            }

+            if (!validateStopFrame(stopFrame, startFrame)) {
+                taskMessage.css('color', 'red');
+                taskMessage.text('Stop frame must be greater than or equal to start frame');
+                return;
+            }
+
            if (files.length <= 0) {
                taskMessage.css('color', 'red');
                taskMessage.text('No files specified for the task');
@@ -717,6 +761,15 @@ class DashboardView {
            if (customOverlapSize.prop('checked')) {
                description.overlap = overlapSize;
            }
+            if (customStartFrame.prop('checked')) {
+                description.start_frame = startFrame;
+            }
+            if (customStopFrame.prop('checked')) {
+                description.stop_frame = stopFrame;
+            }
+            if (customFrameFilter.prop('checked')) {
+                description.frame_filter = frameFilter;
+            }

        function cleanupTask(tid) {
            $.ajax({
diff --git a/cvat/apps/dashboard/templates/dashboard/dashboard.html b/cvat/apps/dashboard/templates/dashboard/dashboard.html
index e75790d3..7722dc74 100644
--- a/cvat/apps/dashboard/templates/dashboard/dashboard.html
+++ b/cvat/apps/dashboard/templates/dashboard/dashboard.html
@@ -143,6 +143,33 @@
Example: @select=race:__undefined__,skip,asian,black,caucasian,other'/>
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/cvat/apps/documentation/installation.md b/cvat/apps/documentation/installation.md
new file mode 100644
index 00000000..7fb94fc7
--- /dev/null
+++ b/cvat/apps/documentation/installation.md
@@ -0,0 +1,310 @@
+- [Quick installation guide](#quick-installation-guide)
+  - [Ubuntu 18.04 (x86_64/amd64)](#ubuntu-1804-x86_64amd64)
+  - [Windows 10](#windows-10)
+  - [Mac OS Mojave](#mac-os-mojave)
+  - [Advanced topics](#advanced-topics)
+    - [Additional components](#additional-components)
+    - [Stop all containers](#stop-all-containers)
+    - [Advanced settings](#advanced-settings)
+    - [Share path](#share-path)
+
+# Quick installation guide
+
+Before you can use CVAT, you'll need to get it installed. The document below
+contains instructions for the most popular operating systems. If your system is
+not covered by the document, it should be relatively straightforward to adapt
+the instructions below for other systems.
+
+You will probably need to modify the instructions below if you are behind a proxy
+server. Proxies are an advanced topic and are not covered by this guide.
+
+## Ubuntu 18.04 (x86_64/amd64)
+- Open a terminal window.
If you don't know how to open a terminal window on + Ubuntu please read [the answer](https://askubuntu.com/questions/183775/how-do-i-open-a-terminal). + +- Type commands below into the terminal window to install `docker`. More + instructions can be found [here](https://docs.docker.com/install/linux/docker-ce/ubuntu/). + + ```sh + sudo apt-get update + sudo apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg-agent \ + software-properties-common + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io + ``` + +- Perform [post-installation steps](https://docs.docker.com/install/linux/linux-postinstall/) + to run docker without root permissions. + + ```sh + sudo groupadd docker + sudo usermod -aG docker $USER + ``` + Log out and log back in (or reboot) so that your group membership is + re-evaluated. You can type `groups` command in a terminal window after + that and check if `docker` group is in its output. + +- Install docker-compose (1.19.0 or newer). Compose is a tool for + defining and running multi-container docker applications. + + ```bash + sudo apt-get install -y python3-pip + sudo pip3 install docker-compose + ``` + +- Clone _CVAT_ source code from the + [GitHub repository](https://github.com/opencv/cvat). + + ```bash + sudo apt-get install -y git + git clone https://github.com/opencv/cvat + cd cvat + ``` + +- Build docker images by default. It will take some time to download public + docker image ubuntu:16.04 and install all necessary ubuntu packages to run + CVAT server. + + ```bash + docker-compose build + ``` + +- Run docker containers. It will take some time to download public docker + images like postgres:10.3-alpine, redis:4.0.5-alpine and create containers. + + ```sh + docker-compose up -d + ``` + +- You can register a user but by default it will not have rights even to view + list of tasks. Thus you should create a superuser. A superuser can use an + admin panel to assign correct groups to the user. Please use the command + below: + + ```sh + docker exec -it cvat bash -ic 'python3 ~/manage.py createsuperuser' + ``` + Choose login and password for your admin account. For more information + please read [Django documentation](https://docs.djangoproject.com/en/2.2/ref/django-admin/#createsuperuser). + +- Google Chrome is the only browser which is supported by CVAT. You need to + install it as well. Type commands below in a terminal window: + + ```sh + wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - + sudo sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' + sudo apt-get update + sudo apt-get install -y google-chrome-stable + ``` + +- Open the installed Google Chrome browser and go to [localhost:8080](http://localhost:8080). + Type your login/password for the superuser on the login page and press the _Login_ + button. Now you should be able to create a new annotation task. Please read the + [CVAT user's guide](/cvat/apps/documentation/user_guide.md) for more details. + +## Windows 10 +- Download [Docker for Windows](https://download.docker.com/win/stable/Docker%20for%20Windows%20Installer.exe). + Double-click `Docker for Windows Installer` to run the installer. 
More + instructions can be found [here](https://docs.docker.com/docker-for-windows/install/). Note: + Docker Desktop requires Windows 10 Pro or Enterprise version 14393 to run. + +- Download and install + [Git for Windows](https://github.com/git-for-windows/git/releases/download/v2.21.0.windows.1/Git-2.21.0-64-bit.exe). + When installing the package please keep all options by default. + More information about the package can be found [here](https://gitforwindows.org). + +- Download and install [Google Chrome](https://www.google.com/chrome/). It is the only browser + which is supported by CVAT. + +- Go to windows menu, find `Git Bash` application and run it. You should see a terminal window. + +- Clone _CVAT_ source code from the + [GitHub repository](https://github.com/opencv/cvat). + + ```bash + git clone https://github.com/opencv/cvat + cd cvat + ``` + +- Build docker images by default. It will take some time to download public + docker image ubuntu:16.04 and install all necessary ubuntu packages to run + CVAT server. + + ```bash + docker-compose build + ``` + +- Run docker containers. It will take some time to download public docker + images like postgres:10.3-alpine, redis:4.0.5-alpine and create containers. + + ```sh + docker-compose up -d + ``` + +- You can register a user but by default it will not have rights even to view + list of tasks. Thus you should create a superuser. A superuser can use an + admin panel to assign correct groups to other users. Please use the command + below: + + ```sh + winpty docker exec -it cvat bash -ic 'python3 ~/manage.py createsuperuser' + ``` + Choose login and password for your admin account. For more information + please read [Django documentation](https://docs.djangoproject.com/en/2.2/ref/django-admin/#createsuperuser). + +- Open the installed Google Chrome browser and go to [localhost:8080](http://localhost:8080). + Type your login/password for the superuser on the login page and press the _Login_ + button. Now you should be able to create a new annotation task. Please read the + [CVAT user's guide](/cvat/apps/documentation/user_guide.md) for more details. + +## Mac OS Mojave +- Download [Docker for Mac](https://download.docker.com/mac/stable/Docker.dmg). + Double-click Docker.dmg to open the installer, then drag Moby the whale + to the Applications folder. Double-click Docker.app in the Applications + folder to start Docker. More instructions can be found + [here](https://docs.docker.com/v17.12/docker-for-mac/install/#install-and-run-docker-for-mac). + +- There are several ways to install Git on a Mac. The easiest is probably to + install the Xcode Command Line Tools. On Mavericks (10.9) or above you can + do this simply by trying to run git from the Terminal the very first time. + + ```bash + git --version + ``` + + If you don’t have it installed already, it will prompt you to install it. + More instructions can be found [here](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). + +- Download and install [Google Chrome](https://www.google.com/chrome/). It + is the only browser which is supported by CVAT. + +- Open a terminal window. The terminal app is in the Utilities folder in + Applications. To open it, either open your Applications folder, then open + Utilities and double-click on Terminal, or press Command - spacebar to + launch Spotlight and type "Terminal," then double-click the search result. + +- Clone _CVAT_ source code from the + [GitHub repository](https://github.com/opencv/cvat). 
+
+  ```bash
+  git clone https://github.com/opencv/cvat
+  cd cvat
+  ```
+
+- Build docker images by default. It will take some time to download public
+  docker image ubuntu:16.04 and install all necessary ubuntu packages to run
+  CVAT server.
+
+  ```bash
+  docker-compose build
+  ```
+
+- Run docker containers. It will take some time to download public docker
+  images like postgres:10.3-alpine, redis:4.0.5-alpine and create containers.
+
+  ```sh
+  docker-compose up -d
+  ```
+
+- You can register a user but by default it will not have rights even to view
+  list of tasks. Thus you should create a superuser. A superuser can use an
+  admin panel to assign correct groups to other users. Please use the command
+  below:
+
+  ```sh
+  docker exec -it cvat bash -ic 'python3 ~/manage.py createsuperuser'
+  ```
+  Choose login and password for your admin account. For more information
+  please read [Django documentation](https://docs.djangoproject.com/en/2.2/ref/django-admin/#createsuperuser).
+
+- Open the installed Google Chrome browser and go to [localhost:8080](http://localhost:8080).
+  Type your login/password for the superuser on the login page and press the _Login_
+  button. Now you should be able to create a new annotation task. Please read the
+  [CVAT user's guide](/cvat/apps/documentation/user_guide.md) for more details.
+
+## Advanced topics
+
+### Additional components
+
+- [Auto annotation using DL models in OpenVINO toolkit format](/cvat/apps/auto_annotation/README.md)
+- [Analytics: management and monitoring of data annotation team](/components/analytics/README.md)
+- [TF Object Detection API: auto annotation](/components/tf_annotation/README.md)
+- [Support for NVIDIA GPUs](/components/cuda/README.md)
+- [Semi-automatic segmentation with Deep Extreme Cut](/cvat/apps/dextr_segmentation/README.md)
+
+```bash
+# Build and run containers with CUDA and OpenVINO support
+# IMPORTANT: you need to download the OpenVINO package before running the command
+docker-compose -f docker-compose.yml -f components/cuda/docker-compose.cuda.yml -f components/openvino/docker-compose.openvino.yml up -d --build
+
+# Build and run containers with Analytics component support:
+docker-compose -f docker-compose.yml -f components/analytics/docker-compose.analytics.yml up -d --build
+```
+
+### Stop all containers
+
+The command below stops and removes containers, networks, volumes, and images
+created by `up`.
+
+```bash
+docker-compose down
+```
+
+### Advanced settings
+
+If you want to access your instance of CVAT outside of your localhost, you should
+specify the [ALLOWED_HOSTS](https://docs.djangoproject.com/en/2.0/ref/settings/#allowed-hosts)
+environment variable. The best way to do that is to create
+[docker-compose.override.yml](https://docs.docker.com/compose/extends/) and put
+all your extra settings there.
+
+```yml
+version: "2.3"
+
+services:
+  cvat:
+    environment:
+      ALLOWED_HOSTS: .example.com
+    ports:
+      - "80:8080"
+```
+
+Please don't forget to include this file in docker-compose commands using the `-f`
+option (in some cases it can be omitted).
+
+### Share path
+
+You can use shared storage for uploading data when you create a task.
+To do that you can mount it into the CVAT docker container.
Example of +docker-compose.override.yml for this purpose: + +```yml +version: "2.3" + +services: + cvat: + environment: + CVAT_SHARE_URL: "Mounted from /mnt/share host directory" + volumes: + - cvat_share:/home/django/share:ro + +volumes: + cvat_share: + driver_opts: + type: none + device: /mnt/share + o: bind +``` + +You can change the share device path to your actual share. For user convenience +we have defined the environment variable $CVAT_SHARE_URL. This variable +contains a text (url for example) which is shown in the client-share browser. diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py index ef1d0bcc..d0fb0211 100644 --- a/cvat/apps/engine/annotation.py +++ b/cvat/apps/engine/annotation.py @@ -1090,12 +1090,19 @@ class TrackManager(ObjectManager): step = np.subtract(shape1["points"], shape0["points"]) / distance for frame in range(shape0["frame"] + 1, shape1["frame"]): off = frame - shape0["frame"] - points = shape0["points"] + step * off + if shape1["outside"]: + points = np.asarray(shape0["points"]).reshape(-1, 2) + else: + points = (shape0["points"] + step * off).reshape(-1, 2) shape = copy.deepcopy(shape0) - broken_line = geometry.LineString(points.reshape(-1, 2)).simplify(0.05, False) + if len(points) == 1: + shape["points"] = points.flatten() + else: + broken_line = geometry.LineString(points).simplify(0.05, False) + shape["points"] = [x for p in broken_line.coords for x in p] + shape["keyframe"] = False shape["frame"] = frame - shape["points"] = [x for p in broken_line.coords for x in p] shapes.append(shape) return shapes @@ -1248,6 +1255,9 @@ class TaskAnnotation: ("flipped", str(db_task.flipped)), ("created", str(timezone.localtime(db_task.created_date))), ("updated", str(timezone.localtime(db_task.updated_date))), + ("start_frame", str(db_task.start_frame)), + ("stop_frame", str(db_task.stop_frame)), + ("frame_filter", db_task.frame_filter), ("labels", [ ("label", OrderedDict([ @@ -1286,6 +1296,8 @@ class TaskAnnotation: ("width", str(im_meta_data[0]["width"])), ("height", str(im_meta_data[0]["height"])) ]) + # Add source to dumped file + meta["source"] = str(db_task.video.path) with open(file_path, "w") as dump_file: dumper = XmlAnnotationWriter(dump_file) @@ -1407,7 +1419,7 @@ class TaskAnnotation: self._flip_shape(shape, im_w, im_h) dump_data = OrderedDict([ - ("frame", str(shape["frame"])), + ("frame", str(db_task.start_frame + shape["frame"] * db_task.get_frame_step())), ("outside", str(int(shape["outside"]))), ("occluded", str(int(shape["occluded"]))), ("keyframe", str(int(shape["keyframe"]))) diff --git a/cvat/apps/engine/migrations/0019_frame_selection.py b/cvat/apps/engine/migrations/0019_frame_selection.py new file mode 100644 index 00000000..d1b1d731 --- /dev/null +++ b/cvat/apps/engine/migrations/0019_frame_selection.py @@ -0,0 +1,40 @@ +# Generated by Django 2.1.7 on 2019-05-10 08:23 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0018_jobcommit'), + ] + + operations = [ + migrations.RemoveField( + model_name='video', + name='start_frame', + ), + migrations.RemoveField( + model_name='video', + name='step', + ), + migrations.RemoveField( + model_name='video', + name='stop_frame', + ), + migrations.AddField( + model_name='task', + name='frame_filter', + field=models.CharField(default='', max_length=256), + ), + migrations.AddField( + model_name='task', + name='start_frame', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + 
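        # Note: this migration moves frame selection from the Video model to the
        # Task model: start_frame and stop_frame become task fields, and the old
        # integer `step` is generalized into the textual `frame_filter`
        # (for example "step=5"), parsed by Task.get_frame_step() below.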
model_name='task', + name='stop_frame', + field=models.PositiveIntegerField(default=0), + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 736fd6b0..6a410d92 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -4,6 +4,7 @@ from enum import Enum +import re import shlex import os @@ -49,6 +50,9 @@ class Task(models.Model): z_order = models.BooleanField(default=False) flipped = models.BooleanField(default=False) image_quality = models.PositiveSmallIntegerField(default=50) + start_frame = models.PositiveIntegerField(default=0) + stop_frame = models.PositiveIntegerField(default=0) + frame_filter = models.CharField(max_length=256, default="") status = models.CharField(max_length=32, choices=StatusChoice.choices(), default=StatusChoice.ANNOTATION) @@ -64,6 +68,10 @@ class Task(models.Model): return path + def get_frame_step(self): + match = re.search("step\s*=\s*([1-9]\d*)", self.frame_filter) + return int(match.group(1)) if match else 1 + def get_upload_dirname(self): return os.path.join(self.get_task_dirname(), ".upload") @@ -128,9 +136,6 @@ class RemoteFile(models.Model): class Video(models.Model): task = models.OneToOneField(Task, on_delete=models.CASCADE) path = models.CharField(max_length=1024) - start_frame = models.PositiveIntegerField() - stop_frame = models.PositiveIntegerField() - step = models.PositiveIntegerField(default=1) width = models.PositiveIntegerField() height = models.PositiveIntegerField() diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 7acc365b..85103449 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT import os +import re import shutil from rest_framework import serializers @@ -187,16 +188,25 @@ class TaskSerializer(WriteOnceMixin, serializers.ModelSerializer): fields = ('url', 'id', 'name', 'size', 'mode', 'owner', 'assignee', 'bug_tracker', 'created_date', 'updated_date', 'overlap', 'segment_size', 'z_order', 'flipped', 'status', 'labels', 'segments', - 'image_quality') + 'image_quality', 'start_frame', 'stop_frame', 'frame_filter') read_only_fields = ('size', 'mode', 'created_date', 'updated_date', 'status') write_once_fields = ('overlap', 'segment_size', 'image_quality') ordering = ['-id'] + def validate_frame_filter(self, value): + match = re.search("step\s*=\s*([1-9]\d*)", value) + if not match: + raise serializers.ValidationError("Invalid frame filter expression") + return value + # pylint: disable=no-self-use def create(self, validated_data): labels = validated_data.pop('label_set') db_task = models.Task.objects.create(size=0, **validated_data) + db_task.start_frame = validated_data.get('start_frame', 0) + db_task.stop_frame = validated_data.get('stop_frame', 0) + db_task.frame_filter = validated_data.get('frame_filter', '') for label in labels: attributes = label.pop('attributespec_set') db_label = models.Label.objects.create(task=db_task, **label) @@ -225,6 +235,9 @@ class TaskSerializer(WriteOnceMixin, serializers.ModelSerializer): instance.flipped = validated_data.get('flipped', instance.flipped) instance.image_quality = validated_data.get('image_quality', instance.image_quality) + instance.start_frame = validated_data.get('start_frame', instance.start_frame) + instance.stop_frame = validated_data.get('stop_frame', instance.stop_frame) + instance.frame_filter = validated_data.get('frame_filter', instance.frame_filter) labels = validated_data.get('label_set', []) for label in labels: 
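            # Examples of the frame filter handling defined above (illustrative):
            #   Task(frame_filter="step=3").get_frame_step() -> 3
            #   Task(frame_filter="").get_frame_step()       -> 1 (keep every frame)
            #   validate_frame_filter rejects "step=0", since the regex only
            #   accepts positive steps matching [1-9]\d*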
attributes = label.pop('attributespec_set', []) diff --git a/cvat/apps/engine/static/engine/js/annotationParser.js b/cvat/apps/engine/static/engine/js/annotationParser.js index 1f0cfc6d..447055c4 100644 --- a/cvat/apps/engine/static/engine/js/annotationParser.js +++ b/cvat/apps/engine/static/engine/js/annotationParser.js @@ -327,7 +327,7 @@ class AnnotationParser { shapes: [], }; - if (path.frame < this._startFrame || path.frame > this._stopFrame) { + if (path.frame > this._stopFrame) { continue; } diff --git a/cvat/apps/engine/static/engine/js/shapeCollection.js b/cvat/apps/engine/static/engine/js/shapeCollection.js index be124699..cffa804e 100644 --- a/cvat/apps/engine/static/engine/js/shapeCollection.js +++ b/cvat/apps/engine/static/engine/js/shapeCollection.js @@ -1453,6 +1453,7 @@ class ShapeCollectionView { let newModels = newShapes.map((el) => el.model); const frameChanged = this._frameMarker !== window.cvat.player.frames.current; + this._scale = window.cvat.player.geometry.scale; if (frameChanged) { this._frameContent.node.parent = null; diff --git a/cvat/apps/engine/static/engine/js/shapeCreator.js b/cvat/apps/engine/static/engine/js/shapeCreator.js index 76f7ec55..78d3b155 100644 --- a/cvat/apps/engine/static/engine/js/shapeCreator.js +++ b/cvat/apps/engine/static/engine/js/shapeCreator.js @@ -54,12 +54,12 @@ class ShapeCreatorModel extends Listener { }); } - if (this._defaultMode === 'interpolation' && this._defaultType === 'box') { + // FIXME: In the future we have to make some generic solution + if (this._defaultMode === 'interpolation' && ['box', 'points'].includes(this._defaultType)) { data.shapes = []; data.shapes.push(Object.assign({}, result, data)); - this._shapeCollection.add(data, `interpolation_box`); - } - else { + this._shapeCollection.add(data, `interpolation_${this._defaultType}`); + } else { Object.assign(data, result); this._shapeCollection.add(data, `annotation_${this._defaultType}`); } @@ -213,11 +213,14 @@ class ShapeCreatorView { } this._typeSelector.on('change', (e) => { - let type = $(e.target).prop('value'); - if (type != 'box' && this._modeSelector.prop('value') != 'annotation') { + // FIXME: In the future we have to make some generic solution + const mode = this._modeSelector.prop('value'); + const type = $(e.target).prop('value'); + if (type !== 'box' && !(type === 'points' && this._polyShapeSize === 1) + && mode !== 'annotation') { this._modeSelector.prop('value', 'annotation'); this._controller.setDefaultShapeMode('annotation'); - showMessage('Poly shapes available only like annotation shapes'); + showMessage('Only the annotation mode allowed for the shape'); } this._controller.setDefaultShapeType(type); }).trigger('change'); @@ -227,20 +230,30 @@ class ShapeCreatorView { }).trigger('change'); this._modeSelector.on('change', (e) => { - let mode = $(e.target).prop('value'); - if (mode != 'annotation' && this._typeSelector.prop('value') != 'box') { + // FIXME: In the future we have to make some generic solution + const mode = $(e.target).prop('value'); + const type = this._typeSelector.prop('value'); + if (mode !== 'annotation' && !(type === 'points' && this._polyShapeSize === 1) + && type !== 'box') { this._typeSelector.prop('value', 'box'); this._controller.setDefaultShapeType('box'); - showMessage('Only boxes available like interpolation shapes'); + showMessage('Only boxes and single point allowed in the interpolation mode'); } this._controller.setDefaultShapeMode(mode); }).trigger('change'); this._polyShapeSizeInput.on('change', (e) => { 
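            // Combinations the two handlers above allow outside plain annotation
            // mode: 'box' tracks, plus 'points' tracks with exactly one point
            // (this._polyShapeSize === 1); any other poly shape is switched back
            // to annotation mode.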
e.stopPropagation(); - let size = + e.target.value; + let size = +e.target.value; if (size < 0) size = 0; if (size > 100) size = 0; + const mode = this._modeSelector.prop('value'); + const type = this._typeSelector.prop('value'); + if (mode === 'interpolation' && type === 'points' && size !== 1) { + showMessage('Only single point allowed in the interpolation mode'); + size = 1; + } + e.target.value = size || ''; this._polyShapeSize = size; }).trigger('change'); @@ -265,6 +278,7 @@ class ShapeCreatorView { let size = this._polyShapeSize; let sizeDecrement = function() { if (!--size) { + numberOfPoints = this._polyShapeSize; this._drawInstance.draw('done'); } }.bind(this); @@ -323,7 +337,7 @@ class ShapeCreatorView { this._drawInstance.draw('point', e); lastPoint = { x: e.clientX, - y: e.clientY + y: e.clientY, }; } } diff --git a/cvat/apps/engine/static/engine/js/shapes.js b/cvat/apps/engine/static/engine/js/shapes.js index e0373aaf..2ff5c4e9 100644 --- a/cvat/apps/engine/static/engine/js/shapes.js +++ b/cvat/apps/engine/static/engine/js/shapes.js @@ -341,8 +341,8 @@ class ShapeModel extends Listener { } switchOutside(frame) { - // Only for interpolation boxes - if (this._type != 'interpolation_box') { + // Only for interpolation shapes + if (this._type.split('_')[0] !== 'interpolation') { return; } @@ -379,7 +379,7 @@ class ShapeModel extends Listener { if (frame < this._frame) { if (this._frame in this._attributes.mutable) { this._attributes.mutable[frame] = this._attributes.mutable[this._frame]; - delete(this._attributes.mutable[this._frame]); + delete (this._attributes.mutable[this._frame]); } this._frame = frame; } @@ -388,17 +388,17 @@ class ShapeModel extends Listener { } switchKeyFrame(frame) { - // Only for interpolation boxes - if (this._type != 'interpolation_box') { + // Only for interpolation shapes + if (this._type.split('_')[0] !== 'interpolation') { return; } // Undo/redo code - let oldPos = Object.assign({}, this._positions[frame]); + const oldPos = Object.assign({}, this._positions[frame]); window.cvat.addAction('Change Keyframe', () => { this.switchKeyFrame(frame); - if (Object.keys(oldPos).length && oldPos.outside) { - this.switchOutside(frame); + if (frame in this._positions) { + this.updatePosition(frame, oldPos); } }, () => { this.switchKeyFrame(frame); @@ -411,19 +411,18 @@ class ShapeModel extends Listener { this._frame = Object.keys(this._positions).map((el) => +el).sort((a,b) => a - b)[1]; if (frame in this._attributes.mutable) { this._attributes.mutable[this._frame] = this._attributes.mutable[frame]; - delete(this._attributes.mutable[frame]); + delete (this._attributes.mutable[frame]); } } - delete(this._positions[frame]); - } - else { + delete (this._positions[frame]); + } else { let position = this._interpolatePosition(frame); this.updatePosition(frame, position, true); if (frame < this._frame) { if (this._frame in this._attributes.mutable) { this._attributes.mutable[frame] = this._attributes.mutable[this._frame]; - delete(this._attributes.mutable[this._frame]); + delete (this._attributes.mutable[this._frame]); } this._frame = frame; } @@ -917,7 +916,7 @@ class PolyShapeModel extends ShapeModel { } return Object.assign({}, leftPos, { - outside: leftFrame != frame, + outside: leftPos.outside || leftFrame !== frame, }); } @@ -952,9 +951,14 @@ class PolyShapeModel extends ShapeModel { if (this._verifyArea(box)) { if (!silent) { // Undo/redo code - let oldPos = Object.assign({}, this._positions[frame]); + const oldPos = Object.assign({}, this._positions[frame]); 
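            // Note the related fix in _interpolatePosition above: with
            // `outside: leftPos.outside || leftFrame !== frame`, a shape sitting
            // exactly on an outside keyframe now reports outside = true as well,
            // instead of only non-keyframe frames being hidden.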
window.cvat.addAction('Change Position', () => { - this.updatePosition(frame, oldPos, false); + if (!Object.keys(oldPos).length) { + delete this._positions[frame]; + this.notify('position'); + } else { + this.updatePosition(frame, oldPos, false); + } }, () => { this.updatePosition(frame, pos, false); }, frame); @@ -962,7 +966,7 @@ class PolyShapeModel extends ShapeModel { } if (this._type.startsWith('annotation')) { - if (this._frame != frame) { + if (this._frame !== frame) { throw Error(`Got bad frame for annotation poly shape during update position: ${frame}. Own frame is ${this._frame}`); } this._positions[frame] = pos; @@ -1145,6 +1149,60 @@ class PointsModel extends PolyShapeModel { this._minPoints = 1; } + _interpolatePosition(frame) { + if (this._type.startsWith('annotation')) { + return Object.assign({}, this._positions[this._frame], { + outside: this._frame !== frame, + }); + } + + let [leftFrame, rightFrame] = this._neighboringFrames(frame); + if (frame in this._positions) { + leftFrame = frame; + } + + let leftPos = null; + let rightPos = null; + + if (leftFrame != null) leftPos = this._positions[leftFrame]; + if (rightFrame != null) rightPos = this._positions[rightFrame]; + + if (!leftPos) { + if (rightPos) { + return Object.assign({}, rightPos, { + outside: true, + }); + } + + return { + outside: true, + }; + } + + if (frame === leftFrame || leftPos.outside || !rightPos || rightPos.outside) { + return Object.assign({}, leftPos); + } + + const rightPoints = PolyShapeModel.convertStringToNumberArray(rightPos.points); + const leftPoints = PolyShapeModel.convertStringToNumberArray(leftPos.points); + + if (rightPoints.length === leftPoints.length && leftPoints.length === 1) { + const moveCoeff = (frame - leftFrame) / (rightFrame - leftFrame); + const interpolatedPoints = [{ + x: leftPoints[0].x + (rightPoints[0].x - leftPoints[0].x) * moveCoeff, + y: leftPoints[0].y + (rightPoints[0].y - leftPoints[0].y) * moveCoeff, + }]; + + return Object.assign({}, leftPos, { + points: PolyShapeModel.convertNumberArrayToString(interpolatedPoints), + }); + } + + return Object.assign({}, leftPos, { + outside: true, + }); + } + distance(mousePos, frame) { let pos = this._interpolatePosition(frame); if (pos.outside) return Number.MAX_SAFE_INTEGER; @@ -1958,19 +2016,17 @@ class ShapeView extends Listener { if (type.split('_')[0] == 'interpolation') { let interpolationCenter = document.createElement('center'); - if (type.split('_')[1] == 'box') { - let outsideButton = document.createElement('button'); - outsideButton.classList.add('graphicButton', 'outsideButton'); + let outsideButton = document.createElement('button'); + outsideButton.classList.add('graphicButton', 'outsideButton'); - let keyframeButton = document.createElement('button'); - keyframeButton.classList.add('graphicButton', 'keyFrameButton'); + let keyframeButton = document.createElement('button'); + keyframeButton.classList.add('graphicButton', 'keyFrameButton'); - interpolationCenter.appendChild(outsideButton); - interpolationCenter.appendChild(keyframeButton); + interpolationCenter.appendChild(outsideButton); + interpolationCenter.appendChild(keyframeButton); - this._uis.buttons['outside'] = outsideButton; - this._uis.buttons['keyframe'] = keyframeButton; - } + this._uis.buttons['outside'] = outsideButton; + this._uis.buttons['keyframe'] = keyframeButton; let prevKeyFrameButton = document.createElement('button'); prevKeyFrameButton.classList.add('graphicButton', 'prevKeyFrameButton'); @@ -2928,6 +2984,11 @@ class PolyShapeView extends 
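// Worked example for PointsModel._interpolatePosition above: a single point at
// (100, 100) on keyframe 10 and (200, 150) on keyframe 20 gives
// moveCoeff = (15 - 10) / (20 - 10) = 0.5 on frame 15, i.e. an interpolated
// point at (150, 125); shapes with more than one point are not interpolated
// and fall through to the `outside: true` branch.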
ShapeView { }); point.on('dblclick.polyshapeEditor', (e) => { + if (this._controller.type === 'interpolation_points') { + // Not available for interpolation points + return; + } + if (e.shiftKey) { if (!window.cvat.mode) { // Get index before detach shape from DOM @@ -3125,7 +3186,7 @@ class PointsView extends PolyShapeView { _drawPointMarkers(position) { - if (this._uis.points) { + if (this._uis.points || position.outside) { return; } diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 5160c7d7..ca1992e4 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -7,6 +7,7 @@ import os import sys import rq import shutil +import subprocess import tempfile import numpy as np from PIL import Image @@ -48,15 +49,27 @@ def rq_handler(job, exc_type, exc_value, traceback): ############################# Internal implementation for server API class _FrameExtractor: - def __init__(self, source_path, compress_quality, flip_flag=False): + def __init__(self, source_path, compress_quality, step=1, start=0, stop=0, flip_flag=False): # translate inversed range 1:95 to 2:32 translated_quality = 96 - compress_quality translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) + self.source = source_path self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') target_path = os.path.join(self.output, '%d.jpg') output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) + filters = '' + if stop > 0: + filters = 'between(n,' + str(start) + ',' + str(stop) + ')' + elif start > 0: + filters = 'gte(n,' + str(start) + ')' + if step > 1: + filters += ('*' if filters else '') + 'not(mod(n-' + str(start) + ',' + str(step) + '))' + if filters: + filters = "select=\"'" + filters + "'\"" if flip_flag: - output_opts += ' -vf "transpose=2,transpose=2"' + filters += (',' if filters else '') + 'transpose=2,transpose=2' + if filters: + output_opts += ' -vf ' + filters ff = FFmpeg( inputs = {source_path: None}, outputs = {target_path: output_opts}) @@ -170,12 +183,13 @@ def _unpack_archive(archive, upload_dir): Archive(archive).extractall(upload_dir) os.remove(archive) -def _copy_video_to_task(video, db_task): +def _copy_video_to_task(video, db_task, step): job = rq.get_current_job() job.meta['status'] = 'Video is being extracted..' 
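    # Examples of the ffmpeg select filter assembled by _FrameExtractor above:
    #   start=10, stop=100, step=5 -> -vf select="'between(n,10,100)*not(mod(n-10,5))'"
    #   start=0,  stop=0,   step=2 -> -vf select="'not(mod(n-0,2))'"
    #   flip only                  -> -vf transpose=2,transpose=2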
job.save_meta() - extractor = _FrameExtractor(video, db_task.image_quality) + extractor = _FrameExtractor(video, db_task.image_quality, + step, db_task.start_frame, db_task.stop_frame) for frame, image_orig_path in enumerate(extractor): image_dest_path = db_task.get_frame_path(frame) db_task.size += 1 @@ -183,10 +197,11 @@ def _copy_video_to_task(video, db_task): if not os.path.exists(dirname): os.makedirs(dirname) shutil.copyfile(image_orig_path, image_dest_path) + if db_task.stop_frame == 0: + db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * step image = Image.open(db_task.get_frame_path(0)) models.Video.objects.create(task=db_task, path=video, - start_frame=0, stop_frame=db_task.size, step=1, width=image.width, height=image.height) image.close() @@ -351,7 +366,7 @@ def _create_thread(tid, data): if video: db_task.mode = "interpolation" video = os.path.join(upload_dir, video) - _copy_video_to_task(video, db_task) + _copy_video_to_task(video, db_task, db_task.get_frame_step()) else: db_task.mode = "annotation" _copy_images_to_task(upload_dir, db_task) diff --git a/cvat/requirements/testing.txt b/cvat/requirements/testing.txt index cb208059..b6468d78 100644 --- a/cvat/requirements/testing.txt +++ b/cvat/requirements/testing.txt @@ -1,2 +1,2 @@ --f development.txt +-r development.txt fakeredis==1.0.3 \ No newline at end of file diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml index 2af5173d..b0bcb0f3 100644 --- a/docker-compose.ci.yml +++ b/docker-compose.ci.yml @@ -5,6 +5,6 @@ services: build: args: WITH_TESTS: "yes" + network: host environment: - COVERALLS_REPO_TOKEN: - + COVERALLS_REPO_TOKEN: diff --git a/utils/README.md b/utils/README.md index 68735adf..2d856c6c 100644 --- a/utils/README.md +++ b/utils/README.md @@ -4,7 +4,8 @@ ## Description This folder contains some useful utilities for Computer Vision Annotation Tool (CVAT). To read about a certain utility please choose a link: -- [Convert CVAT XML to PASCAL VOC](voc/converter.md) -- [Convert CVAT XML to MS COCO](coco/converter.md) -- [Convert CVAT XML to PNG mask](mask/converter.md) -- [Convert CVAT XML to TFRECORDS](tfrecords/converter.md) +- [Convert CVAT XML to PASCAL VOC](voc/converter.md) +- [Convert CVAT XML to MS COCO](coco/converter.md) +- [Convert CVAT XML to PNG mask](mask/converter.md) +- [Convert CVAT XML to TFRECORDS](tfrecords/converter.md) +- [Convert CVAT XML to YOLO](yolo/converter.md) diff --git a/utils/yolo/__init__.py b/utils/yolo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/utils/yolo/converter.md b/utils/yolo/converter.md new file mode 100644 index 00000000..6ac3ec05 --- /dev/null +++ b/utils/yolo/converter.md @@ -0,0 +1,38 @@ +# Utility for converting CVAT XML annotation file to YOLO format + +## Description + +Given a CVAT XML, this script reads the CVAT XML and writes the +annotations in YOLO format into a given directory. This implementation +supports both interpolation tracks from video and annotated images. + +## Installation + +Install necessary packages and create a virtual environment. + +```bash +sudo apt-get update +sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev +``` + +```bash +python3 -m venv .env +. 
.env/bin/activate
+cat requirements.txt | xargs -n 1 -L 1 pip install
+```
+
+## Usage
+
+Run the script inside the virtual environment:
+
+```bash
+python converter.py --cvat-xml <FILE> --image-dir <DIRECTORY> --output-dir <DIRECTORY>
+```
+
+In case you need to download frames from an annotated video file submitted to CVAT:
+
+```bash
+python converter.py --cvat-xml <FILE> --output-dir <DIRECTORY> --username <USERNAME> --password <PASSWORD>
+```
+
+Please run `python converter.py --help` for more details.
diff --git a/utils/yolo/converter.py b/utils/yolo/converter.py
new file mode 100644
index 00000000..23da12b9
--- /dev/null
+++ b/utils/yolo/converter.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python
+#
+# SPDX-License-Identifier: MIT
+"""
+Given a CVAT XML, this script reads the CVAT XML and writes the
+annotations in YOLO format into a given directory.
+
+This implementation supports both interpolation tracks from video and
+annotated images.
+"""
+
+import os
+import argparse
+import glog as log
+from lxml import etree
+import requests
+
+
+def parse_args():
+    """Parse arguments of command line"""
+    parser = argparse.ArgumentParser(
+        description='Convert CVAT XML annotations to YOLO format'
+    )
+
+    parser.add_argument(
+        '--cvat-xml', metavar='FILE', required=True,
+        help='input file with CVAT annotation in xml format'
+    )
+
+    parser.add_argument(
+        '--image-dir', metavar='DIRECTORY', required=False,
+        help='directory which contains original images'
+    )
+
+    parser.add_argument(
+        '--output-dir', metavar='DIRECTORY', required=True,
+        help='directory for output annotations in YOLO format'
+    )
+
+    parser.add_argument(
+        '--username', metavar='USERNAME', required=False,
+        help='Username from CVAT Login page, required to download images'
+    )
+
+    parser.add_argument(
+        '--password', metavar='PASSWORD', required=False,
+        help='Password from CVAT Login page, required to download images'
+    )
+
+    parser.add_argument(
+        '--labels', metavar='ILABELS', required=False,
+        help='Labels (separated by comma) to extract. Example: car,truck,motorcycle'
+    )
+
+    return parser.parse_args()
+
+
+def process_cvat_xml(xml_file, image_dir, output_dir, username, password, ilabels):
+    """
+    Transforms a single XML in CVAT format to YOLO TXT files and downloads
+    images when they are not found in IMAGE_DIR
+
+    :param xml_file: CVAT format XML
+    :param image_dir: image directory of the dataset
+    :param output_dir: directory of annotations with YOLO format
+    :param username: Username used to log in to CVAT. Required to download images
+    :param password: Password used to log in to CVAT.
Required to download images + :param ilabels: Comma separated ordered labels + :return: + """ + KNOWN_TAGS = {'box', 'image', 'attribute'} + + if (image_dir is None): + image_dir=os.path.join(output_dir,"data/obj") + os.makedirs(image_dir, exist_ok=True) + + os.makedirs(output_dir, exist_ok=True) + cvat_xml = etree.parse(xml_file) + basename = os.path.splitext( os.path.basename( xml_file ) )[0] + current_labels = {} + traintxt = "" + auto_lbl_count = 0 + + if (ilabels is not None): + vlabels=ilabels.split(',') + for _label in vlabels: + current_labels[_label]=auto_lbl_count + auto_lbl_count+=1 + + tracks= cvat_xml.findall( './/track' ) + + if (tracks is not None) and (len(tracks) > 0): + frames = {} + + for track in tracks: + trackid = int(track.get("id")) + label = track.get("label") + boxes = track.findall( './box' ) + for box in boxes: + frameid = int(box.get('frame')) + outside = int(box.get('outside')) + #occluded = int(box.get('occluded')) #currently unused + #keyframe = int(box.get('keyframe')) #currently unused + xtl = float(box.get('xtl')) + ytl = float(box.get('ytl')) + xbr = float(box.get('xbr')) + ybr = float(box.get('ybr')) + + frame = frames.get( frameid, {} ) + + if outside == 0: + frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label } + + frames[ frameid ] = frame + + width = int(cvat_xml.find('.//original_size/width').text) + height = int(cvat_xml.find('.//original_size/height').text) + + taskid = int(cvat_xml.find('.//task/id').text) + + urlsegment = cvat_xml.find(".//segments/segment/url").text + urlbase = urlsegment.split("?")[0] + + httpclient = requests.session() + httpclient.get(urlbase) + + csrftoken = "none" + sessionid = "none" + + # Spit out a list of each object for each frame + for frameid in sorted(frames.keys()): + image_name = "%s_%08d.jpg" % (basename, frameid) + image_path = os.path.join(image_dir, image_name) + if not os.path.exists(image_path): + if username is None: + log.warn('{} image cannot be found. Is `{}` image directory correct?\n'.format(image_path, image_dir)) + else: + log.info('{} image cannot be found. 
Downloading from task ID {}\n'.format(image_path, taskid)) + + if sessionid == "none": + if "csrftoken" in httpclient.cookies: + csrftoken = httpclient.cookies["csrftoken"] + elif "csrf" in httpclient.cookies: + csrftoken = httpclient.cookies["csrf"] + + login_data = dict(username=username, password=password, + csrfmiddlewaretoken=csrftoken, next='/dashboard') + + urllogin = urlbase+"/auth/login" + httpclient.post(urllogin, data=login_data, + headers=dict(Referer=urllogin)) + + if ("sessionid" in httpclient.cookies): + sessionid = httpclient.cookies["sessionid"] + + url = urlbase+"/api/v1/tasks/"+str(taskid)+"/frames/"+ str(frameid) + + req = httpclient.get(url, headers=dict( + csrftoken=csrftoken, sessionid=sessionid)) + + with open(image_path, 'wb') as fo: + fo.write(req.content) + print('Url saved as %s\n' % image_path) + + + frame = frames[frameid] + + _yoloAnnotationContent="" + + objids = sorted(frame.keys()) + + for objid in objids: + + box = frame[objid] + + label = box.get('label') + xmin = float(box.get('xtl')) + ymin = float(box.get('ytl')) + xmax = float(box.get('xbr')) + ymax = float(box.get('ybr')) + + if not label in current_labels: + current_labels[label] = auto_lbl_count + auto_lbl_count+=1 + + labelid=current_labels[label] + yolo_x= (xmin + ((xmax-xmin)/2))/width + yolo_y= (ymin + ((ymax-ymin)/2))/height + yolo_w = (xmax - xmin) / width + yolo_h = (ymax - ymin) / height + + if len(_yoloAnnotationContent) != 0: + _yoloAnnotationContent += "\n" + + _yoloAnnotationContent+=str(labelid)+" "+"{:.6f}".format(yolo_x) +" "+"{:.6f}".format(yolo_y) +" "+"{:.6f}".format(yolo_w) +" "+"{:.6f}".format(yolo_h) + anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt') + anno_path = os.path.join(image_dir, anno_name) + + _yoloFile = open(anno_path, "w", newline="\n") + _yoloFile.write(_yoloAnnotationContent) + _yoloFile.close() + + if len(traintxt)!=0: + traintxt+="\n" + + traintxt+=image_path + + else: + for img_tag in cvat_xml.findall('image'): + image_name = img_tag.get('name') + width = img_tag.get('width') + height = img_tag.get('height') + image_path = os.path.join(image_dir, image_name) + if not os.path.exists(image_path): + log.warn('{} image cannot be found. Is `{}` image directory correct?'. 
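                # Each annotation line written below follows YOLO's convention:
                #   <label_id> <center_x> <center_y> <width> <height>
                # with all four values normalized to [0, 1], e.g.
                #   "0 0.130208 0.115741 0.052083 0.046296"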
+ format(image_path, image_dir)) + + unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS) + if unknown_tags: + log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags)) + + _yoloAnnotationContent = "" + + for box in img_tag.findall('box'): + label = box.get('label') + xmin = float(box.get('xtl')) + ymin = float(box.get('ytl')) + xmax = float(box.get('xbr')) + ymax = float(box.get('ybr')) + + if not label in current_labels: + current_labels[label] = auto_lbl_count + auto_lbl_count += 1 + + labelid = current_labels[label] + yolo_x = (xmin + ((xmax-xmin)/2))/width + yolo_y = (ymin + ((ymax-ymin)/2))/height + yolo_w = (xmax - xmin) / width + yolo_h = (ymax - ymin) / height + + if len(_yoloAnnotationContent) != 0: + _yoloAnnotationContent += "\n" + + _yoloAnnotationContent += str(labelid)+" "+"{:.6f}".format(yolo_x) + " "+"{:.6f}".format( + yolo_y) + " "+"{:.6f}".format(yolo_w) + " "+"{:.6f}".format(yolo_h) + + anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt') + anno_path = os.path.join(image_dir, anno_name) + + _yoloFile = open(anno_path, "w", newline="\n") + _yoloFile.write(_yoloAnnotationContent) + _yoloFile.close() + + traintxt_file=open(output_dir+"/train.txt","w",newline="\n") + traintxt_file.write(traintxt) + traintxt_file.close() + + +def main(): + args = parse_args() + process_cvat_xml(args.cvat_xml, args.image_dir, args.output_dir, args.username,args.password,args.labels) + + +if __name__ == "__main__": + main() diff --git a/utils/yolo/requirements.txt b/utils/yolo/requirements.txt new file mode 100644 index 00000000..b76cddb6 --- /dev/null +++ b/utils/yolo/requirements.txt @@ -0,0 +1,4 @@ +argparse>=1.1 +lxml>=3.5.0 +glog>=0.3.1 +requests==2.22.0
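For reference, here is a minimal standalone sketch of the box-to-YOLO conversion that `converter.py` performs, assuming the usual normalized-center YOLO convention; `box_to_yolo` is an illustrative helper name, not part of the converter:

```python
def box_to_yolo(xtl, ytl, xbr, ybr, width, height):
    """Convert absolute pixel corners to YOLO's normalized (cx, cy, w, h)."""
    yolo_x = (xtl + (xbr - xtl) / 2) / width    # normalized box center, x
    yolo_y = (ytl + (ybr - ytl) / 2) / height   # normalized box center, y
    yolo_w = (xbr - xtl) / width                # normalized box width
    yolo_h = (ybr - ytl) / height               # normalized box height
    return yolo_x, yolo_y, yolo_w, yolo_h

# A 100x50 px box with top-left corner (200, 100) in a 1920x1080 frame:
print(["{:.6f}".format(v) for v in box_to_yolo(200, 100, 300, 150, 1920, 1080)])
# -> ['0.130208', '0.115741', '0.052083', '0.046296']
```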