SDK: Add an adapter layer that presents a CVAT task as a torchvision dataset (#5417)
parent 82adde42aa
commit 487c60ce2b

@@ -0,0 +1,359 @@
import base64
import collections
import json
import os
import shutil
import types
import zipfile
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import (
    Callable,
    Dict,
    FrozenSet,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
    Type,
    TypeVar,
)

import appdirs
import attrs
import attrs.validators
import PIL.Image
import torchvision.datasets
from typing_extensions import TypedDict

import cvat_sdk.core
import cvat_sdk.core.exceptions
from cvat_sdk.api_client.model_utils import to_json
from cvat_sdk.core.utils import atomic_writer
from cvat_sdk.models import DataMetaRead, LabeledData, LabeledImage, LabeledShape, TaskRead

_ModelType = TypeVar("_ModelType")

_CACHE_DIR = Path(appdirs.user_cache_dir("cvat-sdk", "CVAT.ai"))
_NUM_DOWNLOAD_THREADS = 4


class UnsupportedDatasetError(cvat_sdk.core.exceptions.CvatSdkException):
    pass


@attrs.frozen
class FrameAnnotations:
    """
    Contains annotations that pertain to a single frame.
    """

    tags: List[LabeledImage] = attrs.Factory(list)
    shapes: List[LabeledShape] = attrs.Factory(list)


@attrs.frozen
class Target:
    """
    Non-image data for a dataset sample.
    """

    annotations: FrameAnnotations
    """Annotations for the frame corresponding to the sample."""

    label_id_to_index: Mapping[int, int]
    """
    A mapping from label_id values in `LabeledImage` and `LabeledShape` objects
    to an index in the range [0, num_labels), where num_labels is the number of labels
    defined in the task. This mapping is consistent across all samples for a given task.
    """


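# Illustrative note: label_id_to_index sorts the task's label IDs and enumerates
# them, so a task whose labels have (hypothetical) IDs 11 and 17 would map them
# as {11: 0, 17: 1}.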
class TaskVisionDataset(torchvision.datasets.VisionDataset):
    """
    Represents a task on a CVAT server as a PyTorch Dataset.

    This dataset contains one sample for each frame in the task, in the same
    order as the frames are in the task. Deleted frames are omitted.
    Before transforms are applied, each sample is a tuple of
    (image, target), where:

    * image is a `PIL.Image.Image` object for the corresponding frame.
    * target is a `Target` object containing annotations for the frame.

    This class caches all data and annotations for the task on the local file system
    during construction. If the task is updated on the server, the cache is updated.

    Limitations:

    * Only tasks with image (not video) data are supported at the moment.
    * Track annotations are currently not accessible.
    """

    def __init__(
        self,
        client: cvat_sdk.core.Client,
        task_id: int,
        *,
        transforms: Optional[Callable] = None,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        """
        Creates a dataset corresponding to the task with ID `task_id` on the
        server that `client` is connected to.

        `transforms`, `transform` and `target_transform` are optional transformation
        functions; see the documentation for `torchvision.datasets.VisionDataset` for
        more information.
        """

        self._logger = client.logger

        self._logger.info(f"Fetching task {task_id}...")
        self._task = client.tasks.retrieve(task_id)

        if not self._task.size or not self._task.data_chunk_size:
            raise UnsupportedDatasetError("The task has no data")

        if self._task.data_original_chunk_type != "imageset":
            raise UnsupportedDatasetError(
                f"{self.__class__.__name__} only supports tasks with image chunks;"
                f" current chunk type is {self._task.data_original_chunk_type!r}"
            )

        # Base64-encode the name to avoid FS-unsafe characters (like slashes)
        server_dir_name = (
            base64.urlsafe_b64encode(client.api_map.host.encode()).rstrip(b"=").decode()
        )
        server_dir = _CACHE_DIR / f"servers/{server_dir_name}"

        self._task_dir = server_dir / f"tasks/{self._task.id}"
        self._initialize_task_dir()

        super().__init__(
            os.fspath(self._task_dir),
            transforms=transforms,
            transform=transform,
            target_transform=target_transform,
        )

        data_meta = self._ensure_model(
            "data_meta.json", DataMetaRead, self._task.get_meta, "data metadata"
        )
        self._active_frame_indexes = sorted(
            set(range(self._task.size)) - set(data_meta.deleted_frames)
        )

        self._logger.info("Downloading chunks...")

        self._chunk_dir = self._task_dir / "chunks"
        self._chunk_dir.mkdir(exist_ok=True, parents=True)

        needed_chunks = {
            index // self._task.data_chunk_size for index in self._active_frame_indexes
        }

        with ThreadPoolExecutor(_NUM_DOWNLOAD_THREADS) as pool:
            for _ in pool.map(self._ensure_chunk, sorted(needed_chunks)):
                # just need to loop through all results so that any exceptions are propagated
                pass

        self._logger.info("All chunks downloaded")

        self._label_id_to_index = types.MappingProxyType(
            {
                label["id"]: label_index
                for label_index, label in enumerate(sorted(self._task.labels, key=lambda l: l.id))
            }
        )

        annotations = self._ensure_model(
            "annotations.json", LabeledData, self._task.get_annotations, "annotations"
        )

        self._frame_annotations: Dict[int, FrameAnnotations] = collections.defaultdict(
            FrameAnnotations
        )

        for tag in annotations.tags:
            self._frame_annotations[tag.frame].tags.append(tag)

        for shape in annotations.shapes:
            self._frame_annotations[shape.frame].shapes.append(shape)

        # TODO: tracks?

    def _initialize_task_dir(self) -> None:
        task_json_path = self._task_dir / "task.json"

        try:
            with open(task_json_path, "rb") as task_json_file:
                saved_task = TaskRead._new_from_openapi_data(**json.load(task_json_file))
        except Exception:
            self._logger.info("Task is not yet cached or the cache is corrupted")

            # If the cache was corrupted, the directory might already be there; clear it.
            if self._task_dir.exists():
                shutil.rmtree(self._task_dir)
        else:
            if saved_task.updated_date < self._task.updated_date:
                self._logger.info(
                    "Task has been updated on the server since it was cached; purging the cache"
                )
                shutil.rmtree(self._task_dir)

        self._task_dir.mkdir(exist_ok=True, parents=True)

        with atomic_writer(task_json_path, "w", encoding="UTF-8") as task_json_file:
            json.dump(to_json(self._task._model), task_json_file, indent=4)
            print(file=task_json_file)  # add final newline

    def _ensure_chunk(self, chunk_index: int) -> None:
        chunk_path = self._chunk_dir / f"{chunk_index}.zip"
        if chunk_path.exists():
            return  # already downloaded previously

        self._logger.info(f"Downloading chunk #{chunk_index}...")

        with atomic_writer(chunk_path, "wb") as chunk_file:
            self._task.download_chunk(chunk_index, chunk_file, quality="original")

    def _ensure_model(
        self,
        filename: str,
        model_type: Type[_ModelType],
        download: Callable[[], _ModelType],
        model_description: str,
    ) -> _ModelType:
        path = self._task_dir / filename

        try:
            with open(path, "rb") as f:
                model = model_type._new_from_openapi_data(**json.load(f))
            self._logger.info(f"Loaded {model_description} from cache")
            return model
        except FileNotFoundError:
            pass
        except Exception:
            self._logger.warning(f"Failed to load {model_description} from cache", exc_info=True)

        self._logger.info(f"Downloading {model_description}...")
        model = download()
        self._logger.info(f"Downloaded {model_description}")

        with atomic_writer(path, "w", encoding="UTF-8") as f:
            json.dump(to_json(model), f, indent=4)
            print(file=f)  # add final newline

        return model

    def __getitem__(self, sample_index: int):
        """
        Returns the sample with index `sample_index`.

        `sample_index` must satisfy the condition `0 <= sample_index < len(self)`.
        """

        frame_index = self._active_frame_indexes[sample_index]
        chunk_index = frame_index // self._task.data_chunk_size
        member_index = frame_index % self._task.data_chunk_size
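
        # For illustration: with data_chunk_size == 3, frame 7 would live in
        # chunk 2 (7 // 3) at member position 1 (7 % 3) within that archive.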
        with zipfile.ZipFile(self._chunk_dir / f"{chunk_index}.zip", "r") as chunk_zip:
            with chunk_zip.open(chunk_zip.infolist()[member_index]) as chunk_member:
                sample_image = PIL.Image.open(chunk_member)
                sample_image.load()

        sample_target = Target(
            annotations=self._frame_annotations[frame_index],
            label_id_to_index=self._label_id_to_index,
        )

        if self.transforms:
            sample_image, sample_target = self.transforms(sample_image, sample_target)
        return sample_image, sample_target

    def __len__(self) -> int:
        """Returns the number of samples in the dataset."""
        return len(self._active_frame_indexes)


@attrs.frozen
class ExtractSingleLabelIndex:
    """
    A target transform that takes a `Target` object and produces a single label index
    based on the tag in that object.

    This makes the dataset samples compatible with the image classification networks
    in torchvision.

    If the annotations contain no tags, or multiple tags, raises a `ValueError`.
    """

    def __call__(self, target: Target) -> int:
        tags = target.annotations.tags
        if not tags:
            raise ValueError("sample has no tags")

        if len(tags) > 1:
            raise ValueError("sample has multiple tags")

        return target.label_id_to_index[tags[0].label_id]


class LabeledBoxes(TypedDict):
    boxes: Sequence[Tuple[float, float, float, float]]
    labels: Sequence[int]


_SUPPORTED_SHAPE_TYPES = frozenset(["rectangle", "polygon", "polyline", "points", "ellipse"])


@attrs.frozen
class ExtractBoundingBoxes:
    """
    A target transform that takes a `Target` object and returns a dictionary compatible
    with the object detection networks in torchvision.

    The dictionary contains the following entries:

    "boxes": a sequence of (xmin, ymin, xmax, ymax) tuples, one for each shape
    in the annotations.
    "labels": a sequence of corresponding label indices.

    Limitations:

    * Only the following shape types are supported: rectangle, polygon, polyline,
      points, ellipse.
    * Rotated shapes are not supported.

    Unsupported shapes will cause an `UnsupportedDatasetError` exception to be
    raised unless they are filtered out by `include_shape_types`.
    """

    include_shape_types: FrozenSet[str] = attrs.field(
        converter=frozenset,
        validator=attrs.validators.deep_iterable(attrs.validators.in_(_SUPPORTED_SHAPE_TYPES)),
        kw_only=True,
    )
    """Shapes whose type is not in this set will be ignored."""

    def __call__(self, target: Target) -> LabeledBoxes:
        boxes = []
        labels = []

        for shape in target.annotations.shapes:
            if shape.type.value not in self.include_shape_types:
                continue

            if shape.rotation != 0:
                raise UnsupportedDatasetError("Rotated shapes are not supported")

            x_coords = shape.points[0::2]
            y_coords = shape.points[1::2]

            boxes.append((min(x_coords), min(y_coords), max(x_coords), max(y_coords)))
            labels.append(target.label_id_to_index[shape.label_id])

        return LabeledBoxes(boxes=boxes, labels=labels)
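
For illustration, a minimal sketch of how this module is meant to be used for classification; the host, credentials, and task ID are placeholders, and `make_client` is the SDK's client factory:

import torchvision.transforms
from torch.utils.data import DataLoader

from cvat_sdk import make_client
from cvat_sdk.pytorch import ExtractSingleLabelIndex, TaskVisionDataset

# Placeholder host, credentials, and task ID.
with make_client("http://localhost:8080", credentials=("user", "password")) as client:
    dataset = TaskVisionDataset(
        client,
        task_id=42,
        transform=torchvision.transforms.PILToTensor(),
        # Collapse each sample's single tag into a class index.
        target_transform=ExtractSingleLabelIndex(),
    )

    # Samples are (image tensor, label index) pairs, so the default collation
    # can batch them, provided all frames have the same dimensions.
    loader = DataLoader(dataset, batch_size=4)
    images, labels = next(iter(loader))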
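A matching detection sketch, under the same placeholder assumptions. Each sample's target dict can hold a different number of boxes, so the default collation no longer applies; a common workaround is to collate each batch into plain tuples, matching the list-of-targets convention of torchvision's detection models:

import torchvision.transforms
from torch.utils.data import DataLoader

from cvat_sdk import make_client
from cvat_sdk.pytorch import ExtractBoundingBoxes, TaskVisionDataset

with make_client("http://localhost:8080", credentials=("user", "password")) as client:
    dataset = TaskVisionDataset(
        client,
        task_id=42,
        transform=torchvision.transforms.PILToTensor(),
        # Keep only rectangle shapes; other shape types are filtered out.
        target_transform=ExtractBoundingBoxes(include_shape_types={"rectangle"}),
    )

    # Collate a batch into (images, targets) tuples instead of stacked tensors.
    loader = DataLoader(dataset, batch_size=2, collate_fn=lambda batch: tuple(zip(*batch)))
    images, targets = next(iter(loader))
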
@@ -0,0 +1,207 @@
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import io
import os
from logging import Logger
from pathlib import Path
from typing import Tuple

import pytest
from cvat_sdk import Client, models
from cvat_sdk.core.proxies.tasks import ResourceType

try:
    import cvat_sdk.pytorch as cvatpt
    import PIL.Image
    import torch
    import torchvision.transforms
    import torchvision.transforms.functional as TF
    from torch.utils.data import DataLoader
except ImportError:
    cvatpt = None

from shared.utils.helpers import generate_image_files


@pytest.mark.skipif(cvatpt is None, reason="PyTorch dependencies are not installed")
class TestTaskVisionDataset:
    @pytest.fixture(autouse=True)
    def setup(
        self,
        monkeypatch: pytest.MonkeyPatch,
        tmp_path: Path,
        fxt_login: Tuple[Client, str],
        fxt_logger: Tuple[Logger, io.StringIO],
        fxt_stdout: io.StringIO,
    ):
        self.tmp_path = tmp_path
        logger, self.logger_stream = fxt_logger
        self.stdout = fxt_stdout
        self.client, self.user = fxt_login
        self.client.logger = logger

        api_client = self.client.api_client
        for k in api_client.configuration.logger:
            api_client.configuration.logger[k] = logger

        monkeypatch.setattr(cvatpt, "_CACHE_DIR", self.tmp_path / "cache")

        self._create_task()

        yield

    def _create_task(self):
        self.images = generate_image_files(10)

        image_dir = self.tmp_path / "images"
        image_dir.mkdir()

        image_paths = []
        for image in self.images:
            image_path = image_dir / image.name
            image_path.write_bytes(image.getbuffer())
            image_paths.append(image_path)

        self.task = self.client.tasks.create_from_data(
            models.TaskWriteRequest(
                "PyTorch integration test task",
                labels=[
                    models.PatchedLabelRequest(name="person"),
                    models.PatchedLabelRequest(name="car"),
                ],
            ),
            ResourceType.LOCAL,
            list(map(os.fspath, image_paths)),
            data_params={"chunk_size": 3},
        )

        self.label_ids = sorted(l.id for l in self.task.labels)

        self.task.update_annotations(
            models.PatchedLabeledDataRequest(
                tags=[
                    models.LabeledImageRequest(frame=5, label_id=self.label_ids[0]),
                    models.LabeledImageRequest(frame=6, label_id=self.label_ids[1]),
                    models.LabeledImageRequest(frame=8, label_id=self.label_ids[0]),
                    models.LabeledImageRequest(frame=8, label_id=self.label_ids[1]),
                ],
                shapes=[
                    models.LabeledShapeRequest(
                        frame=6,
                        label_id=self.label_ids[1],
                        type=models.ShapeType("rectangle"),
                        points=[1.0, 2.0, 3.0, 4.0],
                    ),
                    models.LabeledShapeRequest(
                        frame=7,
                        label_id=self.label_ids[0],
                        type=models.ShapeType("points"),
                        points=[1.1, 2.1, 3.1, 4.1],
                    ),
                ],
            )
        )

    def test_basic(self):
        dataset = cvatpt.TaskVisionDataset(self.client, self.task.id)

        assert len(dataset) == self.task.size

        for index, (sample_image, sample_target) in enumerate(dataset):
            sample_image_tensor = TF.pil_to_tensor(sample_image)
            reference_tensor = TF.pil_to_tensor(PIL.Image.open(self.images[index]))
            assert torch.equal(sample_image_tensor, reference_tensor)

            for index, label_id in enumerate(self.label_ids):
                assert sample_target.label_id_to_index[label_id] == index

        assert not dataset[0][1].annotations.tags
        assert not dataset[0][1].annotations.shapes

        assert len(dataset[5][1].annotations.tags) == 1
        assert dataset[5][1].annotations.tags[0].label_id == self.label_ids[0]
        assert not dataset[5][1].annotations.shapes

        assert len(dataset[6][1].annotations.tags) == 1
        assert dataset[6][1].annotations.tags[0].label_id == self.label_ids[1]
        assert len(dataset[6][1].annotations.shapes) == 1
        assert dataset[6][1].annotations.shapes[0].type.value == "rectangle"
        assert dataset[6][1].annotations.shapes[0].points == [1.0, 2.0, 3.0, 4.0]

        assert not dataset[7][1].annotations.tags
        assert len(dataset[7][1].annotations.shapes) == 1
        assert dataset[7][1].annotations.shapes[0].type.value == "points"
        assert dataset[7][1].annotations.shapes[0].points == [1.1, 2.1, 3.1, 4.1]

    def test_deleted_frame(self):
        self.task.remove_frames_by_ids([1])

        dataset = cvatpt.TaskVisionDataset(self.client, self.task.id)

        assert len(dataset) == self.task.size - 1

        # sample #0 is still frame #0
        assert torch.equal(
            TF.pil_to_tensor(dataset[0][0]), TF.pil_to_tensor(PIL.Image.open(self.images[0]))
        )

        # sample #1 is now frame #2
        assert torch.equal(
            TF.pil_to_tensor(dataset[1][0]), TF.pil_to_tensor(PIL.Image.open(self.images[2]))
        )

        # sample #4 is now frame #5
        assert len(dataset[4][1].annotations.tags) == 1
        assert dataset[4][1].annotations.tags[0].label_id == self.label_ids[0]
        assert not dataset[4][1].annotations.shapes

    def test_extract_single_label_index(self):
        dataset = cvatpt.TaskVisionDataset(
            self.client,
            self.task.id,
            transform=torchvision.transforms.PILToTensor(),
            target_transform=cvatpt.ExtractSingleLabelIndex(),
        )

        assert dataset[5][1] == 0
        assert dataset[6][1] == 1

        with pytest.raises(ValueError):
            # no tags
            _ = dataset[7]

        with pytest.raises(ValueError):
            # multiple tags
            _ = dataset[8]

        # make sure the samples can be batched with the default collator
        loader = DataLoader(dataset, batch_size=2, sampler=[5, 6])

        batch = next(iter(loader))
        assert torch.equal(batch[0][0], TF.pil_to_tensor(PIL.Image.open(self.images[5])))
        assert torch.equal(batch[0][1], TF.pil_to_tensor(PIL.Image.open(self.images[6])))
        assert torch.equal(batch[1], torch.tensor([0, 1]))

    def test_extract_bounding_boxes(self):
        dataset = cvatpt.TaskVisionDataset(
            self.client,
            self.task.id,
            transform=torchvision.transforms.PILToTensor(),
            target_transform=cvatpt.ExtractBoundingBoxes(include_shape_types={"rectangle"}),
        )

        assert dataset[0][1] == {"boxes": [], "labels": []}
        assert dataset[6][1] == {"boxes": [(1.0, 2.0, 3.0, 4.0)], "labels": [1]}
        assert dataset[7][1] == {"boxes": [], "labels": []}  # points are filtered out

    def test_transforms(self):
        dataset = cvatpt.TaskVisionDataset(
            self.client,
            self.task.id,
            transforms=lambda x, y: (y, x),
        )

        assert isinstance(dataset[0][0], cvatpt.Target)
        assert isinstance(dataset[0][1], PIL.Image.Image)