From 9cb48ef2c2323c5de12d03aafc84fec577ac31e5 Mon Sep 17 00:00:00 2001
From: zhiltsov-max
Date: Thu, 28 Nov 2019 16:24:48 +0300
Subject: [PATCH] [Datumaro] Disable lazy image caching by default (#876)

* Disable lazy image caching by default
* Deterministic cache test
* Add displacing image cache
---
 datumaro/datumaro/util/image.py       | 35 ++++++++++++++++---
 datumaro/datumaro/util/image_cache.py | 38 +++++++++++++++++++++
 datumaro/tests/test_images.py         | 49 +++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 datumaro/datumaro/util/image_cache.py
 create mode 100644 datumaro/tests/test_images.py

diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py
index 8e1794aa..de37b4e8 100644
--- a/datumaro/datumaro/util/image.py
+++ b/datumaro/datumaro/util/image.py
@@ -6,6 +6,8 @@
 import cv2
 import numpy as np
 
+from datumaro.util.image_cache import ImageCache as _ImageCache
+
 
 def load_image(path):
     """
@@ -19,12 +21,35 @@ def load_image(path):
     return image
 
 class lazy_image:
-    def __init__(self, path, loader=load_image):
+    def __init__(self, path, loader=load_image, cache=None):
         self.path = path
         self.loader = loader
-        self.image = None
+
+        # Cache:
+        # - False: do not cache
+        # - None: use default (don't store in a class variable)
+        # - object: use this object as a cache
+        assert cache in [None, False] or isinstance(cache, object)
+        self.cache = cache
 
     def __call__(self):
-        if self.image is None:
-            self.image = self.loader(self.path)
-        return self.image
\ No newline at end of file
+        image = None
+        image_id = id(self) # path is not necessary hashable or a file path
+
+        cache = self._get_cache()
+        if cache is not None:
+            image = self._get_cache().get(image_id)
+
+        if image is None:
+            image = self.loader(self.path)
+            if cache is not None:
+                cache.push(image_id, image)
+        return image
+
+    def _get_cache(self):
+        cache = self.cache
+        if cache is None:
+            cache = _ImageCache.get_instance()
+        elif cache == False:
+            return None
+        return cache
\ No newline at end of file
diff --git a/datumaro/datumaro/util/image_cache.py b/datumaro/datumaro/util/image_cache.py
new file mode 100644
index 00000000..1d5a5d6b
--- /dev/null
+++ b/datumaro/datumaro/util/image_cache.py
@@ -0,0 +1,38 @@
+from collections import OrderedDict
+
+
+_instance = None
+
+DEFAULT_CAPACITY = 1000
+
+class ImageCache:
+    @staticmethod
+    def get_instance():
+        global _instance
+        if _instance is None:
+            _instance = ImageCache()
+        return _instance
+
+    def __init__(self, capacity=DEFAULT_CAPACITY):
+        self.capacity = int(capacity)
+        self.items = OrderedDict()
+
+    def push(self, item_id, image):
+        if self.capacity <= len(self.items):
+            self.items.popitem(last=True)
+        self.items[item_id] = image
+
+    def get(self, item_id):
+        default = object()
+        item = self.items.get(item_id, default)
+        if item is default:
+            return None
+
+        self.items.move_to_end(item_id, last=False) # naive splay tree
+        return item
+
+    def size(self):
+        return len(self.items)
+
+    def clear(self):
+        self.items.clear()
\ No newline at end of file
diff --git a/datumaro/tests/test_images.py b/datumaro/tests/test_images.py
new file mode 100644
index 00000000..8c05d614
--- /dev/null
+++ b/datumaro/tests/test_images.py
@@ -0,0 +1,49 @@
+import numpy as np
+import os.path as osp
+from PIL import Image
+
+from unittest import TestCase
+
+from datumaro.util.test_utils import TestDir
+from datumaro.util.image import lazy_image
+from datumaro.util.image_cache import ImageCache
+
+
+class LazyImageTest(TestCase):
+    def test_cache_works(self):
+        with TestDir() as test_dir:
+            image = np.ones((100, 100, 3), dtype=np.uint8)
+            image = Image.fromarray(image).convert('RGB')
+
+            image_path = osp.join(test_dir.path, 'image.jpg')
+            image.save(image_path)
+
+            caching_loader = lazy_image(image_path, cache=None)
+            self.assertTrue(caching_loader() is caching_loader())
+
+            non_caching_loader = lazy_image(image_path, cache=False)
+            self.assertFalse(non_caching_loader() is non_caching_loader())
+
+class ImageCacheTest(TestCase):
+    def test_cache_fifo_displacement(self):
+        capacity = 2
+        cache = ImageCache(capacity)
+
+        loaders = [lazy_image(None, loader=lambda p: object(), cache=cache)
+            for _ in range(capacity + 1)]
+
+        first_request = [loader() for loader in loaders[1 : ]]
+        loaders[0]() # pop something from the cache
+
+        second_request = [loader() for loader in loaders[2 : ]]
+        second_request.insert(0, loaders[1]())
+
+        matches = sum([a is b for a, b in zip(first_request, second_request)])
+        self.assertEqual(matches, len(first_request) - 1)
+
+    def test_global_cache_is_accessible(self):
+        loader = lazy_image(None, loader=lambda p: object())
+
+        ImageCache.get_instance().clear()
+        self.assertTrue(loader() is loader())
+        self.assertEqual(ImageCache.get_instance().size(), 1)
\ No newline at end of file