[Datumaro] Disable lazy image caching by default (#876)

* Disable lazy image caching by default
* Deterministic cache test
* Add displacing image cache
main
zhiltsov-max 6 years ago committed by Nikita Manovich
parent c3d464c9a0
commit 9cb48ef2c2

@ -6,6 +6,8 @@
import cv2
import numpy as np
from datumaro.util.image_cache import ImageCache as _ImageCache
def load_image(path):
"""
@ -19,12 +21,35 @@ def load_image(path):
return image
class lazy_image:
    """Callable wrapper that loads an image on demand, optionally via a cache.

    Calling an instance returns ``loader(path)``. Depending on ``cache``,
    the result is first looked up in, and afterwards stored to, a cache
    object that exposes ``get(item_id)`` and ``push(item_id, image)``.
    """

    def __init__(self, path, loader=load_image, cache=None):
        """
        Args:
            path: value passed to ``loader``; it is not required to be
                hashable or to be a file path.
            loader: callable that takes ``path`` and returns the image.
            cache: caching policy:
                - False: do not cache
                - None: use the default cache returned by
                  ``_ImageCache.get_instance()`` (nothing is stored on
                  this object or in a class variable)
                - any other object: use this object as the cache

        No validation of ``cache`` is done here; an unsuitable object
        shows up as an error on the first call.
        """
        self.path = path
        self.loader = loader
        self.cache = cache

    def __call__(self):
        """Return the image, loading it when the cache cannot supply it."""
        # The key is the identity of this loader, because path is not
        # necessarily hashable or a file path.
        # NOTE(review): id() values may be reused after an object is
        # garbage collected, so a long-lived cache could still hold an
        # entry stored under the same id by an earlier loader - confirm
        # that this is acceptable for the callers.
        image_id = id(self)

        # Resolve the cache once and reuse it for both lookup and store.
        cache = self._get_cache()
        image = cache.get(image_id) if cache is not None else None

        if image is None:
            image = self.loader(self.path)
            if cache is not None:
                cache.push(image_id, image)
        return image

    def _get_cache(self):
        """Return the cache object to use, or None when caching is off."""
        if self.cache is None:
            return _ImageCache.get_instance()
        # Identity check: only the literal False switches caching off.
        if self.cache is False:
            return None
        return self.cache

@ -0,0 +1,38 @@
from collections import OrderedDict
# Process-wide cache object, created on first use by ImageCache.get_instance()
_instance = None

# Number of entries an ImageCache holds when no capacity is given
DEFAULT_CAPACITY = 1000


class ImageCache:
    """Bounded mapping from item ids to images with displacement.

    Entries live in an ordered dict. ``push`` appends new entries at the
    end, ``get`` moves a hit to the front, and a ``push`` into a full cache
    drops the entry at the end. The displaced entry is therefore the most
    recently pushed one that has not been read since, or, if every entry
    has been read, the one read longest ago.
    """

    @staticmethod
    def get_instance():
        """Return the shared module-level cache, creating it on first use."""
        global _instance
        if _instance is None:
            _instance = ImageCache()
        return _instance

    def __init__(self, capacity=DEFAULT_CAPACITY):
        """
        Args:
            capacity: maximum number of entries, converted with ``int()``.
                A value of zero or less means nothing is ever stored.
        """
        self.capacity = int(capacity)
        self.items = OrderedDict()

    def push(self, item_id, image):
        """Store ``image`` under ``item_id``, displacing entries if full.

        Overwriting an id that is already present keeps its position and
        does not displace any other entry.
        """
        if self.capacity <= 0:
            # Nothing can be kept; also avoids popitem() on an empty dict.
            return

        if item_id not in self.items:
            # Make room only for genuinely new entries. The loop also
            # shrinks the cache if capacity was lowered after creation.
            while self.capacity <= len(self.items):
                self.items.popitem(last=True)
        self.items[item_id] = image

    def get(self, item_id):
        """Return the image stored under ``item_id`` or None if absent.

        A hit moves the entry to the front of the ordering, which keeps
        it furthest from displacement.
        """
        try:
            item = self.items[item_id]
        except KeyError:
            return None

        self.items.move_to_end(item_id, last=False) # naive splay tree
        return item

    def size(self):
        """Return the number of entries currently stored."""
        return len(self.items)

    def clear(self):
        """Remove every entry from the cache."""
        self.items.clear()

@ -0,0 +1,49 @@
import numpy as np
import os.path as osp
from PIL import Image
from unittest import TestCase
from datumaro.util.test_utils import TestDir
from datumaro.util.image import lazy_image
from datumaro.util.image_cache import ImageCache
class LazyImageTest(TestCase):
    # Checks that lazy_image honours its cache argument

    def test_cache_works(self):
        with TestDir() as test_dir:
            # Write a small image file for the loaders to read
            pixels = np.ones((100, 100, 3), dtype=np.uint8)
            image_path = osp.join(test_dir.path, 'image.jpg')
            Image.fromarray(pixels).convert('RGB').save(image_path)

            # cache=None: repeated calls must give the very same object
            caching_loader = lazy_image(image_path, cache=None)
            cached_first = caching_loader()
            cached_second = caching_loader()
            self.assertTrue(cached_first is cached_second)

            # cache=False: every call must produce a new object
            non_caching_loader = lazy_image(image_path, cache=False)
            fresh_first = non_caching_loader()
            fresh_second = non_caching_loader()
            self.assertFalse(fresh_first is fresh_second)
class ImageCacheTest(TestCase):
    # Checks ImageCache displacement and access to the shared instance

    def test_cache_fifo_displacement(self):
        size_limit = 2
        displacing_cache = ImageCache(size_limit)

        # One loader more than the cache can hold; each load is a new object
        loaders = []
        for _ in range(size_limit + 1):
            loaders.append(lazy_image(None, loader=lambda p: object(),
                cache=displacing_cache))

        before = [load() for load in loaders[1:]]
        loaders[0]() # pop something from the cache

        # Same call order as the first round's tail, then the head
        tail_after = [load() for load in loaders[2:]]
        head_after = loaders[1]()
        after = [head_after] + tail_after

        same_count = 0
        for old, new in zip(before, after):
            if old is new:
                same_count += 1
        self.assertEqual(same_count, len(before) - 1)

    def test_global_cache_is_accessible(self):
        loader = lazy_image(None, loader=lambda p: object())

        shared_cache = ImageCache.get_instance()
        shared_cache.clear()

        first = loader()
        second = loader()
        self.assertTrue(first is second)
        self.assertEqual(ImageCache.get_instance().size(), 1)
Loading…
Cancel
Save