# NOTE(review): rendered-page metadata, not part of the source file —
# repository description absent; listed at trunk as 217 lines, 8.1 kB.
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
from _thread import RLock
from types import MethodType

################################################################################
### LRU Cache function decorator
################################################################################


class _HashedSeq(list):
    """A list subclass that memoizes the hash of its contents.

    The hash is computed exactly once, at construction time.  This matters
    because lru_cache() may hash the same key several times on a cache
    miss; caching the value avoids the redundant work.
    """

    __slots__ = "hashvalue"

    def __init__(self, tup, hash=hash):
        self[:] = tup
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue


kwd_mark = (object(),)  # unique sentinel separating positional and keyword parts
fasttypes = {int, str}  # types whose instances cache their own hash value


def _make_key(args, kwds, typed):
    """Build a flat, hashable cache key from the call arguments.

    The key is kept as flat as possible: a flat tuple takes less memory
    than a nested structure.  As a further shortcut, a call with a single
    positional argument whose type is known to cache its own hash (int or
    str) returns that bare argument unwrapped, saving space and speeding
    up lookups.

    Keyword order is significant: relying on kwds preserving the order the
    user supplied is much faster than the former sorted() approach, at the
    cost that f(x=1, y=2) and f(y=2, x=1) are cached as distinct calls.
    """
    key = args
    if kwds:
        key += kwd_mark
        for kv in kwds.items():
            key += kv
    if typed:
        # Append the argument types so e.g. 1 and 1.0 get distinct keys.
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for v in kwds.values())
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedSeq(key)


_sentinel = object()  # distinguishes "not cached" from a cached None


class _lru_cache_wrapper_base:
    """State and descriptor plumbing shared by the three wrapper variants."""

    def __init__(self, user_function, typed, _CacheInfo):
        self._user_function = user_function
        self._typed = typed
        self._CacheInfo = _CacheInfo
        self._cache = {}
        self._cache_get = self._cache.get  # bound method: key lookup or None
        self._hits = 0
        self._misses = 0

    def cache_info(self, maxsize, cache_len):
        """Report cache statistics"""
        return self._CacheInfo(self._hits, self._misses, maxsize, cache_len)

    def __get__(self, instance, owner):
        # Descriptor protocol: when the wrapper decorates a method, reading
        # it off an instance binds it the way a plain function would bind.
        if instance is None:
            return self
        return MethodType(self, instance)


class _uncached_lru_cache_wrapper(_lru_cache_wrapper_base):
    """maxsize == 0: never caches; only the miss counter is maintained."""

    def __init__(self, user_function, typed, _CacheInfo):
        super().__init__(user_function, typed, _CacheInfo)

    def cache_info(self):
        return super().cache_info(0, 0)

    def cache_clear(self):
        """Clear the cache and cache statistics"""
        self._misses = 0

    def __call__(self, *args, **kwds):
        # No caching -- every call is a statistics-only miss.
        self._misses += 1
        return self._user_function(*args, **kwds)


class _infinite_lru_cache_wrapper(_lru_cache_wrapper_base):
    """maxsize is None: plain dict cache with no ordering and no eviction."""

    def __init__(self, user_function, typed, _CacheInfo):
        super().__init__(user_function, typed, _CacheInfo)
        self._cache_len = self._cache.__len__  # size without calling len()

    def cache_info(self):
        return super().cache_info(None, self._cache_len())

    def cache_clear(self):
        """Clear the cache and cache statistics"""
        self._cache.clear()
        self._hits = 0
        self._misses = 0

    def __call__(self, *args, **kwds):
        # Simple caching without ordering or size limit.
        key = _make_key(args, kwds, self._typed)
        cached = self._cache_get(key, _sentinel)
        if cached is not _sentinel:
            self._hits += 1
            return cached
        self._misses += 1
        result = self._user_function(*args, **kwds)
        self._cache[key] = result
        return result


class _bounded_lru_cache_wrapper(_lru_cache_wrapper_base):
    """0 < maxsize: dict cache plus a circular doubly linked list that
    tracks access recency so the least-recently-used entry can be evicted.
    """

    def __init__(self, user_function, maxsize, typed, _CacheInfo):
        super().__init__(user_function, typed, _CacheInfo)
        self._maxsize = maxsize
        self._full = False
        self._cache_len = self._cache.__len__  # size without calling len()
        self._lock = RLock()  # because linked-list updates aren't threadsafe
        # Root of the circular doubly linked list, initialized to point at
        # itself in both directions with no key and no result.
        self._root = []
        self._root[:] = [self._root, self._root, None, None]

    def cache_info(self):
        return super().cache_info(self._maxsize, self._cache_len())

    def cache_clear(self):
        """Clear the cache and cache statistics"""
        with self._lock:
            self._cache.clear()
            self._root[:] = [self._root, self._root, None, None]
            self._hits = self._misses = 0
            self._full = False

    def __call__(self, *args, **kwds):
        # Size-limited caching that tracks accesses by recency.
        # Field indices of a link: [PREV, NEXT, KEY, RESULT].
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3
        key = _make_key(args, kwds, self._typed)
        with self._lock:
            link = self._cache_get(key)
            if link is not None:
                # Hit: splice the link out and re-insert it at the front
                # (most-recently-used position) of the circular queue.
                link_prev, link_next, _key, result = link
                link_prev[NEXT] = link_next
                link_next[PREV] = link_prev
                last = self._root[PREV]
                last[NEXT] = self._root[PREV] = link
                link[PREV] = last
                link[NEXT] = self._root
                self._hits += 1
                return result
            self._misses += 1
        # The lock is deliberately released while the user function runs.
        result = self._user_function(*args, **kwds)
        with self._lock:
            if key in self._cache:
                # This same key was added to the cache while the lock was
                # released.  Since the link update is already done, we need
                # only return the computed result and keep the miss count.
                pass
            elif self._full:
                # Use the old root to store the new key and result.
                oldroot = self._root
                oldroot[KEY] = key
                oldroot[RESULT] = result
                # Empty the oldest link and make it the new root.  Keep a
                # reference to the old key and old result to prevent their
                # ref counts from reaching zero during the update; that
                # stops potentially arbitrary object clean-up code (i.e.
                # __del__) from running while the links are being adjusted.
                self._root = oldroot[NEXT]
                oldkey = self._root[KEY]
                self._root[KEY] = self._root[RESULT] = None
                # Now update the cache dictionary.
                del self._cache[oldkey]
                # Save the potentially reentrant cache[key] assignment for
                # last, after the root and links are in a consistent state.
                self._cache[key] = oldroot
            else:
                # Put the result in a new link at the front of the queue.
                last = self._root[PREV]
                link = [last, self._root, key, result]
                last[NEXT] = self._root[PREV] = self._cache[key] = link
                # Use the cache_len bound method instead of the len()
                # function, which could itself be wrapped in an lru_cache.
                self._full = self._cache_len() >= self._maxsize
        return result


def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
    """Select the wrapper variant appropriate for *maxsize*."""
    if maxsize == 0:
        return _uncached_lru_cache_wrapper(user_function, typed, _CacheInfo)
    if maxsize is None:
        return _infinite_lru_cache_wrapper(user_function, typed, _CacheInfo)
    return _bounded_lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)