Skip to content

Commit

Permalink
Allow sorting query parameters. Gives a nice increase in cache hits f…
Browse files Browse the repository at this point in the history
…or naive apps that send unordered queries.
  • Loading branch information
elnuno committed Mar 22, 2017
1 parent 6abf016 commit 2c6f027
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 13 deletions.
6 changes: 5 additions & 1 deletion cachecontrol/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@ def __init__(self, cache=None,
serializer=None,
heuristic=None,
cacheable_methods=None,
sort_query=False,
*args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ('GET',)
self.sort_query = sort_query

controller_factory = controller_class or CacheController
self.controller = controller_factory(
self.cache,
cache_etags=cache_etags,
serializer=serializer,
sort_query=sort_query
)

def send(self, request, cacheable_methods=None, **kw):
Expand Down Expand Up @@ -117,7 +120,8 @@ def _update_chunk_length(self):

# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
cache_url = self.controller.cache_url(request.url)
cache_url = self.controller.cache_url(request.url,
sort_query=self.sort_query)
self.cache.delete(cache_url)

# Give the request a from_cache attr to let people use it
Expand Down
4 changes: 2 additions & 2 deletions cachecontrol/caches/file_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ def delete(self, key):
pass


def url_to_file_path(url, filecache):
def url_to_file_path(url, filecache, sort_query=False):
"""Return the file cache path based on the URL.
This does not ensure the file exists!
"""
key = CacheController.cache_url(url)
key = CacheController.cache_url(url, sort_query=sort_query)
return filecache._fn(key)
25 changes: 17 additions & 8 deletions cachecontrol/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@ class CacheController(object):
"""An interface to see if a request should be cached or not.
"""
def __init__(self, cache=None, cache_etags=True, serializer=None,
status_codes=None):
status_codes=None, sort_query=False):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
self.sort_query = sort_query

@classmethod
def _urlnorm(cls, uri):
def _urlnorm(cls, uri, sort_query=False):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
Expand All @@ -50,6 +51,14 @@ def _urlnorm(cls, uri):
if not path:
path = "/"

# Sorting the query might induce behavior changes in the response.
# Use with care. However, assuming param randomization, a query with
# four params has a 96% chance of missing the cache on the second
# request if a response has already been recorded. The chance of a
# hit grows to 50% after a dozen requests.
if query and sort_query:
query = '&'.join(sorted(query.split('&')))

# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
Expand All @@ -58,8 +67,8 @@ def _urlnorm(cls, uri):
return defrag_uri

@classmethod
def cache_url(cls, uri):
return cls._urlnorm(uri)
def cache_url(cls, uri, sort_query=False):
return cls._urlnorm(uri, sort_query=sort_query)

def parse_cache_control(self, headers):
"""
Expand Down Expand Up @@ -90,7 +99,7 @@ def cached_request(self, request):
Return a cached response if it exists in the cache, otherwise
return False.
"""
cache_url = self.cache_url(request.url)
cache_url = self.cache_url(request.url, sort_query=self.sort_query)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)

Expand Down Expand Up @@ -207,7 +216,7 @@ def cached_request(self, request):
return False

def conditional_headers(self, request):
cache_url = self.cache_url(request.url)
cache_url = self.cache_url(request.url, sort_query=self.sort_query)
resp = self.serializer.loads(request, self.cache.get(cache_url))
new_headers = {}

Expand Down Expand Up @@ -255,7 +264,7 @@ def cache_response(self, request, response, body=None,
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)

cache_url = self.cache_url(request.url)
cache_url = self.cache_url(request.url, sort_query=self.sort_query)
logger.debug('Updating cache with response from "%s"', cache_url)

# Delete it from the cache if we happen to have it stored there
Expand Down Expand Up @@ -317,7 +326,7 @@ def update_cached_response(self, request, response):
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
cache_url = self.cache_url(request.url)
cache_url = self.cache_url(request.url, sort_query=self.sort_query)

cached_response = self.serializer.loads(
request,
Expand Down
6 changes: 4 additions & 2 deletions cachecontrol/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ def CacheControl(sess,
heuristic=None,
controller_class=None,
adapter_class=None,
cacheable_methods=None):
cacheable_methods=None,
sort_query=False):

cache = cache or DictCache()
adapter_class = adapter_class or CacheControlAdapter
Expand All @@ -19,7 +20,8 @@ def CacheControl(sess,
serializer=serializer,
heuristic=heuristic,
controller_class=controller_class,
cacheable_methods=cacheable_methods
cacheable_methods=cacheable_methods,
sort_query=sort_query
)
sess.mount('http://', adapter)
sess.mount('https://', adapter)
Expand Down

0 comments on commit 2c6f027

Please sign in to comment.