From 9f8158be04a09adbc58873ffebf27ad88c9dbaa9 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Wed, 20 Dec 2017 19:11:56 +0100 Subject: [PATCH] Fix github caching We do caching of the requests to github using cachecontrol which is injected into the urllib3 session. The caching needs to be entirely etag based as max-age based caching leads to working with stale data. Contrary to its documentation [1], cachecontrol doesn't prioritize etag caching; it doesn't even re-request until max-age has elapsed. Thus we need to add a custom caching heuristic [2] which simply drops the cache-control header containing max-age. This way we force cachecontrol to only rely on the etag headers. [1] http://cachecontrol.readthedocs.io/en/latest/etags.html [2] http://cachecontrol.readthedocs.io/en/latest/custom_heuristics.html Change-Id: If47e1eaa942914a243fc03666b83cd896665bd71 --- zuul/driver/github/githubconnection.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index f987f4712a..82cac6b958 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -24,6 +24,7 @@ import re import cachecontrol from cachecontrol.cache import DictCache +from cachecontrol.heuristics import BaseHeuristic import iso8601 import jwt import requests @@ -431,9 +432,26 @@ class GithubConnection(BaseConnection): # NOTE(jamielennox): Better here would be to cache to memcache or file # or something external - but zuul already sucks at restarting so in # memory probably doesn't make this much worse. + + # NOTE(tobiash): Unlike documented, cachecontrol doesn't prioritize + # the etag caching and doesn't even re-request until max-age has + # elapsed. + # + # Thus we need to add a custom caching heuristic which simply drops + # the cache-control header containing max-age. This way we force + # cachecontrol to only rely on the etag headers. 
+ # + # http://cachecontrol.readthedocs.io/en/latest/etags.html + # http://cachecontrol.readthedocs.io/en/latest/custom_heuristics.html + class NoAgeHeuristic(BaseHeuristic): + def update_headers(self, response): + if 'cache-control' in response.headers: + del response.headers['cache-control'] + self.cache_adapter = cachecontrol.CacheControlAdapter( DictCache(), - cache_etags=True) + cache_etags=True, + heuristic=NoAgeHeuristic()) # The regex is based on the connection host. We do not yet support # cross-connection dependency gathering