From 9bd994c0967e586a94bdae0d145015c4be1cafc6 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Fri, 11 Sep 2020 15:45:05 +0200 Subject: [PATCH] Ignore 500 errors when requesting pr files Github responds with a 500 error if the diff is too large to make sense. This is currently handled by the retry handler with backoff, which can significantly delay event processing. However, Github will never respond with anything different to such a request, so this error needs to be excluded from the retry handler. Furthermore, it turned out that our error handling code incorrectly results in an empty changed files list. Instead, in this case we must set the changed files to None so that zuul takes care of this itself. Change-Id: Ie825a8801032d5a1ec66afb1244aa3b571fd1d39 --- zuul/driver/github/githubconnection.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index da242401ee..a5af140883 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -23,6 +23,7 @@ import threading import time import json from collections import OrderedDict +from json.decoder import JSONDecodeError import cherrypy import cachecontrol @@ -228,6 +229,23 @@ class GithubRetryHandler: if not 500 <= response.status_code < 600: return + try: + data = response.json() + errors = data.get('errors', []) + for error in errors: + resource = error.get('resource') + field = error.get('field') + code = error.get('code') + if (resource == 'PullRequest' and + field == 'diff' and + code == 'not_available'): + # Github responds with 500 if the diff is too large so we + # need to ignore it because retries won't help. + return + except JSONDecodeError: + # If there is no json just continue with retry handling. 
+ pass + if hasattr(response.request, 'zuul_retry_count'): retry_count = response.request.zuul_retry_count retry_delay = min(response.request.zuul_retry_delay * 2, @@ -1407,7 +1425,9 @@ class GithubConnection(BaseConnection): # Github's pull requests files API only returns at max # the first 300 changed files of a PR in alphabetical order. # https://developer.github.com/v3/pulls/#list-pull-requests-files - if len(change.files) < change.pr.get('changed_files', 0): + if change.files is None: + log.warning("Got no files of PR.") + elif len(change.files) < change.pr.get('changed_files', 0): log.warning("Got only %s files but PR has %s files.", len(change.files), change.pr.get('changed_files', 0)) @@ -1596,7 +1616,7 @@ class GithubConnection(BaseConnection): self.log.warning("Failed to get list of files from Github. " "Using empty file list to trigger update " "via the merger: %s", exc) - pr['files'] = [] + pr['files'] = None labels = [l['name'] for l in pr['labels']] pr['labels'] = labels