Improving Bandit Baseline Reporting

This commit changes the Bandit Baseline reporting functionality.
The previous behavior was to compare each file and if the issue
total was different we'd output all of the issues from the file.

While this worked to implement a gate it wasn't easy to use for
penetration testing and the report generated could be difficult
for developers to parse to figure out why the gate broke.

The new approach will pare down the issues to determine exactly
which were not present in the baseline.  Since code lines
can move around we can't figure out exactly where in the file
the issue is, but we can present a list of candidate issues.

A candidate issue matches the core properties of the finding
including file, severity, confidence, and issue text.

Since we're now presenting candidate issues for each finding we
also need a way to display that.  Baseline will now use separate
specifically designed formatters that show candidate issues.

Change-Id: Id5aa45ac4977e84f1c08a6b8e142acc6e5fc7a88
This commit is contained in:
Travis McPeak
2015-11-06 09:32:19 +01:00
parent 2a328eb786
commit 227cf60428
5 changed files with 168 additions and 14 deletions

View File

@@ -101,9 +101,18 @@ class Issue(object):
self.severity = data["issue_severity"]
self.confidence = data["issue_confidence"]
self.text = data["issue_text"]
self.test = data["test_name"]
self.lineno = data["line_number"]
self.linerange = data["line_range"]
def matches_issue(self, compared_issue):
    """Check whether another finding is the "same" issue as this one.

    Two issues are considered equal when their text, severity,
    confidence, filename, and plugin name all match; line numbers are
    deliberately ignored because code can move between runs.

    :param compared_issue: issue to compare against
    :return: True when the issues match, False otherwise
    """
    fields = ('text', 'severity', 'confidence', 'fname', 'test')
    mine = tuple(getattr(self, name) for name in fields)
    theirs = tuple(getattr(compared_issue, name) for name in fields)
    return mine == theirs
def issue_from_dict(data):
i = Issue(severity=data["issue_severity"])

View File

@@ -20,10 +20,6 @@ import logging
import os
import sys
import six
from collections import Counter
from bandit.core import constants as b_constants
from bandit.core import extension_loader
from bandit.core import issue
@@ -131,13 +127,10 @@ class BanditManager():
if not self.baseline:
return results
outs = []
base = Counter([jd.fname for jd in self.baseline])
vals = Counter([jd.fname for jd in results])
for key, val in six.iteritems(vals):
if key not in base or val != base[key]:
outs.extend([r for r in results if r.fname == key])
return outs
unmatched = _compare_baseline_results(self.baseline, results)
# if it's a baseline we'll return a dictionary of issues and a list of
# candidate issues
return _find_candidate_matches(unmatched, results)
def results_count(self, sev_filter=b_constants.LOW,
conf_filter=b_constants.LOW):
@@ -366,3 +359,60 @@ def _matches_glob_list(filename, glob_list):
if fnmatch.fnmatch(filename, glob):
return True
return False
def _compare_baseline_results(baseline, results):
"""Compare a baseline list of issues to list of results
This function compares a baseline set of issues to a current set of issues
to find results that weren't present in the baseline. To do this we'll
compare filename, severity, confidence, and issue text (using the
Issue.matches_issue() method.
:param baseline: Baseline list of issues
:param results: Current list of issues
:return: List of unmatched issues
"""
unmatched_issues = []
# approach here: go through each issue in current results, check if it was
# present in the baseline. If it was, remove it from the baseline (so we
# don't count it twice). If it wasn't then we have an unmatched issue, so
# add it to the unmatched list.
for new_issue in results:
# keep track of index in the baseline where the issue was so we can
# remove it from the list
for found_index, baseline_issue in enumerate(baseline):
if new_issue.matches_issue(baseline_issue):
break
# we went through all the results and didn't find it, add to unmatched
if found_index == len(baseline):
unmatched_issues.append(new_issue)
# we found it, remove from the baseline
else:
del baseline[found_index]
return unmatched_issues
def _find_candidate_matches(unmatched_issues, results_list):
"""Returns a dictionary with issue candidates
For example, let's say we find a new command injection issue in a file
which used to have two. Bandit can't tell which of the command injection
issues in the file are new, so it will show all three. The user should
be able to pick out the new one.
:param unmatched_issues: List of issues that weren't present before
:param results_list: Master list of current Bandit findings
:return: A dictionary with a list of candidates for each issue
"""
issue_candidates = {}
for unmatched in unmatched_issues:
issue_candidates[unmatched] = ([i for i in results_list if
unmatched.matches_issue(i)])
return issue_candidates

View File

@@ -476,7 +476,7 @@ class FunctionalTests(testtools.TestCase):
"filename": "%s/examples/flask_debug.py",
"issue_confidence": "MEDIUM",
"issue_severity": "HIGH",
"issue_text": "A Flask app appears to be run with debug=True ..",
"issue_text": "A Flask app appears to be run with debug=True, which exposes the Werkzeug debugger and allows the execution of arbitrary code.",
"line_number": 10,
"line_range": [
10
@@ -490,4 +490,4 @@ class FunctionalTests(testtools.TestCase):
self.b_mgr.populate_baseline(json)
self.run_example('flask_debug.py')
self.assertEqual(len(self.b_mgr.baseline), 1)
self.assertEqual(self.b_mgr.get_issue_list(), [])
self.assertEqual(self.b_mgr.get_issue_list(), {})

View File

@@ -70,6 +70,49 @@ class IssueTests(testtools.TestCase):
result = issue.filter(bandit.UNDEFINED, level)
self.assertTrue((test >= rank) == result)
def test_matches_issue(self):
    """Verify Issue.matches_issue compares exactly the right fields.

    Issues must match on text, severity, confidence, filename, and
    plugin name -- and explicitly NOT on line number, which can change
    as code moves around between runs.
    """
    issue_a = _get_issue_instance()

    issue_b = _get_issue_instance(severity=bandit.HIGH)

    issue_c = _get_issue_instance(confidence=bandit.LOW)

    issue_d = _get_issue_instance()
    issue_d.text = 'ABCD'

    issue_e = _get_issue_instance()
    issue_e.fname = 'file1.py'

    # BUG FIX: issue_f was previously an alias of issue_a
    # (issue_f = issue_a), making the positive comparisons below trivial
    # self-comparisons; use an independent but identical instance instead
    issue_f = _get_issue_instance()

    issue_g = _get_issue_instance()
    issue_g.test = 'ZZZZ'

    # BUG FIX: issue_h was also an alias of issue_a, so setting lineno
    # here mutated issue_a too and the line numbers always matched --
    # defeating the point of the final check; use a fresh instance that
    # differs from issue_a only in its line number
    issue_h = _get_issue_instance()
    issue_h.lineno = 12345

    # positive tests
    self.assertTrue(issue_a.matches_issue(issue_a))
    self.assertTrue(issue_a.matches_issue(issue_f))
    self.assertTrue(issue_f.matches_issue(issue_a))

    # severity doesn't match
    self.assertFalse(issue_a.matches_issue(issue_b))

    # confidence doesn't match
    self.assertFalse(issue_a.matches_issue(issue_c))

    # text doesn't match
    self.assertFalse(issue_a.matches_issue(issue_d))

    # filename doesn't match
    self.assertFalse(issue_a.matches_issue(issue_e))

    # plugin name doesn't match
    self.assertFalse(issue_a.matches_issue(issue_g))

    # line number doesn't match but should pass because we don't test that
    self.assertTrue(issue_a.matches_issue(issue_h))
def _get_issue_instance(severity=bandit.MEDIUM, confidence=bandit.MEDIUM):

View File

@@ -53,7 +53,7 @@ class TempFile(fixtures.Fixture):
class ManagerTests(testtools.TestCase):
def _get_issue_instance(sev=constants.MEDIUM, conf=constants.MEDIUM):
def _get_issue_instance(self, sev=constants.MEDIUM, conf=constants.MEDIUM):
new_issue = issue.Issue(sev, conf, 'Test issue')
new_issue.fname = 'code.py'
new_issue.test = 'bandit_plugin'
@@ -278,3 +278,55 @@ class ManagerTests(testtools.TestCase):
m.formatters_mgr = {'txt': fmt}
self.manager.output_results(3, constants.LOW, constants.LOW,
"dummy", "test")
def test_compare_baseline(self):
    """Exercise _compare_baseline_results across three scenarios."""
    issue_a = self._get_issue_instance()
    issue_a.fname = 'file1.py'

    issue_b = self._get_issue_instance()
    issue_b.fname = 'file2.py'

    issue_c = self._get_issue_instance(sev=constants.HIGH)
    issue_c.fname = 'file1.py'

    # issue c is in results but not in the baseline: it must be reported
    new_issues = manager._compare_baseline_results(
        [issue_a, issue_b], [issue_a, issue_b, issue_c])
    self.assertEqual([issue_c], new_issues)

    # baseline and results are identical: nothing to report
    no_new = manager._compare_baseline_results(
        [issue_a, issue_b, issue_c], [issue_a, issue_b, issue_c])
    self.assertEqual([], no_new)

    # results are better than the baseline: still nothing to report
    improved = manager._compare_baseline_results(
        [issue_a, issue_b, issue_c], [issue_a, issue_b])
    self.assertEqual([], improved)
def test_find_candidate_matches(self):
    """Exercise _find_candidate_matches candidate grouping."""
    issue_a = self._get_issue_instance()
    issue_b = self._get_issue_instance()
    issue_c = self._get_issue_instance()
    issue_c.fname = 'file1.py'

    # issue a and b are the same, so both come back as candidates for a
    self.assertEqual(
        {issue_a: [issue_a, issue_b]},
        manager._find_candidate_matches([issue_a], [issue_a, issue_b]))

    # issue a and c are different, so only a is a candidate for itself
    self.assertEqual(
        {issue_a: [issue_a]},
        manager._find_candidate_matches([issue_a], [issue_a, issue_c]))

    # c doesn't match a at all, so an empty candidate list comes back
    self.assertEqual(
        {issue_a: []},
        manager._find_candidate_matches([issue_a], [issue_c]))

    # a and b match each other, so each maps to both candidates
    self.assertEqual(
        {issue_a: [issue_a, issue_b], issue_b: [issue_a, issue_b]},
        manager._find_candidate_matches(
            [issue_a, issue_b], [issue_a, issue_b, issue_c]))