Use a best score matcher

Instead of finding a match by examining the upper and lower bounds of individual specs, use a scoring matcher that scores all combinations of candidate requirement sets and then selects the final match as the best-scoring of those candidates. This has the advantage of finding the requirement set that best fits the input requirement set, removing requirements with higher scores (less compatible) and keeping those with lower scores (more compatible) as the better matches. Fixes bug 1288481. Change-Id: Ic95e4d607e04c7d7d4125bc5fbb5ebf205194c0c
2014-03-09 12:02:54 -07:00
parent e43eac2861
commit e79b0022d9
2 changed files with 294 additions and 124 deletions
--- a/anvil/tests/test_tools.py
+++ b/anvil/tests/test_tools.py
@@ -0,0 +1,102 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+#    Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from anvil import shell as sh
+from anvil import test
+
+
+class TestTools(test.TestCase):
+    def setUp(self):
+        super(TestTools, self).setUp()
+        self.multipip = sh.which("multipip", ['tools'])
+
+    def _run_multipip(self, versions):
+        cmd = [self.multipip]
+        cmd.extend(versions)
+        return sh.execute(cmd, check_exit_code=False)
+
+    def _extract_conflicts(self, stderr):
+        conflicts = {}
+        current_name = None
+        capturing = False
+        for line in stderr.splitlines():
+            if line.endswith(": incompatible requirements"):
+                capturing = False
+                current_name = line.split(":", 1)[0].lower().strip()
+                if current_name not in conflicts:
+                    conflicts[current_name] = []
+                continue
+            if line.startswith("Choosing") and current_name:
+                capturing = False
+                continue
+            if line.startswith("Conflicting") and current_name:
+                capturing = True
+                continue
+            if capturing and current_name and line.startswith("\t"):
+                try:
+                    line = line.lstrip()
+                    _where, req = line.split(":", 1)
+                    req = req.strip()
+                    if req:
+                        conflicts[current_name].append(req)
+                except ValueError:
+                    pass
+        return conflicts
+
+    def test_multipip_ok(self):
+        versions = [
+            "x>1",
+            "x>2",
+        ]
+        (stdout, stderr) = self._run_multipip(versions)
+        stdout = stdout.strip()
+        self.assertEqual("x>1,>2", stdout)
+        self.assertEqual({}, self._extract_conflicts(stderr))
+
+    def test_multipip_varied(self):
+        versions = [
+            'x!=2',
+            'x!=3',
+            "y>3",
+        ]
+        (stdout, stderr) = self._run_multipip(versions)
+        stdout = stdout.strip()
+        self.assertEqual({}, self._extract_conflicts(stderr))
+        self.assertEqual("x!=2,!=3\ny>3", stdout)
+
+    def test_multipip_best_pick(self):
+        versions = [
+            "x>1",
+            "x>=2",
+            "x!=2",
+        ]
+        (stdout, stderr) = self._run_multipip(versions)
+        stdout = stdout.strip()
+        self.assertEqual('x>1,!=2', stdout)
+        self.assertEqual(["x>=2"], self._extract_conflicts(stderr)['x'])
+
+    def test_multipip_best_pick_again(self):
+        versions = [
+            "x>1",
+            "x>=2",
+            "x!=2",
+            'x>4',
+            'x>5',
+        ]
+        (stdout, stderr) = self._run_multipip(versions)
+        stdout = stdout.strip()
+        self.assertEqual('x>1,!=2,>4,>5', stdout)
+        self.assertEqual(["x>=2"], self._extract_conflicts(stderr)['x'])
--- a/tools/multipip
+++ b/tools/multipip
@@ -1,10 +1,16 @@
 #!/usr/bin/python

-import argparse
+from __future__ import print_function
+
+import collections
+import itertools
 import logging
 import re
 import sys

+import argparse
+import six
+
 import pip.index
 import pip.req
 import pkg_resources
@@ -96,18 +102,154 @@ def install_requirement_parse(line, comes_from):
    return install_requirement_ensure_req_field(req)


-def incompatible_requirement(chosen, conflicting, incompatibles):
-    if chosen.req.key not in incompatibles:
-        incompatibles.add(chosen.req.key)
-        print >> sys.stderr, "%s: incompatible requirements" % chosen.req.key
-        print >> sys.stderr, "Choosing:"
-        print >> sys.stderr, ("\t%s: %s" %
-                              (chosen.comes_from,
-                               install_requirement_str(chosen)))
-        print >> sys.stderr, "Conflicting:"
-    print >> sys.stderr, ("\t%s: %s" %
-                          (conflicting.comes_from,
-                           install_requirement_str(conflicting)))
+def iter_combinations(elements, include_empty=False):
+    """Iterates over all combinations of the given elements list."""
+    if include_empty:
+        start = 0
+    else:
+        start = 1
+    for i in range(start, len(elements) + 1):
+        for c in itertools.combinations(elements, i):
+            yield c
+
+
+def conflict_scorer(versioned):
+    """Scores a list of (op, version) tuples, a higher score means more
+    conflicts while a lower score means fewer conflicts.
+    """
+    if len(versioned) == 1:
+        return 0
+    op_versions = collections.defaultdict(list)
+    for (op, version) in versioned:
+        op_versions[op].append(version)
+    score = 0
+    for version in sorted(op_versions.get("==", [])):
+        for (op, version2) in versioned:
+            if version != version2:
+                score += 1
+    for version in sorted(op_versions.get("!=", [])):
+        for (op, version2) in versioned:
+            if op in ["!=", ">", "<"]:
+                continue
+            if version2 == version:
+                score += 1
+    for version in sorted(op_versions.get(">", [])):
+        for (op, version2) in versioned:
+            if (op, version2) == (">", version):
+                continue
+            if op in ["<", "<="] and version2 <= version:
+                score += 1
+            if op == "==" and version2 == version:
+                score += 1
+    for version in sorted(op_versions.get(">=", [])):
+        for (op, version2) in versioned:
+            if (op, version2) == (">=", version):
+                continue
+            if op in ["<", "<="] and version2 < version:
+                score += 1
+    for version in sorted(op_versions.get("<", [])):
+        for (op, version2) in versioned:
+            if (op, version2) == ("<", version):
+                continue
+            if op in [">", ">="] and version2 >= version:
+                score += 1
+            if op == "==" and version2 == version:
+                score += 1
+    for version in sorted(op_versions.get("<=", [])):
+        for (op, version2) in versioned:
+            if (op, version2) == ("<=", version):
+                continue
+            if op in [">", ">="] and version2 > version:
+                score += 1
+    return score
+
+
+def find_best_match(versioned, scorer_func):
+    """Iterates over all combinations of the given version and comparator in
+    the provided lists and finds the one with the best score (closest to zero
+    with the maximum number of elements).
+    """
+    scored = []
+    for combo in iter_combinations(versioned):
+        scored.append((combo, scorer_func(combo)))
+
+    # Find the lowest score with the highest number of elements.
+    min_score = sys.maxint
+    for (combo, combo_score) in scored:
+        if combo_score < min_score:
+            min_score = combo_score
+    max_elems = -1
+    best_match = []
+    for (combo, combo_score) in scored:
+        if min_score == combo_score:
+            if len(combo) > max_elems:
+                best_match = combo
+                max_elems = len(combo)
+
+    incompatibles = set()
+    for (combo, combo_score) in scored:
+        for spec in combo:
+            if spec not in best_match:
+                incompatibles.add(spec)
+    return (best_match, incompatibles)
+
+
+def best_match(req_key, req_list):
+    """Attempts to find the versions which will work the best for the given
+    requirement specification list.
+    """
+    all_specs = []
+    req_specs = []
+    for req in req_list:
+        if req.req.specs:
+            all_specs.extend(req.req.specs)
+            req_specs.append((req, tuple(req.req.specs)))
+    if not all_specs:
+        return (req_list[0], [])
+
+    def spec_sort(spec1, spec2):
+        (op1, version1) = spec1
+        (op2, version2) = spec2
+        c = cmp(version1, version2)
+        if c == 0:
+            c = cmp(op1, op2)
+        return c
+
+    def reform(specs, versions, default_source='compiled'):
+        # Convert the parsed versions back into the string versions so that
+        # we can return that as matches (instead of the comparable versions).
+        tmp_specs = []
+        for (op, version) in specs:
+            tmp_specs.append((op, versions[version]))
+        # Try to see if any of the requirements that we had actually had this
+        # exact spec, if so then just return that as the requirement, if not
+        # create a requirement instead.
+        specs = tuple(tmp_specs)
+        for (req, req_spec) in req_specs:
+            if specs == req_spec:
+                return req
+        spec_pieces = []
+        for (op, version) in specs:
+            spec_pieces.append("%s%s" % (op, version))
+        spec = "%s%s" % (req_key, ",".join(spec_pieces))
+        return pip.req.InstallRequirement.from_line(spec, default_source)
+
+    versions = {}
+    versioned = set()
+    for (op, version) in all_specs:
+        parsed_version = pkg_resources.parse_version(version)
+        versioned.add((op, parsed_version))
+        versions[parsed_version] = version
+    versioned = list(sorted(versioned, cmp=spec_sort))
+    initial_score = conflict_scorer(versioned)
+    if initial_score == 0:
+        return (reform(versioned, versions), [])
+    else:
+        match, incompatibles = find_best_match(versioned, conflict_scorer)
+        incompatibles = [reform([s], versions,
+                                default_source='compiled conflict')
+                         for s in incompatibles]
+        return (reform(match, versions), incompatibles)


 def parse_requirements(options):
@@ -143,131 +285,53 @@ def parse_requirements(options):
            ignored_requirements.append(req)
        except Exception as ex:
            raise RequirementException("Cannot parse `%s': %s" % (req_spec, ex))
-    return all_requirements, ignored_requirements
+    return (all_requirements, ignored_requirements)


-def join_one_requirement(req_list):
-    """Join requirement list for one package together.
-
-    Possible returns:
-    * ==A - exact version (even when there are conflicts)
-    * >=?A,<=?B,(!=C)+ - line segment (no conflicts detected)
-    * >=?A,(!=C)+ - more than (also when conflicts detected)
-
-    :param:req_list list of pip.req.InstallRequirement
-    :return: pip.req.InstallRequirement
-    """
-    if len(req_list) == 1:
-        return req_list[0]
-    lower_bound_str = None
-    lower_bound_version = None
-    upper_bound_str = None
-    upper_bound_version = None
-    conflicts = []
-    for req in req_list:
-        for spec in req.req.specs:
-            if spec[0] == "==":
-                return req
-            spec_str = "%s%s" % spec
-            if spec[0] == "!=":
-                conflicts.append(spec_str)
-                continue
-            version = pkg_resources.parse_version(spec[1])
-            # strict_check is < or >, not <= or >=
-            strict_check = len(spec[0]) == 1
-            if spec[0][0] == ">":
-                if (not lower_bound_version or (version > lower_bound_version) or
-                    (strict_check and version == lower_bound_version)):
-                    lower_bound_version = version
-                    lower_bound_str = spec_str
-            else:
-                if (not upper_bound_version or (version < upper_bound_version) or
-                    (strict_check and version == upper_bound_version)):
-                    upper_bound_version = version
-                    upper_bound_str = spec_str
-    req_key = req_list[0].req.key
-    if lower_bound_version and upper_bound_version:
-        if lower_bound_version > upper_bound_version:
-            upper_bound_str = None
-        if lower_bound_version == upper_bound_version:
-            if lower_bound_str[1] == "=" and upper_bound_str[1] == "=":
-                return pip.req.InstallRequirement.from_line(
-                    "%s==%s" % (req_key, upper_bound_str[2:]),
-                    "compiled")
-            else:
-                upper_bound_str = None
-    req_specs = []
-    if lower_bound_str:
-        req_specs.append(lower_bound_str)
-    if upper_bound_str:
-        req_specs.append(upper_bound_str)
-    req_specs.extend(conflicts)
-    return pip.req.InstallRequirement.from_line(
-        "%s%s" % (req_key, ",".join(req_specs)),
-        "compiled")
-
-
-def join_requirements(options):
-    all_requirements, ignored_requirements = parse_requirements(options)
+def join_requirements(requirements, ignored_requirements):
    skip_keys = set(pkg.req.key for pkg in ignored_requirements)
-
-    incompatibles = set()
-    joined_requirements = []
-    for req_key, req_list in all_requirements.iteritems():
+    incompatibles = {}
+    joined_requirements = {}
+    for (req_key, req_list) in six.iteritems(requirements):
        if req_key in skip_keys:
            continue
-        joined_req = join_one_requirement(req_list)
-        joined_requirements.append(joined_req)
-
-        segment_ok = False
-        lower_version = None
-        lower_strict = False
-        exact_version = None
-        conflicts = []
-        for parsed, trans, op, ver in joined_req.req.index:
-            if op[0] == ">":
-                lower_version = parsed
-                lower_strict = len(op) == 1
-            elif op[0] == "<":
-                segment_ok = True
-            elif op[0] == "=":
-                exact_version = parsed
-            else:
-                conflicts.append(parsed)
-        if exact_version:
-            for req in req_list:
-                if exact_version not in req.req:
-                    incompatible_requirement(joined_req, req,
-                                             incompatibles)
-        else:
-            for req in req_list:
-                for parsed, trans, op, ver in req.req.index:
-                    if op[0] == "=":
-                        if parsed in conflicts:
-                            incompatible_requirement(joined_req, req,
-                                                     incompatibles)
-                            break
-                    elif not segment_ok and op[0] == "<":
-                        # analyse lower bound: x >= A or x > A
-                        if (lower_version > parsed or (
-                                lower_version == parsed and
-                                (lower_strict or len(op) != 2))):
-                            incompatible_requirement(joined_req, req,
-                                                     incompatibles)
-                            break
+        match, req_incompatibles = best_match(req_key, req_list)
+        joined_requirements[req_key] = match
+        if req_incompatibles:
+            incompatibles[req_key] = req_incompatibles
    return (joined_requirements, incompatibles)


 def print_requirements(joined_requirements):
    formatted_requirements = []
-    for req in joined_requirements:
+    for req_key in sorted(six.iterkeys(joined_requirements)):
+        req = joined_requirements[req_key]
        if req.url:
            req = "%s#egg=%s" % (req.url, req.req)
        else:
            req = str(req.req)
        formatted_requirements.append(req)
-    for req in sorted(formatted_requirements):
-        print req
+    for req in formatted_requirements:
+        print(req)
+
+
+def print_incompatibles(incompatibles, joined_requirements):
+    for req_key in sorted(six.iterkeys(incompatibles)):
+        req_incompatibles = incompatibles[req_key]
+        if not req_incompatibles:
+            continue
+        print("%s: incompatible requirements" % (req_key),
+              file=sys.stderr)
+        chosen = joined_requirements[req_key]
+        print("Choosing:", file=sys.stderr)
+        print("\t%s: %s" % (chosen.comes_from,
+                            install_requirement_str(chosen)),
+              file=sys.stderr)
+        print("Conflicting:", file=sys.stderr)
+        for conflicting in req_incompatibles:
+            print("\t%s: %s" % (conflicting.comes_from,
+                                install_requirement_str(conflicting)),
+                  file=sys.stderr)


 def main():
@@ -275,10 +339,14 @@ def main():
    options = parser.parse_args()
    setup_logging(options)
    try:
-        joined_requirements, incompatibles = join_requirements(options)
+        requirements, ignored_requirements = parse_requirements(options)
    except RequirementException as ex:
        logger.error("Requirement failure: %s", ex)
        sys.exit(BAD_REQUIREMENTS)
+    else:
+        joined_requirements, incompatibles = join_requirements(requirements,
+                                                               ignored_requirements)
+    print_incompatibles(incompatibles, joined_requirements)
    print_requirements(joined_requirements)
    if incompatibles:
        sys.exit(INCOMPATIBLE_REQUIREMENTS)