# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility to read and apply a unified diff without forking patch(1).
|
|
|
|
For a discussion of the unified diff format, see my blog on Artima:
|
|
http://www.artima.com/weblogs/viewpost.jsp?thread=164293
|
|
"""
|
|
|
|
import difflib
|
|
import logging
|
|
import re
|
|
import sys
|
|
|
|
|
|
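# The verbose regex below picks apart a unified-diff hunk header.  For
# example, "@@ -12,4 +12,6 @@" produces the groups ("12", "4", "12", "6");
# for a single-line hunk such as "@@ -3 +3 @@" the optional count groups
# come back as None, which the parsing code below substitutes with 1.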
_CHUNK_RE = re.compile(r"""
  @@
  \s+
  -
  (?: (\d+) (?: , (\d+) )?)
  \s+
  \+
  (?: (\d+) (?: , (\d+) )?)
  \s+
  @@
""", re.VERBOSE)


def PatchLines(old_lines, patch_lines, name="<patch>"):
  """Patches the old_lines with patches read from patch_lines.

  This only reads unified diffs. The header lines are ignored.

  Yields (tag, old, new) tuples where old and new are lists of lines.
  The tag can either start with "error" or be a tag from difflib: "equal",
  "insert", "delete", "replace". After "error" is yielded, no more
  tuples are yielded. It is possible that consecutive "equal" tuples
  are yielded.
  """
  chunks = ParsePatchToChunks(patch_lines, name)
  if chunks is None:
    return iter([("error: ParsePatchToChunks failed", [], [])])
  return PatchChunks(old_lines, chunks)
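

# Usage sketch (illustrative; `old_text` and `patch_text` are placeholder
# names, not part of this module): apply a unified diff to a file's contents
# by concatenating the `new` side of every yielded tuple:
#
#   old_lines = old_text.splitlines(True)
#   patch_lines = patch_text.splitlines(True)
#   new_lines = []
#   for tag, old, new in PatchLines(old_lines, patch_lines, name="example"):
#     if tag.startswith("error"):
#       raise ValueError(tag)
#     new_lines.extend(new)
#   new_text = "".join(new_lines)

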
def PatchChunks(old_lines, chunks):
  """Patches old_lines with chunks.

  Yields (tag, old, new) tuples where old and new are lists of lines.
  The tag can either start with "error" or be a tag from difflib: "equal",
  "insert", "delete", "replace". After "error" is yielded, no more
  tuples are yielded. It is possible that consecutive "equal" tuples
  are yielded.
  """
  if not chunks:
    # The patch is a no-op
    yield ("equal", old_lines, old_lines)
    return

  old_pos = 0
  for (old_i, old_j), (new_i, new_j), old_chunk, new_chunk in chunks:
    eq = old_lines[old_pos:old_i]
    if eq:
      yield "equal", eq, eq
    old_pos = old_i
    # Check that the patch matches the target file
    if old_lines[old_i:old_j] != old_chunk:
      logging.error("mismatch:%s.%s.", old_lines[old_i:old_j], old_chunk)
      yield ("error: old chunk mismatch", old_lines[old_i:old_j], old_chunk)
      return
    # TODO(guido): ParsePatch knows the diff details, but throws the info away
    sm = difflib.SequenceMatcher(None, old_chunk, new_chunk)
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
      yield tag, old_chunk[i1:i2], new_chunk[j1:j2]
    old_pos = old_j

  # Copy the final matching chunk if any.
  eq = old_lines[old_pos:]
  if eq:
    yield ("equal", eq, eq)
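

# Illustrative sketch (assumes `old_lines` and `chunks` as produced by
# ParsePatchToChunks): count added and deleted lines from the opcode stream.
#
#   added = deleted = 0
#   for tag, old, new in PatchChunks(old_lines, chunks):
#     if tag.startswith("error"):
#       break
#     if tag in ("replace", "delete"):
#       deleted += len(old)
#     if tag in ("replace", "insert"):
#       added += len(new)

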
_NO_NEWLINE_MESSAGE = "\\ No newline at end of file"


def ParsePatchToChunks(lines, name="<patch>"):
  """Parses a patch from a list of lines.

  Returns a list of chunks (possibly empty), where each chunk is a tuple:

    old_range, new_range, old_lines, new_lines

  or None if there's a problem.
  """
  lineno = 0
  raw_chunk = []
  chunks = []
  old_range = new_range = None
  old_last = new_last = 0
  in_prelude = True
  for line in lines:
    lineno += 1
    if in_prelude:
      # Skip leading lines until after we've seen one starting with '+++'
      if line.startswith("+++"):
        in_prelude = False
      continue
    match = _CHUNK_RE.match(line)
    if match:
      if raw_chunk:
        # Process the lines in the previous chunk
        old_chunk = []
        new_chunk = []
        for tag, rest in raw_chunk:
          if tag in (" ", "-"):
            old_chunk.append(rest)
          if tag in (" ", "+"):
            new_chunk.append(rest)
        # Check consistency
        old_i, old_j = old_range
        new_i, new_j = new_range
        if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i:
          logging.warn("%s:%s: previous chunk has incorrect length",
                       name, lineno)
          return None
        chunks.append((old_range, new_range, old_chunk, new_chunk))
        raw_chunk = []
      # Parse the @@ header
      old_ln, old_n, new_ln, new_n = match.groups()
      old_ln, old_n, new_ln, new_n = map(long,
                                         (old_ln, old_n or 1,
                                          new_ln, new_n or 1))
      # Convert the numbers to list indices we can use
      if old_n == 0:
        old_i = old_ln
      else:
        old_i = old_ln - 1
      old_j = old_i + old_n
      old_range = old_i, old_j
      if new_n == 0:
        new_i = new_ln
      else:
        new_i = new_ln - 1
      new_j = new_i + new_n
      new_range = new_i, new_j
      # Check header consistency with previous header
      if old_i < old_last or new_i < new_last:
        logging.warn("%s:%s: chunk header out of order: %r",
                     name, lineno, line)
        return None
      if old_i - old_last != new_i - new_last:
        logging.warn("%s:%s: inconsistent chunk header: %r",
                     name, lineno, line)
        return None
      old_last = old_j
      new_last = new_j
    else:
      tag, rest = line[:1], line[1:]  # line[:1] is safe even for an empty line
      if tag in (" ", "-", "+"):
        raw_chunk.append((tag, rest))
      elif line.startswith(_NO_NEWLINE_MESSAGE):
        # TODO(guido): need to check that no more lines follow for this file
        if raw_chunk:
          last_tag, last_rest = raw_chunk[-1]
          if last_rest.endswith("\n"):
            raw_chunk[-1] = (last_tag, last_rest[:-1])
      else:
        # Only log if it's a non-blank line. Blank lines we see a lot.
        if line and line.strip():
          logging.warn("%s:%d: indecipherable input: %r", name, lineno, line)
          if chunks or raw_chunk:
            break  # Trailing garbage isn't so bad
          return None
  if raw_chunk:
    # Process the lines in the last chunk
    old_chunk = []
    new_chunk = []
    for tag, rest in raw_chunk:
      if tag in (" ", "-"):
        old_chunk.append(rest)
      if tag in (" ", "+"):
        new_chunk.append(rest)
    # Check consistency
    old_i, old_j = old_range
    new_i, new_j = new_range
    if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i:
      print >>sys.stderr, ("%s:%s: last chunk has incorrect length" %
                           (name, lineno))
      return None
    chunks.append((old_range, new_range, old_chunk, new_chunk))
    raw_chunk = []
  return chunks
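

# Illustrative example: for a minimal diff such as
#
#   --- a/greeting.txt
#   +++ b/greeting.txt
#   @@ -1,2 +1,2 @@
#    hello
#   -wrold
#   +world
#
# ParsePatchToChunks returns a single chunk:
#
#   [((0, 2), (0, 2), ["hello\n", "wrold\n"], ["hello\n", "world\n"])]

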
# TODO: can we share some of this code with ParsePatchToChunks?
def ParsePatchToLines(lines):
  """Parses a patch from a list of lines.

  Returns None on error, otherwise a list of 3-tuples:
    (old_line_no, new_line_no, line)

  A line number can be 0 if it doesn't exist in the old/new file.
  """
  result = []
  in_prelude = True
  for line in lines:
    if in_prelude:
      result.append((0, 0, line))
      # Skip leading lines until after we've seen one starting with '+++'
      if line.startswith("+++"):
        in_prelude = False
    elif line.startswith("@"):
      result.append((0, 0, line))
      match = _CHUNK_RE.match(line)
      if not match:
        logging.warn("ParsePatchToLines match failed on %s", line)
        return None
      old_ln = int(match.groups()[0])
      new_ln = int(match.groups()[2])
    else:
      if line[0] == "-":
        result.append((old_ln, 0, line))
        old_ln += 1
      elif line[0] == "+":
        result.append((0, new_ln, line))
        new_ln += 1
      elif line[0] == " ":
        result.append((old_ln, new_ln, line))
        old_ln += 1
        new_ln += 1
      elif line.startswith(_NO_NEWLINE_MESSAGE):
        continue
      else:  # Something else, could be property changes etc.
        result.append((0, 0, line))
  return result
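

# Illustrative example: for the same minimal diff shown above,
# ParsePatchToLines returns
#
#   [(0, 0, "--- a/greeting.txt\n"),
#    (0, 0, "+++ b/greeting.txt\n"),
#    (0, 0, "@@ -1,2 +1,2 @@\n"),
#    (1, 1, " hello\n"),
#    (2, 0, "-wrold\n"),
#    (0, 2, "+world\n")]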