# -*- coding: utf-8 -*- # # Copyright 2010-2014 The pygit2 contributors # # This file is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License, version 2, # as published by the Free Software Foundation. # # In addition to the permissions in the GNU General Public License, # the authors give you unlimited permission to link the compiled # version of this file into combinations with other programs, # and to distribute those combinations without any restriction # coming from the use of this file. (The General Public License # restrictions do apply in other respects; for example, they cover # modification of the file, and distribution when not linked into # a combined executable.) # # This file is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; see the file COPYING. If not, write to # the Free Software Foundation, 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301, USA. # Import from the future from __future__ import absolute_import # Import from the Standard Library from string import hexdigits import sys, tarfile from time import time if sys.version_info[0] < 3: from cStringIO import StringIO else: from io import BytesIO as StringIO # Import from pygit2 from _pygit2 import Repository as _Repository from _pygit2 import Oid, GIT_OID_HEXSZ, GIT_OID_MINPREFIXLEN from _pygit2 import GIT_CHECKOUT_SAFE, GIT_CHECKOUT_RECREATE_MISSING, GIT_DIFF_NORMAL from _pygit2 import GIT_FILEMODE_LINK from _pygit2 import Reference, Tree, Commit, Blob from .config import Config from .errors import check_error from .ffi import ffi, C from .index import Index from .remote import RemoteCollection from .blame import Blame from .utils import to_bytes, is_string from .submodule import Submodule class Repository(_Repository): def __init__(self, *args, **kwargs): super(Repository, self).__init__(*args, **kwargs) self._common_init() @classmethod def _from_c(cls, ptr, owned): cptr = ffi.new('git_repository **') cptr[0] = ptr repo = cls.__new__(cls) super(cls, repo)._from_c(bytes(ffi.buffer(cptr)[:]), owned) repo._common_init() return repo def _common_init(self): self.remotes = RemoteCollection(self) # Get the pointer as the contents of a buffer and store it for # later access repo_cptr = ffi.new('git_repository **') ffi.buffer(repo_cptr)[:] = self._pointer[:] self._repo = repo_cptr[0] def lookup_submodule(self, path): csub = ffi.new('git_submodule **') cpath = ffi.new('char[]', to_bytes(path)) err = C.git_submodule_lookup(csub, self._repo, cpath) check_error(err) return Submodule._from_c(self, csub[0]) # # Mapping interface # def get(self, key, default=None): value = self.git_object_lookup_prefix(key) return value if (value is not None) else default def __getitem__(self, key): value = self.git_object_lookup_prefix(key) if value is None: raise KeyError(key) return value def __contains__(self, key): return self.git_object_lookup_prefix(key) is not None def __repr__(self): return "pygit2.Repository(%r)" % self.path # # Remotes # def create_remote(self, name, url): """Create a new remote. Return a <Remote> object. This method is deprecated, please use Remote.remotes.create() """ return self.remotes.create(name, url) # # Configuration # @property def config(self): """The configuration file for this repository. If a the configuration hasn't been set yet, the default config for repository will be returned, including global and system configurations (if they are available). """ cconfig = ffi.new('git_config **') err = C.git_repository_config(cconfig, self._repo) check_error(err) return Config.from_c(self, cconfig[0]) @property def config_snapshot(self): """A snapshot for this repositiory's configuration This allows reads over multiple values to use the same version of the configuration files. """ cconfig = ffi.new('git_config **') err = C.git_repository_config_snapshot(cconfig, self._repo) check_error(err) return Config.from_c(self, cconfig[0]) # # References # def create_reference(self, name, target, force=False): """Create a new reference "name" which points to an object or to another reference. Based on the type and value of the target parameter, this method tries to guess whether it is a direct or a symbolic reference. Keyword arguments: force If True references will be overridden, otherwise (the default) an exception is raised. Examples:: repo.create_reference('refs/heads/foo', repo.head.target) repo.create_reference('refs/tags/foo', 'refs/heads/master') repo.create_reference('refs/tags/foo', 'bbb78a9cec580') """ direct = ( type(target) is Oid or ( all(c in hexdigits for c in target) and GIT_OID_MINPREFIXLEN <= len(target) <= GIT_OID_HEXSZ)) if direct: return self.create_reference_direct(name, target, force) return self.create_reference_symbolic(name, target, force) # # Checkout # @staticmethod def _checkout_args_to_options(strategy=None, directory=None): # Create the options struct to pass copts = ffi.new('git_checkout_options *') check_error(C.git_checkout_init_options(copts, 1)) # References we need to keep to strings and so forth refs = [] # pygit2's default is SAFE | RECREATE_MISSING copts.checkout_strategy = GIT_CHECKOUT_SAFE | GIT_CHECKOUT_RECREATE_MISSING # and go through the arguments to see what the user wanted if strategy: copts.checkout_strategy = strategy if directory: target_dir = ffi.new('char[]', to_bytes(directory)) refs.append(target_dir) copts.target_directory = target_dir return copts, refs def checkout_head(self, **kwargs): """Checkout HEAD For arguments, see Repository.checkout(). """ copts, refs = Repository._checkout_args_to_options(**kwargs) check_error(C.git_checkout_head(self._repo, copts)) def checkout_index(self, **kwargs): """Checkout the repository's index For arguments, see Repository.checkout(). """ copts, refs = Repository._checkout_args_to_options(**kwargs) check_error(C.git_checkout_index(self._repo, ffi.NULL, copts)) def checkout_tree(self, treeish, **kwargs): """Checkout the given treeish For arguments, see Repository.checkout(). """ copts, refs = Repository._checkout_args_to_options(**kwargs) cptr = ffi.new('git_object **') ffi.buffer(cptr)[:] = treeish._pointer[:] check_error(C.git_checkout_tree(self._repo, cptr[0], copts)) def checkout(self, refname=None, **kwargs): """ Checkout the given reference using the given strategy, and update the HEAD. The reference may be a reference name or a Reference object. The default strategy is GIT_CHECKOUT_SAFE | GIT_CHECKOUT_RECREATE_MISSING. To checkout from the HEAD, just pass 'HEAD':: >>> checkout('HEAD') This is identical to calling checkout_head(). If no reference is given, checkout from the index. Arguments: :param str|Reference refname: The reference to checkout. After checkout, the current branch will be switched to this one. :param int strategy: A ``GIT_CHECKOUT_`` value. The default is ``GIT_CHECKOUT_SAFE``. :param str directory: Alternative checkout path to workdir. """ # Case 1: Checkout index if refname is None: return self.checkout_index(**kwargs) # Case 2: Checkout head if refname == 'HEAD': return self.checkout_head(**kwargs) # Case 3: Reference if isinstance(refname, Reference): reference = refname refname = refname.name else: reference = self.lookup_reference(refname) oid = reference.resolve().target treeish = self[oid] self.checkout_tree(treeish, **kwargs) head = self.lookup_reference('HEAD') if head.type == C.GIT_REF_SYMBOLIC: from_ = self.head.shorthand else: from_ = head.target.hex self.set_head(refname) # # Setting HEAD # def set_head(self, target): """Set HEAD to point to the given target Arguments: target The new target for HEAD. Can be a string or Oid (to detach) """ if isinstance(target, Oid): oid = ffi.new('git_oid *') ffi.buffer(oid)[:] = target.raw[:] err = C.git_repository_set_head_detached(self._repo, oid) check_error(err) return # if it's a string, then it's a reference name err = C.git_repository_set_head(self._repo, to_bytes(target)) check_error(err) # # Diff # def diff(self, a=None, b=None, cached=False, flags=GIT_DIFF_NORMAL, context_lines=3, interhunk_lines=0): """ Show changes between the working tree and the index or a tree, changes between the index and a tree, changes between two trees, or changes between two blobs. Keyword arguments: cached use staged changes instead of workdir flag a GIT_DIFF_* constant context_lines the number of unchanged lines that define the boundary of a hunk (and to display before and after) interhunk_lines the maximum number of unchanged lines between hunk boundaries before the hunks will be merged into a one Examples:: # Changes in the working tree not yet staged for the next commit >>> diff() # Changes between the index and your last commit >>> diff(cached=True) # Changes in the working tree since your last commit >>> diff('HEAD') # Changes between commits >>> t0 = revparse_single('HEAD') >>> t1 = revparse_single('HEAD^') >>> diff(t0, t1) >>> diff('HEAD', 'HEAD^') # equivalent If you want to diff a tree against an empty tree, use the low level API (Tree.diff_to_tree()) directly. """ def whatever_to_tree_or_blob(obj): if obj is None: return None # If it's a string, then it has to be valid revspec if is_string(obj): obj = self.revparse_single(obj) # First we try to get to a blob try: obj = obj.peel(Blob) except Exception: # And if that failed, try to get a tree, raising a type # error if that still doesn't work try: obj = obj.peel(Tree) except Exception: raise TypeError('unexpected "%s"' % type(obj)) return obj a = whatever_to_tree_or_blob(a) b = whatever_to_tree_or_blob(b) opt_keys = ['flags', 'context_lines', 'interhunk_lines'] opt_values = [flags, context_lines, interhunk_lines] # Case 1: Diff tree to tree if isinstance(a, Tree) and isinstance(b, Tree): return a.diff_to_tree(b, **dict(zip(opt_keys, opt_values))) # Case 2: Index to workdir elif a is None and b is None: return self.index.diff_to_workdir(*opt_values) # Case 3: Diff tree to index or workdir elif isinstance(a, Tree) and b is None: if cached: return a.diff_to_index(self.index, *opt_values) else: return a.diff_to_workdir(*opt_values) # Case 4: Diff blob to blob if isinstance(a, Blob) and isinstance(b, Blob): return a.diff(b) raise ValueError("Only blobs and treeish can be diffed") def state_cleanup(self): """Remove all the metadata associated with an ongoing command like merge, revert, cherry-pick, etc. For example: MERGE_HEAD, MERGE_MSG, etc. """ C.git_repository_state_cleanup(self._repo) # # blame # def blame(self, path, flags=None, min_match_characters=None, newest_commit=None, oldest_commit=None, min_line=None, max_line=None): """Return a Blame object for a single file. Arguments: path Path to the file to blame. flags A GIT_BLAME_* constant. min_match_characters The number of alphanum chars that must be detected as moving/copying within a file for it to associate those lines with the parent commit. newest_commit The id of the newest commit to consider. oldest_commit The id of the oldest commit to consider. min_line The first line in the file to blame. max_line The last line in the file to blame. Examples:: repo.blame('foo.c', flags=GIT_BLAME_TRACK_COPIES_SAME_FILE)"); """ options = ffi.new('git_blame_options *') C.git_blame_init_options(options, C.GIT_BLAME_OPTIONS_VERSION) if min_match_characters: options.min_match_characters = min_match_characters if newest_commit: if not isinstance(newest_commit, Oid): newest_commit = Oid(hex=newest_commit) ffi.buffer(ffi.addressof(options, 'newest_commit'))[:] = newest_commit.raw if oldest_commit: if not isinstance(oldest_commit, Oid): oldest_commit = Oid(hex=oldest_commit) ffi.buffer(ffi.addressof(options, 'oldest_commit'))[:] = oldest_commit.raw if min_line: options.min_line = min_line if max_line: options.max_line = max_line cblame = ffi.new('git_blame **') err = C.git_blame_file(cblame, self._repo, to_bytes(path), options) check_error(err) return Blame._from_c(self, cblame[0]) # # Index # @property def index(self): """Index representing the repository's index file.""" cindex = ffi.new('git_index **') err = C.git_repository_index(cindex, self._repo) check_error(err, True) return Index.from_c(self, cindex) # # Merging # @staticmethod def _merge_options(favor): """Return a 'git_merge_opts *'""" def favor_to_enum(favor): if favor == 'normal': return C.GIT_MERGE_FILE_FAVOR_NORMAL elif favor == 'ours': return C.GIT_MERGE_FILE_FAVOR_OURS elif favor == 'theirs': return C.GIT_MERGE_FILE_FAVOR_THEIRS elif favor == 'union': return C.GIT_MERGE_FILE_FAVOR_UNION else: return None favor_val = favor_to_enum(favor) if favor_val is None: raise ValueError("unkown favor value %s" % favor) opts = ffi.new('git_merge_options *') err = C.git_merge_init_options(opts, C.GIT_MERGE_OPTIONS_VERSION) check_error(err) opts.file_favor = favor_val return opts def merge_file_from_index(self, ancestor, ours, theirs): """Merge files from index. Return a string with the merge result containing possible conflicts. ancestor The index entry which will be used as a common ancestor. ours The index entry to take as "ours" or base. theirs The index entry which will be merged into "ours" """ cmergeresult = ffi.new('git_merge_file_result *') cancestor, ancestor_str_ref = ( ancestor._to_c() if ancestor is not None else (ffi.NULL, ffi.NULL)) cours, ours_str_ref = ( ours._to_c() if ours is not None else (ffi.NULL, ffi.NULL)) ctheirs, theirs_str_ref = ( theirs._to_c() if theirs is not None else (ffi.NULL, ffi.NULL)) err = C.git_merge_file_from_index( cmergeresult, self._repo, cancestor, cours, ctheirs, ffi.NULL); check_error(err) ret = ffi.string(cmergeresult.ptr, cmergeresult.len).decode('utf-8') C.git_merge_file_result_free(cmergeresult) return ret def merge_commits(self, ours, theirs, favor='normal'): """Merge two arbitrary commits Arguments: ours The commit to take as "ours" or base. theirs The commit which will be merged into "ours" favor How to deal with file-level conflicts. Can be one of * normal (default). Conflicts will be preserved. * ours. The "ours" side of the conflict region is used. * theirs. The "theirs" side of the conflict region is used. * union. Unique lines from each side will be used. for all but NORMAL, the index will not record a conflict. Both "ours" and "theirs" can be any object which peels to a commit or the id (string or Oid) of an object which peels to a commit. Returns an index with the result of the merge """ ours_ptr = ffi.new('git_commit **') theirs_ptr = ffi.new('git_commit **') cindex = ffi.new('git_index **') if is_string(ours) or isinstance(ours, Oid): ours = self[ours] if is_string(theirs) or isinstance(theirs, Oid): theirs = self[theirs] ours = ours.peel(Commit) theirs = theirs.peel(Commit) opts = self._merge_options(favor) ffi.buffer(ours_ptr)[:] = ours._pointer[:] ffi.buffer(theirs_ptr)[:] = theirs._pointer[:] err = C.git_merge_commits(cindex, self._repo, ours_ptr[0], theirs_ptr[0], opts) check_error(err) return Index.from_c(self, cindex) def merge_trees(self, ancestor, ours, theirs, favor='normal'): """Merge two trees Arguments: ancestor The tree which is the common ancestor between 'ours' and 'theirs' ours The commit to take as "ours" or base. theirs The commit which will be merged into "ours" favor How to deal with file-level conflicts. Can be one of * normal (default). Conflicts will be preserved. * ours. The "ours" side of the conflict region is used. * theirs. The "theirs" side of the conflict region is used. * union. Unique lines from each side will be used. for all but NORMAL, the index will not record a conflict. Returns an Index that reflects the result of the merge. """ ancestor_ptr = ffi.new('git_tree **') ours_ptr = ffi.new('git_tree **') theirs_ptr = ffi.new('git_tree **') cindex = ffi.new('git_index **') if is_string(ancestor) or isinstance(ancestor, Oid): ancestor = self[ancestor] if is_string(ours) or isinstance(ours, Oid): ours = self[ours] if is_string(theirs) or isinstance(theirs, Oid): theirs = self[theirs] ancestor = ancestor.peel(Tree) ours = ours.peel(Tree) theirs = theirs.peel(Tree) opts = self._merge_options(favor) ffi.buffer(ancestor_ptr)[:] = ancestor._pointer[:] ffi.buffer(ours_ptr)[:] = ours._pointer[:] ffi.buffer(theirs_ptr)[:] = theirs._pointer[:] err = C.git_merge_trees(cindex, self._repo, ancestor_ptr[0], ours_ptr[0], theirs_ptr[0], opts) check_error(err) return Index.from_c(self, cindex) # # Utility for writing a tree into an archive # def write_archive(self, treeish, archive, timestamp=None, prefix=''): """Write treeish into an archive If no timestamp is provided and 'treeish' is a commit, its committer timestamp will be used. Otherwise the current time will be used. All path names in the archive are added to 'prefix', which defaults to an empty string. Arguments: treeish The treeish to write. archive An archive from the 'tarfile' module timestamp Timestamp to use for the files in the archive. prefix Extra prefix to add to the path names in the archive. Example:: >>> import tarfile, pygit2 >>>> with tarfile.open('foo.tar', 'w') as archive: >>>> repo = pygit2.Repsitory('.') >>>> repo.write_archive(archive, repo.head.target) """ # Try to get a tree form whatever we got if isinstance(treeish, Tree): tree = treeish if isinstance(treeish, Oid) or is_string(treeish): treeish = self[treeish] # if we don't have a timestamp, try to get it from a commit if not timestamp: try: commit = treeish.peel(Commit) timestamp = commit.committer.time except Exception: pass # as a last resort, use the current timestamp if not timestamp: timestamp = int(time()) tree = treeish.peel(Tree) index = Index() index.read_tree(tree) for entry in index: content = self[entry.id].read_raw() info = tarfile.TarInfo(prefix + entry.path) info.size = len(content) info.mtime = timestamp info.uname = info.gname = 'root' # just because git does this if entry.mode == GIT_FILEMODE_LINK: info.type = archive.SYMTYPE info.linkname = content info.mode = 0o777 # symlinks get placeholder info.size = 0 archive.addfile(info) else: archive.addfile(info, StringIO(content)) # # Ahead-behind, which mostly lives on its own namespace # def ahead_behind(self, local, upstream): """Calculate how many different commits are in the non-common parts of the history between the two given ids. Ahead is how many commits are in the ancestry of the 'local' commit which are not in the 'upstream' commit. Behind is the opposite. Arguments local The commit which is considered the local or current state upstream The commit which is considered the upstream Returns a tuple of two integers with the number of commits ahead and behind respectively. """ if not isinstance(local, Oid): local = self.expand_id(local) if not isinstance(upstream, Oid): upstream = self.expand_id(upstream) ahead, behind = ffi.new('size_t*'), ffi.new('size_t*') oid1, oid2 = ffi.new('git_oid *'), ffi.new('git_oid *') ffi.buffer(oid1)[:] = local.raw[:] ffi.buffer(oid2)[:] = upstream.raw[:] err = C.git_graph_ahead_behind(ahead, behind, self._repo, oid1, oid2) check_error(err) return int(ahead[0]), int(behind[0]) # # Git attributes # def get_attr(self, path, name, flags=0): """Retrieve an attribute for a file by path Arguments path The path of the file to look up attributes for, relative to the workdir root name The name of the attribute to look up flags A combination of GIT_ATTR_CHECK_ flags which determine the lookup order Returns either a boolean, None (if the value is unspecified) or string with the value of the attribute. """ cvalue = ffi.new('char **') err = C.git_attr_get(cvalue, self._repo, flags, to_bytes(path), to_bytes(name)) check_error(err) # Now let's see if we can figure out what the value is attr_kind = C.git_attr_value(cvalue[0]) if attr_kind == C.GIT_ATTR_UNSPECIFIED_T: return None elif attr_kind == C.GIT_ATTR_TRUE_T: return True elif attr_kind == C.GIT_ATTR_FALSE_T: return False elif attr_kind == C.GIT_ATTR_VALUE_T: return ffi.string(cvalue[0]).decode('utf-8') assert False, "the attribute value from libgit2 is invalid" # # Identity for reference operations # @property def ident(self): cname = ffi.new('char **') cemail = ffi.new('char **') err = C.git_repository_ident(cname, cemail, self._repo) check_error(err) return (ffi.string(cname).decode('utf-8'), ffi.string(cemail).decode('utf-8')) def set_ident(self, name, email): """Set the identity to be used for reference operations Updates to some references also append data to their reflog. You can use this method to set what identity will be used. If none is set, it will be read from the configuration. """ err = C.git_repository_set_ident(self._repo, to_bytes(name), to_bytes(email)) check_error(err)