mirror of
https://github.com/AdaCore/git-hooks.git
synced 2026-02-12 12:43:11 -08:00
This is preparatory work for being able to pass a certain amount of data to user-defined hooks. The intent of this change is to limit the number of times we compute that information to at most once. Generally speaking, the main part of this change consists in adding the following new methods to class CommitInfo: - raw_revlog; - raw_revlog_lines; and The rest of this change is mostly adjustments to the code that needs to access commits' rev logs to get them from a shared CommitInfo object rather than from a plain revision (SHA1). Additionally, the function is_revert_commit in git.py, which took a commit revision as a parameter and needed a call to "git log" to get the commit's body, has been replaced by a new method in class CommitInfo. An alternative approach might have been to keep the function, and change its parameter to be a CommitInfo object. But it seemed more natural to make this a method of the CommitInfo class instead, so this is what this change does. Change-Id: Ia4bf23f24226d1e9eddafc61afd37db37f0f5287 TN: T209-005
512 lines
18 KiB
Python
512 lines
18 KiB
Python
# Utility functions for git
|
|
#
|
|
# Derived in a very large part from the gnome git hooks, themselves
|
|
# apparently adapted from git-bz.
|
|
#
|
|
# Original copyright header:
|
|
#
|
|
# | Copyright (C) 2008 Owen Taylor
|
|
# | Copyright (C) 2009 Red Hat, Inc
|
|
# |
|
|
# | This program is free software; you can redistribute it and/or
|
|
# | modify it under the terms of the GNU General Public License
|
|
# | as published by the Free Software Foundation; either version 2
|
|
# | of the License, or (at your option) any later version.
|
|
# |
|
|
# | This program is distributed in the hope that it will be useful,
|
|
# | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# | GNU General Public License for more details.
|
|
# |
|
|
# | You should have received a copy of the GNU General Public License
|
|
# | along with this program; if not, If not, see
|
|
# | http://www.gnu.org/licenses/.
|
|
# |
|
|
# | (These are adapted from git-bz)
|
|
|
|
import os
|
|
import re
|
|
from subprocess import Popen, PIPE, STDOUT
|
|
import subprocess
|
|
|
|
|
|
class CalledProcessError(subprocess.CalledProcessError):
    """The exception raised when a command run by this module fails.

    This class adds nothing to subprocess.CalledProcessError; it is
    an identical child of that class.
    """
    # Historically, this exception was defined here in order to shield
    # the script from the fact that subprocess.CalledProcessError
    # does not exist in Python 2.4; it was a plain clone of the
    # exception defined in module subprocess.
    #
    # Python 2.7 or later is now required, so the exception from
    # module subprocess is guaranteed to be available. The class is
    # nevertheless kept, as an identical child, for convenience of use:
    # users of this module need not import symbols from module
    # subprocess.
|
|
|
|
|
|
def git_run(command, *args, **kwargs):
    """Run a git command.

    PARAMETERS
        command: The name of the git command to run. Underscores
            are replaced by dashes (Eg: "rev_parse" runs
            "git rev-parse").
        Non-keyword arguments are passed verbatim as command line
        arguments, after all the options derived from the keyword
        arguments.
        Keyword arguments are turned into command line options
            <name>=True => --<name> (or -<name> if <name> is a single
                           character)
            <name>='<str>' => --<name>=<str>
        Underscores in <name> are replaced by dashes. Values other
        than True must be strings.
        Special keyword arguments:
            _cwd=<str>: Run the git command from the given directory.
            _env=<dict>: Same as the "env" parameter of the Popen
                constructor.
            _input=<str>: Feed <str> to stdin of the command
            _outfile=<file>: Use <file> as the output file descriptor
            _split_lines=<bool>: If true, return an array with one
                string per returned line

    RETURN VALUE
        None if _outfile was given. Otherwise, the output of the
        command (stdout and stderr combined), with trailing
        whitespace removed, either as a single string, or as a list
        of lines if _split_lines was requested.

    RAISES
        CalledProcessError if the command returns a nonzero exit
        status.
    """
    to_run = ['git', command.replace("_", "-")]

    cwd = None
    env = None
    stdin_data = None
    outfile = None
    do_split_lines = False
    for (k, v) in kwargs.items():
        if k == '_cwd':
            cwd = v
        elif k == '_env':
            env = v
        elif k == '_input':
            stdin_data = v
        elif k == '_outfile':
            outfile = v
        elif k == '_split_lines':
            # Honor the value given, so that _split_lines=False
            # really means that the output should not be split.
            do_split_lines = bool(v)
        elif v is True:
            if len(k) == 1:
                to_run.append("-" + k)
            else:
                to_run.append("--" + k.replace("_", "-"))
        else:
            to_run.append("--" + k.replace("_", "-") + "=" + v)

    to_run.extend(args)

    stdout = outfile if outfile else PIPE
    stdin = None if stdin_data is None else PIPE

    process = Popen(to_run, stdout=stdout, stderr=STDOUT, stdin=stdin,
                    cwd=cwd, env=env)
    output, error = process.communicate(stdin_data)
    # We redirected stderr to the same fd as stdout, so error should
    # not contain anything.
    assert not error

    if process.returncode != 0:
        raise CalledProcessError(process.returncode,
                                 " ".join(to_run),
                                 output)

    if outfile:
        return None

    # Strip any trailing whitespaces and newlines at the end of
    # the output. This is because Git commands often add an extra
    # newline at the end of the data we're querying.
    output = output.rstrip()

    if do_split_lines:
        return output.splitlines()
    return output
|
|
|
|
|
|
class Git:
    """Provide git.<command>(...) as a shorthand for git_run(...).

    Any attribute looked up on an instance which is not otherwise
    found is turned into a function which runs the git command of
    the same name, passing all its arguments to git_run.

    One difference with git_run: The `_outfile' parameter may be
    a string, in which case the output is redirected to the file
    of that name (if the file is already present, it is overwritten).
    """
    def __getattr__(self, command):
        def run_command(*args, **kwargs):
            # If a string _outfile parameter was given, turn it
            # into a file object, which we are then responsible
            # for closing once the command has completed.
            redirect_fd = None
            if '_outfile' in kwargs:
                target = kwargs['_outfile']
                if isinstance(target, basestring):
                    redirect_fd = open(target, 'w')
                    kwargs['_outfile'] = redirect_fd
            try:
                return git_run(command, *args, **kwargs)
            finally:
                if redirect_fd is not None:
                    redirect_fd.close()
        return run_command
|
|
|
|
|
|
# Module-level instance through which git commands are run, using
# the git.<command>(...) notation (Eg: git.rev_parse(git_dir=True)).
git = Git()
|
|
|
|
|
|
def get_git_dir():
    """Return the absolute path of the repository's .git directory.

    This is a convenient short-cut for "git rev-parse --git-dir",
    whose output is passed through abspath so that the path
    returned is always absolute.

    REMARK
        Bare repositories have no .git/ subdirectory. For those,
        the function returns the equivalent, which is the path
        of the repository itself.
    """
    # Note: The abspath call seems to be needed when calling
    # git either from the repository root dir (in which case
    # it returns either '.' or '.git' depending on whether
    # this is a bare repository or not), or when calling it
    # from the .git directory itself (in which case it returns
    # '.').
    git_dir = git.rev_parse(git_dir=True)
    return os.path.abspath(git_dir)
|
|
|
|
|
|
def is_null_rev(rev):
    """Return True iff rev is a NULL commit SHA1.

    A NULL SHA1 is a non-empty string made of zeroes only.

    PARAMETERS
        rev: The revision (a string) to test.
    """
    # Use \Z rather than $ to anchor the match at the very end of
    # the string; $ would also accept a trailing newline.
    return re.match(r"0+\Z", rev) is not None
|
|
|
|
|
|
def empty_tree_rev():
    """Return the SHA1 of the empty tree.

    That SHA1 can be used as the parent of a commit which has
    no parent (root commit).
    """
    # Computing this SHA1 requires a call to git, so the result
    # is cached in an attribute of this function ('cached_rev'),
    # and only computed the first time around.
    try:
        return empty_tree_rev.cached_rev
    except AttributeError:
        pass
    empty_tree_rev.cached_rev = git.mktree(_input='')
    return empty_tree_rev.cached_rev
|
|
|
|
|
|
def is_valid_commit(rev):
    """Return True if rev is a valid commit.

    The check is performed using "git cat-file -e REV".

    PARAMETERS
        rev: The commit SHA1 we want to test.
    """
    try:
        git.cat_file('-e', rev)
    except CalledProcessError:
        # cat-file -e returned nonzero.
        return False
    else:
        return True
|
|
|
|
|
|
def get_object_type(rev):
    """Return the type of the object the given revision refers to.

    PARAMETERS
        rev: The commit SHA1 that we want to inspect.

    RETURN VALUE
        "delete" if REV is a null SHA1 (all zeroes). Otherwise,
        the string returned by "git cat-file -t REV".
    """
    if is_null_rev(rev):
        return "delete"
    return git.cat_file(rev, t=True)
|
|
|
|
|
|
def commit_rev(rev):
    """Resolve rev into a commit revision (SHA1).

    This is a no-op for commit revs. For other types of revisions
    (such as a tag, for instance), the revision is resolved into
    the actual object it points to.

    PARAMETERS
        rev: A revision.

    RETURN VALUE
        The output of "git rev-list -n1 REV".
    """
    resolved_rev = git.rev_list('-n1', rev)
    return resolved_rev
|
|
|
|
|
|
def commit_oneline(rev):
    """Return a short one-line summary of the commit.

    PARAMETERS
        rev: A commit revision (SHA1).

    RETURN VALUE
        A string of the form "<short_rev>... <subject>", where
        <subject> is truncated to its first 59 characters.
    """
    info = git.rev_list(rev, max_count='1', oneline=True)
    # A commit may have an empty subject, in which case the output
    # only contains the abbreviated revision. Pad the list of fields
    # so that the subject defaults to the empty string rather than
    # causing the unpacking to fail.
    fields = info.split(None, 1) + ['', '']
    short_rev, subject = fields[:2]
    return "%s... %s" % (short_rev, subject[0:59])
|
|
|
|
|
|
def get_module_name():
    """Return a short identifier name for the git repository.

    The name is derived from the name of the directory where the git
    repository is stored, with the .git suffix stripped.
    """
    repo_dir = get_git_dir()
    # For non-bare repositories, the git directory is the .git/
    # subdirectory; what we want is the directory containing it.
    if repo_dir.endswith(os.sep + '.git'):
        repo_dir = os.path.dirname(repo_dir)

    name = os.path.basename(repo_dir)
    return name[:-4] if name.endswith(".git") else name
|
|
|
|
|
|
def file_exists(commit_rev, filename):
    """Return True if a file exists for a given commit.

    PARAMETERS
        commit_rev: The commit to inspect.
        filename: The name of the file to look for in the given
            commit_rev. The file name must be relative to
            the repository's root dir.

    RETURN VALUE
        A boolean.
    """
    object_name = '%s:%s' % (commit_rev, filename)
    try:
        git.cat_file('-e', object_name)
    except CalledProcessError:
        # cat-file -e returned non-zero; the file does not exist.
        return False
    else:
        return True
|
|
|
|
|
|
def parse_tag_object(tag_name):
    """Return a dictionary providing info on an annotated tag.

    The behavior of this function is undefined if tag_name is not
    a valid annotated tag.

    PARAMETERS
        tag_name: The name of the tag. It can be the "short" tag name
            (Eg: "some-tag"), or the reference name
            (/refs/tags/some-tag, for instance).

    RETURN VALUE
        A dictionary with the following keys:
            'tagger': The name of the user who created the tag.
            'date': The date the tag was created.
            'message': The revision log used when creating the tag.
            'signed_p': True if the tag was signed, False otherwise.
    """
    # Default values, used for the fields we fail to extract.
    result = {'tagger': '*** Failed to determine tagger ***',
              'date': '*** Failed to determine tag creation date ***',
              'signed_p': False}

    # All the information we need about the tag used to be available
    # from the output of "git cat-file -p". Unfortunately, at least
    # as of git version 1.8.3.2, the date is no longer pretty-printed:
    # we now get a timestamp and a TZ (Eg: '1340722274 -0700')
    # instead of a human-readable date (Eg: 'Tue Jun 26 07:51:14 2012
    # -0700'). This seems to be a deliberate change. Attempts at
    # getting git to pretty-print that timestamp have failed, and so
    # have attempts at converting the timestamp ourselves (in the
    # example above, the translation was off by 11 hours).
    #
    # The only practical solution found is to get the pretty-printed
    # tagger and date from "git show". "git show" also prints the
    # tag's revision log, but follows it with a description of
    # the tagged commit, which makes the extraction of the revision
    # log harder; and using --format to ease the parsing makes
    # the "Date:" field disappear from the tag section.
    #
    # So, rather than add heuristics about how the commit's section
    # starts, only the tagger and date fields are extracted from
    # the output of "git show"; the revision log and signature are
    # extracted from the output of "git cat-file" (as we used to do
    # before).
    header_fields = {'Tagger': 'tagger', 'Date': 'date'}
    for show_line in git.show(tag_name, _split_lines=True):
        if show_line.strip() == '':
            # End of the section holding the tagger and date.
            break
        # Only split at the first colon, as the date itself
        # contains some colons.
        name, colon, value = show_line.partition(':')
        if colon and name in header_fields:
            result[header_fields[name]] = value.strip()

    # Now, get the revision log using "git cat-file -p".
    #
    # The first section contains information about the tag, such as
    # the tag name, type, and tagger. We have already collected
    # that information above, so skip it (we know that it ends with
    # an empty line).
    #
    # The second section contains the revision history, optionally
    # followed by the PGP signature (if the tag was signed).
    tag_lines = iter(git.cat_file(tag_name, p=True, _split_lines=True))

    for header_line in tag_lines:
        if header_line.strip() == "":
            # We have reached the end of the first section.
            break

    revision_log = []
    for log_line in tag_lines:
        if log_line.startswith('-----BEGIN PGP SIGNATURE-----'):
            result['signed_p'] = True
            # We don't want to include the PGP signature in
            # the message, and we know there isn't anything else
            # after the PGP signature, so we're done.
            break
        revision_log.append(log_line)

    result['message'] = "\n".join([" " + log_line
                                   for log_line in revision_log])

    return result
|
|
|
|
|
|
def git_show_ref(*args):
    """Call "git show-ref [args]" and return the result as a dictionary.

    The key of the dictionary is the reference name, and the value
    is a string containing the reference's rev (SHA1).

    This function assumes that all arguments are valid, and
    the usual CalledProcessError will be raised if not.

    PARAMETERS
        *args: Each argument is passed to the "git show-ref"
            as a pattern.

    RETURN VALUE
        A dictionary of references that matched the given patterns,
        minus the references matching the hooks.ignore-refs config.
    """
    # We cannot import that at module level, because module config
    # actually depends on this module. So we import it here instead.
    from config import git_config

    matching_refs = git.show_ref(*args, _split_lines=True)

    ignore_refs_list = [regex.strip()
                        for regex in git_config('hooks.ignore-refs')]

    # Build the dictionary, leaving out the references which match
    # one of the hooks.ignore-refs regular expressions. Filtering
    # while building (rather than deleting entries from the dictionary
    # while iterating over its keys) keeps this code valid regardless
    # of whether dict.keys() returns a list or a view.
    result = {}
    for ref_info in matching_refs:
        # Each line has the form "<rev> <ref>".
        rev, ref = ref_info.split(None, 1)
        if any(re.match(ignore_ref_re, ref)
               for ignore_ref_re in ignore_refs_list):
            continue
        result[ref] = rev

    return result
|
|
|
|
|
|
def commit_parents(rev):
    """Return the commit parents.

    PARAMETERS
        rev: The revision for which the parents need to be computed.

    RETURN VALUE
        A list of revisions corresponding to each parent, ordered
        (ie: the first parent is first on the list, etc). If this is
        a headless commit, return an empty list.
    """
    # The %P format yields the parents' revisions, separated by
    # whitespace.
    parents_field = git.log('-n1', '--pretty=format:%P', rev)
    return parents_field.split()
|
|
|
|
|
|
def commit_subject(rev):
    """Return the commit's subject.

    PARAMETERS
        rev: A commit revision.

    RETURN VALUE
        The subject of the commit, or the empty string if the commit
        has an empty subject.
    """
    info = git.rev_list(rev, max_count='1', oneline=True)
    # The output has the form "<short_rev> <subject>". When the
    # subject is empty, there is nothing after the revision, so
    # we cannot blindly unpack the result of the split.
    fields = info.split(None, 1)
    return fields[1] if len(fields) > 1 else ''
|
|
|
|
|
|
def diff_tree(*args, **kwargs):
    """Same as git.diff_tree, but handling weird filenames properly.

    When the diff-tree output lists some files whose name contain
    some unusual characters (double-quote, tabs, newlines,
    backslashes), the filename is quoted, and those special
    characters are escaped. This function provides an interface
    to "git diff-tree" which handles everything.

    PARAMETERS
        Same as with git.diff_tree.
        *** NOTE *** Do not use _split_lines. It is useless in this
        case, and would likely interfere with this implementation.

    RETURN VALUE
        A list, with one element per file modified. Each element
        is a 6-element tuple, organized as follow:
            (old_mode, new_mode, old_sha1, new_sha1, status, filename)
    """
    assert '_split_lines' not in kwargs, \
        'git.py::diff_tree should never be called with _split_lines'

    # To avoid having to deal with the parsing of quoted filenames,
    # we use the -z option of "git diff-tree", which separates
    # the filename from the rest of the data using the NUL character
    # instead of a space or newline.
    #
    # Splitting the output at each NUL character therefore gives us
    # a sequence of pairs of fields, where the first field of each
    # pair contains the information about a given file, and the second
    # one contains the name of that file.
    #
    # NOTE(review): This assumes exactly one filename per entry;
    # presumably callers never enable rename/copy detection, for
    # which git would list two paths per entry -- to be confirmed.
    fields = git.diff_tree('-z', *args, **kwargs).split('\x00')

    # When doing a "git diff-tree" with a single tree-ish, the output
    # starts with the hash of what is being compared. We're not
    # interested in this piece of information, so strip it.
    if fields and fields[0] and not fields[0].startswith(':'):
        assert re.match('[0-9a-fA-F]+$', fields[0]) is not None
        del fields[0]

    # Each filename ends with a NUL character, so the terminating
    # NUL character in the last entry caused the split to add
    # one empty element at the end. This is expected, so just
    # remove it.
    if len(fields) % 2 == 1 and not fields[-1]:
        del fields[-1]

    # As per the above, we should now have an even number of elements
    # in our list.
    assert len(fields) % 2 == 0

    result = []
    for stats, filename in zip(fields[0::2], fields[1::2]):
        # The stats field should start with a colon and then be
        # followed by space-separated information about the changes
        # made to our file. Strip that colon before splitting.
        assert stats.startswith(':')
        (old_mode, new_mode, old_sha1, new_sha1, status) = \
            stats[1:].split(None, 4)
        result.append((old_mode, new_mode, old_sha1, new_sha1, status,
                       filename))

    return result
|