264 lines
9.8 KiB
Python
264 lines
9.8 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# The LLVM Compiler Infrastructure
|
||
|
#
|
||
|
# This file is distributed under the University of Illinois Open Source
|
||
|
# License. See LICENSE.TXT for details.
|
||
|
""" This module is responsible for capturing the compiler invocations of any
|
||
|
build process. The result of that should be a compilation database.
|
||
|
|
||
|
This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
|
||
|
mechanisms provided by the dynamic linker. The related library is implemented
|
||
|
in C language and can be found under 'libear' directory.
|
||
|
|
||
|
The 'libear' library is capturing all child process creation and logging the
|
||
|
relevant information about it into separate files in a specified directory.
|
||
|
The parameter of this process is the output directory name, where the report
|
||
|
files shall be placed. This parameter is passed as an environment variable.
|
||
|
|
||
|
The module also implements compiler wrappers to intercept the compiler calls.
|
||
|
|
||
|
The module implements the build command execution and the post-processing of
|
||
|
the output files, which will be condensed into a compilation database. """
|
||
|
|
||
|
import sys
|
||
|
import os
|
||
|
import os.path
|
||
|
import re
|
||
|
import itertools
|
||
|
import json
|
||
|
import glob
|
||
|
import logging
|
||
|
from libear import build_libear, TemporaryDirectory
|
||
|
from libscanbuild import command_entry_point, compiler_wrapper, \
|
||
|
wrapper_environment, run_command, run_build
|
||
|
from libscanbuild import duplicate_check
|
||
|
from libscanbuild.compilation import split_command
|
||
|
from libscanbuild.arguments import parse_args_for_intercept_build
|
||
|
from libscanbuild.shell import encode, decode
|
||
|
|
||
|
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']
|
||
|
|
||
|
# ASCII separator characters which delimit the execution report format
# (written by write_exec_trace, read back by parse_exec_trace).
GS = '\x1d'  # group separator: terminates one execution report
RS = '\x1e'  # record separator: stands between the fields of a report
US = '\x1f'  # unit separator: follows every argument of the command

# Names of the compiler wrappers which are exported as CC and CXX.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# Platforms for which is_preload_disabled always answers True.
WRAPPER_ONLY_PLATFORMS = frozenset(('win32', 'cygwin'))
|
||
|
|
||
|
|
||
|
@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line arguments and hands them over to `capture`;
    whatever `capture` returns is returned from here. """

    return capture(parse_args_for_intercept_build())
|
||
|
|
||
|
|
||
|
def capture(args):
    """ The entry point of build command interception.

    Runs the build command with the interception environment set up,
    reads the execution reports left in a temporary directory and writes
    the resulting compilation database into `args.cdb`.

    :param args: the parsed command line arguments; this function reads
                 `build`, `cdb` and (when present) `append` from it,
    :return: the value returned by `run_build` for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run (each exec trace might
        # produce zero or more entries)
        current = itertools.chain.from_iterable(
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both; entries of the previous
        # run come first, and entries whose source file no longer exists
        # are dropped
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; glob already returns paths which
        # include tmp_dir, so they are used as they are (joining them with
        # tmp_dir again would double the prefix for a relative tmp_dir)
        pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy `entries` generator has
        # to be consumed here, while the trace files still exist
        with open(args.cdb, 'w') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
|
||
|
|
||
|
|
||
|
def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It only prepares the environment variables; running the build command
    is left to the caller. The exec calls will be logged by the 'libear'
    preloaded library or by the 'wrapper' programs.

    :param args: the parsed command line arguments; `override_compiler`
                 and (when present) `cc` are read here, and the whole
                 object is passed to `wrapper_environment`,
    :param destination: directory where the execution reports shall be
                        written (and where libear gets built),
    :return: a dictionary, copy of `os.environ` extended with the
             variables required by the selected interception method. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # compiler wrappers are used on explicit request, or when the library
    # preload is known not to work on this platform
    use_wrappers = args.override_compiler or is_preload_disabled(sys.platform)
    libear_path = None if use_wrappers else build_libear(c_compiler,
                                                         destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
|
||
|
|
||
|
|
||
|
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Delegates to `compiler_wrapper`, passing the interception specific
    implementation as callback, and returns its result. """

    implementation = intercept_compiler_wrapper_impl
    return compiler_wrapper(implementation)
|
||
|
|
||
|
|
||
|
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    Writes the execution report of the current process into the target
    directory, which is named by the INTERCEPT_BUILD_TARGET_DIR environment
    variable. Problems are only logged as warnings, never raised.

    :param _: ignored,
    :param execution: the execution object passed to write_exec_trace. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if target_dir:
        # the report file is named after the pid of this process
        try:
            report_name = '{0}{1}'.format(os.getpid(), TRACE_FILE_EXTENSION)
            report_path = os.path.join(target_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(message_prefix, 'io problem')
    else:
        logging.warning(message_prefix, 'missing target directory')
|
||
|
|
||
|
|
||
|
def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in
    interception library. A report is a sequence of fields (pid, parent
    pid, function name, working directory, command) separated by RS and
    closed by GS; every argument of the command is followed by US. The
    wrapper writes its own pid as parent pid and 'wrapper' as function.

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file
                  (its `pid`, `cwd` and `cmd` attributes are used). """

    with open(filename, 'ab') as handler:
        process_id = str(entry.pid)
        arguments = US.join(entry.cmd) + US
        fields = (process_id, process_id, 'wrapper', entry.cwd, arguments)
        report = RS.join(fields) + GS
        handler.write(report.encode('utf-8'))
|
||
|
|
||
|
|
||
|
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path of the execution report file,
    :return: generator of dictionaries with 'pid', 'ppid', 'function',
             'directory' (strings) and 'command' (list of strings) keys. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
    # reports are closed by GS, therefore the split leaves empty pieces
    # behind (at least the last one), which are skipped
    for report in content.split(GS):
        if not report:
            continue
        fields = report.split(RS)
        yield {
            'pid': fields[0],
            'ppid': fields[1],
            'function': fields[2],
            'directory': fields[3],
            # every argument is followed by US: drop the empty tail
            'command': fields[4].split(US)[:-1]
        }
|
||
|
|
||
|
|
||
|
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dictionary with 'command' and 'directory' keys,
    :return: generator of dictionaries with 'directory', 'command' and
             'file' keys, one per source file of the command; empty when
             `split_command` returns a false value for the command. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        if not os.path.isabs(name):
            name = os.path.join(cwd, name)
        return os.path.normpath(name)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        # the compiler name is normalized to either 'c++' or 'cc'
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        yield {
            'directory': exec_trace['directory'],
            'command': encode(command),
            'file': abspath(exec_trace['directory'], source)
        }
|
||
|
|
||
|
|
||
|
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # Best effort: when the status query fails for any reason, the
        # preload is not reported as disabled. KeyboardInterrupt and
        # SystemExit are deliberately not caught here.
        return False
|
||
|
|
||
|
|
||
|
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with 'file', 'directory' and 'command' keys,
    :return: string built from the reversed file name, the reversed
             directory and the command without its first word. """

    # For faster lookup in set, the file and the directory are reverted
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the hash does
    # not contain the first word of the command.
    arguments = decode(entry['command'])[1:]

    return '<>'.join((reversed_file, reversed_directory, ' '.join(arguments)))
|