Bug 1241989 - Speed up install manifests by avoiding path operations. r=gps

This makes two straightforward optimizations in FileCopier: avoiding a redundant iteration
over the directory structure to find destination files (which includes a
call to normpath) and avoiding redundant calls to determine directories to preserve
when remove_unaccounted is not specified (which include a call to dirname).

Running a no-op install of _tests with this patch eliminates about
25,000 calls to normpath and about 220,000 calls to dirname, resulting in
an overall speedup of 10-20%.
This commit is contained in:
Chris Manchester 2016-01-22 13:54:33 -08:00
parent cc00ec47c4
commit f379d17a7e

View File

@ -285,11 +285,6 @@ class FileCopier(FileRegistry):
# friends.
required_dirs = set([destination])
dest_files = set()
for p, f in self:
dest_files.add(os.path.normpath(os.path.join(destination, p)))
required_dirs |= set(os.path.normpath(os.path.join(destination, d))
for d in self.required_directories())
@ -377,10 +372,12 @@ class FileCopier(FileRegistry):
existing_files.add(os.path.normpath(os.path.join(root, f)))
# Now we reconcile the state of the world against what we want.
dest_files = set()
# Install files.
for p, f in self:
destfile = os.path.normpath(os.path.join(destination, p))
dest_files.add(destfile)
if f.copy(destfile, skip_if_older):
result.updated_files.add(destfile)
else:
@ -412,20 +409,19 @@ class FileCopier(FileRegistry):
# Then don't remove directories if we didn't remove unaccounted files
# and one of those files exists.
if not remove_unaccounted:
parents = set()
pathsep = os.path.sep
for f in existing_files:
parent = f
previous = ''
parents = set()
path = f
while True:
parent = os.path.dirname(parent)
parents.add(parent)
if previous == parent:
# All the paths are normalized and relative by this point,
# so os.path.dirname would only do extra work.
dirname = path.rpartition(pathsep)[0]
if dirname in parents:
break
previous = parent
remove_dirs -= parents
parents.add(dirname)
path = dirname
remove_dirs -= parents
# Remove empty directories that aren't required.
for d in sorted(remove_dirs, key=len, reverse=True):