2013-01-23 02:23:14 -08:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import hashlib
|
|
|
|
from mozpack.packager.unpack import UnpackFinder
|
2013-02-02 22:18:55 -08:00
|
|
|
from collections import OrderedDict
|
2013-01-23 02:23:14 -08:00
|
|
|
|
|
|
|
'''
|
|
|
|
Find files duplicated in a given packaged directory, independently of its
|
|
|
|
package format.
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|
|
|
def find_dupes(source):
    """Report files with identical content found under *source*.

    Every (path, file) pair yielded by ``UnpackFinder(source)`` is read in
    full and grouped by the MD5 digest of its content. For each digest that
    is shared by more than one path, the content size and the list of paths
    are printed to standard output, followed by a summary warning line when
    at least one such group exists.

    :param source: value handed as-is to ``UnpackFinder``.
    :returns: None; the report is only printed.
    """
    # digest -> (content length, [paths]). An OrderedDict keeps the report
    # in the order in which each distinct content was first encountered.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        content = f.open().read()
        m = hashlib.md5(content).digest()
        if m not in md5s:
            md5s[m] = (len(content), [])
        md5s[m][1].append(p)

    total = 0
    num_dupes = 0
    # Only sizes and paths are needed here, so iterate over the values.
    # values() exists on both Python 2 and 3, unlike iteritems().
    for size, paths in md5s.values():
        if len(paths) > 1:
            # The count suffix is only shown when there is more than one
            # extra copy of the same content.
            if len(paths) > 2:
                times = ' (%d times)' % (len(paths) - 1)
            else:
                times = ''
            # print() with a single parenthesized argument produces the
            # same output with the Python 2 statement and the Python 3
            # function.
            print('Duplicates %d bytes%s:' % (size, times))
            print(''.join(' %s\n' % p for p in paths))
            # The first occurrence is not counted as wasted space.
            total += (len(paths) - 1) * size
            num_dupes += 1

    if num_dupes:
        print("WARNING: Found %d duplicated files taking %d bytes"
              % (num_dupes, total) + " (uncompressed)")
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Expects exactly one argument after the program name. With any other
    argument count, a usage message is written to standard error and the
    process exits with status 1. Otherwise the argument is passed to
    ``find_dupes``.
    """
    if len(sys.argv) != 2:
        # os is only needed to build the usage message.
        import os
        # sys.stderr.write works identically on Python 2 and 3, unlike the
        # Python 2-only ``print >>sys.stderr`` statement form.
        sys.stderr.write("Usage: %s directory\n"
                         % os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])
|
|
|
|
|
|
|
|
# Run the duplicate finder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|