# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import sys, os, subprocess, struct, re

# Record layouts.  Each layout is an ordered list of (field name, type) pairs.
# A type found in ``type_mapping`` is a fixed-size integer; any other type is
# the *name of an earlier field* that holds the byte length of this
# variable-length field.

local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size")
]

cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]

cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]

# Maps the fixed-size type names used above to ``struct`` format characters.
type_mapping = {"uint32": "I", "uint16": "H"}


def format_struct(format):
    """Translate a record layout into a ``struct`` format string.

    Args:
        format: ordered list of (field name, type) pairs, e.g. ``cdir_end``.

    Returns:
        A ``(fmt, string_fields)`` tuple.  ``fmt`` is a little-endian
        (``"<"``) ``struct`` format covering only the fixed-size fields, in
        layout order.  ``string_fields`` maps each variable-length field name
        to the name of the field that stores its length.
    """
    string_fields = {}
    fmt = "<"
    for (name, value) in format:
        try:
            fmt += type_mapping[value][0]
        except KeyError:
            # Not a known integer type: the "type" names the size field.
            string_fields[name] = value
    return (fmt, string_fields)


def size_of(format):
    """Return the byte size of the fixed-size part of a record layout."""
    return struct.calcsize(format_struct(format)[0])


class MyStruct:
    """A parsed record whose fields are reachable as attributes.

    Field values live in a private ``struct_members`` dict rather than as
    ordinary instance attributes.  All internal state is written through
    ``self.__dict__`` directly because ``__setattr__`` is overridden to accept
    only fields that have already been added with ``addMember``.
    """

    def __init__(self, format, string_fields):
        """Create an empty record.

        Args:
            format: the record layout (list of (name, type) pairs).
            string_fields: mapping of variable-length field name to the name
                of its size field, as returned by ``format_struct``.
        """
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Add (or overwrite) the field ``name`` with ``value``."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        """Return the value of field ``item``.

        Raises:
            AttributeError: if no such field has been added.  The message
                names the missing field; nothing is printed, so probes such
                as ``hasattr`` stay silent.
        """
        try:
            return self.__dict__["struct_members"][item]
        except KeyError:
            raise AttributeError("no %s" % item)

    def __setattr__(self, item, value):
        """Update an existing field.

        Raises:
            AttributeError: if ``item`` was never added with ``addMember``;
                new fields cannot be created by assignment.
        """
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError("cannot set unknown field %s" % item)

    def pack(self):
        """Serialize the record back to bytes.

        The fixed-size fields are packed with ``struct.pack`` in layout order,
        followed by the variable-length fields, also in layout order.

        Returns:
            The serialized record as a byte string.
        """
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        values = []
        extra_chunks = []
        for (name, _) in format:
            if name in string_fields:
                extra_chunks.append(struct_members[name])
            else:
                values.append(struct_members[name])
        # Join as bytes: struct.pack() returns bytes, and on Python 3 bytes
        # cannot be concatenated with a text string.
        return struct.pack(format_struct(format)[0], *values) + b"".join(extra_chunks)


# Expected value of the ``signature`` field of a ``cdir_end`` record.
ENDSIG = 0x06054b50


def assert_true(cond, msg):
    """Raise ``Exception(msg)`` when ``cond`` is false; otherwise do nothing."""
    if not cond:
        raise Exception(msg)


class BinaryBlob:
    """An in-memory copy of a file with a read cursor.

    Attributes:
        data: the complete file contents.
        offset: position just past the most recent read.
        length: total number of bytes in ``data``.
    """

    def __init__(self, f):
        """Read the file at path ``f`` fully into memory."""
        # The context manager closes the handle as soon as the data is read.
        with open(f, "rb") as infile:
            self.data = infile.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return ``length`` bytes starting at ``pos``.

        Also moves ``self.offset`` to ``pos + length``.
        """
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct(self, format, offset=None):
        """Parse one record described by ``format``.

        Args:
            format: the record layout (list of (name, type) pairs).
            offset: position of the record; defaults to the current
                ``self.offset``.

        Returns:
            A ``MyStruct`` holding every field of the layout.  On return
            ``self.offset`` points just past the record, including its
            variable-length fields.

        Raises:
            struct.error: if fewer bytes are available than the fixed-size
                part of the record requires.
            Exception: if re-serializing the parsed record does not reproduce
                the bytes that were read.
        """
        if offset is None:
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in format:
            if name not in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields, this does additional reads to fill them in.  The
                # size field must already have been added to retstruct.
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset, getattr(retstruct, member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code: re-read the whole record (this
        # leaves self.offset where it already is) and compare with pack().
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data, "Serialization fail %d !=%d"% (len(out_data), len(data)))
        return retstruct


# NOTE(review): the header of optimizejar starts at this point in the file and
# its body continues on the following source line.  That definition is not
# fully visible here, so it has not been reformatted; its header text is kept
# verbatim below (as a comment, because it is not valid Python until the body
# is restored under it):
#
#   def optimizejar(jar, outjar, inlog = None): if inlog is not None:
inlog = open(inlog).read().rstrip() # in the case of an empty log still move the index forward if len(inlog) == 0: inlog = [] else: inlog = inlog.split("\n") outlog = [] jarblob = BinaryBlob(jar) dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end)) assert_true(dirend.signature == ENDSIG, "no signature in the end"); cdir_offset = dirend.cdir_offset readahead = 0 if inlog is None and cdir_offset == 4: readahead = struct.unpack("= old_entry_offset + len(data): outlog.append(entry.filename) reordered_count += 1 if inlog is None: dirend.cdir_offset = out_offset if dups_found > 0: print("WARNING: Found %d duplicate files taking %d bytes"%(dups_found, dupe_bytes)) dirend.cdir_size = len(cdir_data) dirend.disk_entries = dirend.cdir_entries dirend_data = dirend.pack() assert_true(size_of(cdir_end) == len(dirend_data), "Failed to serialize directory end correctly. Serialized size;%d, expected:%d"%(len(dirend_data), size_of(cdir_end))); outfd.seek(dirend.cdir_offset) outfd.write(cdir_data) outfd.write(dirend_data) # for ordered jars the central directory is written in the begining of the file, so a second central-directory # entry has to be written in the end of the file if inlog is not None: outfd.seek(0) outfd.write(struct.pack("