Bug 847479 - Add smart filters borrowed from xz-utils to improve SeekableZStream compression rate. r=nfroyd

This commit is contained in:
Mike Hommey 2013-03-06 07:29:59 +01:00
parent 60e28c920b
commit d471ff050e
4 changed files with 167 additions and 7 deletions

View File

@ -35,3 +35,11 @@ CPPSRCS += \
$(NULL) $(NULL)
include $(topsrcdir)/config/rules.mk include $(topsrcdir)/config/rules.mk
# When the target CPU is ARM, tell the host-side build which instruction set
# the target uses: TARGET_THUMB if MOZ_THUMB2 is defined, TARGET_ARM otherwise.
# SzipCompress tests these macros to choose its default filter.
ifeq (arm,$(TARGET_CPU))
ifdef MOZ_THUMB2
HOST_CXXFLAGS += -DTARGET_THUMB
else
HOST_CXXFLAGS += -DTARGET_ARM
endif
endif

View File

@ -29,6 +29,7 @@ SeekableZStream::Init(const void *buf, size_t length)
lastChunkSize = header->lastChunkSize; lastChunkSize = header->lastChunkSize;
windowBits = header->windowBits; windowBits = header->windowBits;
offsetTable.Init(&header[1], header->nChunks); offsetTable.Init(&header[1], header->nChunks);
filter = GetFilter(header->filter);
/* Sanity check */ /* Sanity check */
if ((chunkSize == 0) || if ((chunkSize == 0) ||
@ -99,5 +100,80 @@ SeekableZStream::DecompressChunk(void *where, size_t chunk, size_t length)
log("inflateEnd failed: %s", zStream.msg); log("inflateEnd failed: %s", zStream.msg);
return false; return false;
} }
if (filter)
filter(chunk * chunkSize, UNFILTER, (unsigned char *)where, chunkLen);
return true; return true;
} }
/* Branch/Call/Jump conversion filter for Thumb, derived from xz-utils
 * by Igor Pavlov and Lasse Collin, published in the public domain.
 *
 * Scans buf in 2-byte steps for a pair of halfwords whose high bytes match
 * 0xf0 and 0xf8 under the mask 0xf8. The 22-bit value spread over such a
 * pair (11 bits per halfword) is rewritten in place:
 *  - FILTER adds the position of the pair (offset + index + 4) to it;
 *  - any other direction subtracts that same position again.
 *
 * offset: position of buf[0] within the whole stream.
 * dir:    direction of the conversion.
 * buf:    data to convert in place.
 * size:   number of bytes available in buf. */
static void
BCJ_Thumb_filter(off_t offset, SeekableZStream::FilterDirection dir,
                 unsigned char *buf, size_t size)
{
  size_t pos = 0;
  while (pos + 4 <= size) {
    unsigned char *insn = buf + pos;

    /* Not a matching pair: move on to the next halfword. */
    if ((insn[1] & 0xf8) != 0xf0 || (insn[3] & 0xf8) != 0xf8) {
      pos += 2;
      continue;
    }

    /* Reassemble the 22-bit value, then scale it to a byte distance. */
    uint32_t encoded = ((insn[1] & 0x07) << 19)
                     | (insn[0] << 11)
                     | ((insn[3] & 0x07) << 8)
                     | insn[2];
    encoded <<= 1;

    /* Position term shared by both directions, reduced modulo 2^32. */
    const uint32_t pc = static_cast<uint32_t>(offset + (uint32_t)(pos) + 4);
    uint32_t converted =
      (dir == SeekableZStream::FILTER) ? pc + encoded : encoded - pc;
    converted >>= 1;

    /* Store the value back, keeping the fixed high bits of each halfword. */
    insn[0] = static_cast<unsigned char>(converted >> 11);
    insn[1] = static_cast<unsigned char>(0xf0 | ((converted >> 19) & 0x07));
    insn[2] = static_cast<unsigned char>(converted);
    insn[3] = static_cast<unsigned char>(0xf8 | ((converted >> 8) & 0x07));

    /* Both halfwords of the pair were consumed. */
    pos += 4;
  }
}
/* Branch/Call/Jump conversion filter for ARM, derived from xz-utils
 * by Igor Pavlov and Lasse Collin, published in the public domain.
 *
 * Walks buf in 4-byte steps. Every word whose last byte is 0xeb has the
 * 24-bit value held in its first three bytes (least significant byte first)
 * rewritten in place:
 *  - FILTER adds the position of the word (offset + index + 8) to it;
 *  - any other direction subtracts that same position again.
 *
 * offset: position of buf[0] within the whole stream.
 * dir:    direction of the conversion.
 * buf:    data to convert in place.
 * size:   number of bytes available in buf. */
static void
BCJ_ARM_filter(off_t offset, SeekableZStream::FilterDirection dir,
               unsigned char *buf, size_t size)
{
  for (size_t pos = 0; pos + 4 <= size; pos += 4) {
    unsigned char *insn = buf + pos;
    if (insn[3] != 0xeb)
      continue;

    /* The stored value counts 4-byte words; scale it to bytes. */
    uint32_t encoded = (insn[2] << 16) | (insn[1] << 8) | insn[0];
    encoded <<= 2;

    /* Position term shared by both directions, reduced modulo 2^32. */
    const uint32_t pc = static_cast<uint32_t>(offset + (uint32_t)(pos) + 8);
    uint32_t converted =
      (dir == SeekableZStream::FILTER) ? pc + encoded : encoded - pc;
    converted >>= 2;

    /* Only the three value bytes are rewritten; the 0xeb byte is kept. */
    insn[0] = static_cast<unsigned char>(converted);
    insn[1] = static_cast<unsigned char>(converted >> 8);
    insn[2] = static_cast<unsigned char>(converted >> 16);
  }
}
/* Maps a filter id to the function implementing it. Returns NULL for any
 * id other than BCJ_THUMB and BCJ_ARM. */
SeekableZStream::ZStreamFilter
SeekableZStream::GetFilter(SeekableZStream::FilterId id)
{
  if (id == BCJ_THUMB)
    return BCJ_Thumb_filter;
  if (id == BCJ_ARM)
    return BCJ_ARM_filter;
  return NULL;
}

View File

@ -22,7 +22,8 @@ struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
{ {
SeekableZStreamHeader() SeekableZStreamHeader()
: Zip::SignedEntity<SeekableZStreamHeader>(magic) : Zip::SignedEntity<SeekableZStreamHeader>(magic)
, totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0), windowBits(0) { } , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0), windowBits(0)
, filter(0) { }
/* Reuse Zip::SignedEntity to handle the magic number used in the Seekable /* Reuse Zip::SignedEntity to handle the magic number used in the Seekable
* ZStream file format. The magic number is "SeZz". */ * ZStream file format. The magic number is "SeZz". */
@ -43,8 +44,8 @@ struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
/* windowBits value used when deflating */ /* windowBits value used when deflating */
signed char windowBits; signed char windowBits;
/* Padding */ /* Filter Id */
unsigned char unused; unsigned char filter;
/* Maximum supported size for chunkSize */ /* Maximum supported size for chunkSize */
/* Can't use std::min here because it's not constexpr */ /* Can't use std::min here because it's not constexpr */
@ -94,6 +95,28 @@ public:
return offsetTable.numElements(); return offsetTable.numElements();
} }
/**
* Filters used to improve compression rate.
*/
/* Direction in which a filter is applied to a buffer. */
enum FilterDirection {
/* Passed when filtering data ahead of compression. */
FILTER,
/* Passed by DecompressChunk on freshly inflated data. */
UNFILTER
};
/* Filter function type. The arguments are, in order: the position of the
 * buffer's first byte within the whole stream, the direction to apply, the
 * buffer to modify in place, and the number of bytes in that buffer. */
typedef void (*ZStreamFilter)(off_t, FilterDirection,
unsigned char *, size_t);
/* Filter identifiers, as stored in the `filter` field of the stream header. */
enum FilterId {
/* No filter: GetFilter returns NULL for this id. */
NONE,
BCJ_THUMB,
BCJ_ARM,
/* Not a filter: one past the last valid id. GetFilter returns NULL for it. */
FILTER_MAX
};
/* Returns the function implementing the given filter, or NULL when the id
 * does not name a filter (NONE, FILTER_MAX). */
static ZStreamFilter GetFilter(FilterId id);

/* Same lookup for a raw integer id, such as the byte read from a stream
 * header. Ids that are not below FILTER_MAX are treated as NONE, so an
 * out-of-range value is never converted to FilterId. */
static ZStreamFilter GetFilter(uint16_t id) {
  return GetFilter(id < FILTER_MAX ? static_cast<FilterId>(id) : NONE);
}
private: private:
/* RAW Seekable SZtream buffer */ /* RAW Seekable SZtream buffer */
const unsigned char *buffer; const unsigned char *buffer;
@ -112,6 +135,9 @@ private:
/* Offsets table */ /* Offsets table */
Array<le_uint32> offsetTable; Array<le_uint32> offsetTable;
/* Filter */
ZStreamFilter filter;
}; };
#endif /* SeekableZStream_h */ #endif /* SeekableZStream_h */

View File

@ -80,18 +80,31 @@ public:
const char *outName, Buffer &outBuf); const char *outName, Buffer &outBuf);
}; };
class SzipCompress: public SzipAction class SzipCompress: public SzipAction
{ {
public: public:
int run(const char *name, Buffer &origBuf, int run(const char *name, Buffer &origBuf,
const char *outName, Buffer &outBuf); const char *outName, Buffer &outBuf);
SzipCompress(size_t aChunkSize) SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter)
: chunkSize(aChunkSize ? aChunkSize : 16384) : chunkSize(aChunkSize ? aChunkSize : 16384)
, filter(aFilter == SeekableZStream::FILTER_MAX ? DEFAULT_FILTER : aFilter)
{} {}
private: private:
const static SeekableZStream::FilterId DEFAULT_FILTER =
#if defined(TARGET_THUMB)
SeekableZStream::BCJ_THUMB;
#elif defined(TARGET_ARM)
SeekableZStream::BCJ_ARM;
#else
SeekableZStream::NONE;
#endif
size_t chunkSize; size_t chunkSize;
SeekableZStream::FilterId filter;
}; };
/* Decompress a seekable compressed stream */ /* Decompress a seekable compressed stream */
@ -153,6 +166,7 @@ int SzipCompress::run(const char *name, Buffer &origBuf,
header->chunkSize = chunkSize; header->chunkSize = chunkSize;
header->totalSize = offset; header->totalSize = offset;
header->windowBits = -15; // Raw stream, window size of 32k. header->windowBits = -15; // Raw stream, window size of 32k.
header->filter = filter;
/* Initialize zlib structure */ /* Initialize zlib structure */
z_stream zStream; z_stream zStream;
@ -160,7 +174,29 @@ int SzipCompress::run(const char *name, Buffer &origBuf,
zStream.avail_out = origSize - offset; zStream.avail_out = origSize - offset;
zStream.next_out = static_cast<Bytef*>(outBuf) + offset; zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
Bytef *origData = static_cast<Bytef*>(origBuf); /* Filter buffer */
SeekableZStream::ZStreamFilter filter =
SeekableZStream::GetFilter(header->filter);
Buffer filteredData;
Bytef *origData;
if (filter) {
filteredData.Resize(origSize);
origData = filteredData;
memcpy(origData, origBuf, origSize);
size_t size = origSize;
Bytef *data = origData;
size_t avail = 0;
/* Filter needs to be applied in chunks. */
while (size) {
avail = std::min(size, chunkSize);
filter(data - origData, SeekableZStream::FILTER, data, avail);
size -= avail;
data += avail;
}
} else {
origData = origBuf;
}
size_t avail = 0; size_t avail = 0;
size_t size = origSize; size_t size = origSize;
while (size) { while (size) {
@ -233,6 +269,7 @@ int main(int argc, char* argv[])
char **firstArg; char **firstArg;
bool compress = true; bool compress = true;
size_t chunkSize = 0; size_t chunkSize = 0;
SeekableZStream::FilterId filter = SeekableZStream::FILTER_MAX;
for (firstArg = &argv[1]; argc > 3; argc--, firstArg++) { for (firstArg = &argv[1]; argc > 3; argc--, firstArg++) {
if (!firstArg[0] || firstArg[0][0] != '-') if (!firstArg[0] || firstArg[0][0] != '-')
@ -250,17 +287,30 @@ int main(int argc, char* argv[])
log("Invalid chunk size"); log("Invalid chunk size");
return 1; return 1;
} }
} else if (strcmp(firstArg[0], "-f") == 0) {
firstArg++;
argc--;
if (!firstArg[0])
break;
if (strcmp(firstArg[0], "arm") == 0)
filter = SeekableZStream::BCJ_ARM;
else if (strcmp(firstArg[0], "thumb") == 0)
filter = SeekableZStream::BCJ_THUMB;
else {
log("Invalid filter");
return 1;
}
} }
} }
if (argc != 3 || !firstArg[0] || !firstArg[1] || if (argc != 3 || !firstArg[0] || !firstArg[1] ||
(strcmp(firstArg[0], firstArg[1]) == 0)) { (strcmp(firstArg[0], firstArg[1]) == 0)) {
log("usage: %s [-d] [-c CHUNKSIZE] in_file out_file", argv[0]); log("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] in_file out_file", argv[0]);
return 1; return 1;
} }
if (compress) { if (compress) {
action = new SzipCompress(chunkSize); action = new SzipCompress(chunkSize, filter);
} else { } else {
if (chunkSize) { if (chunkSize) {
log("-c is incompatible with -d"); log("-c is incompatible with -d");