e79aa3c0ed
Former-commit-id: a2155e9bd80020e49e72e86c44da02a8ac0e57a4
393 lines
16 KiB
C#
393 lines
16 KiB
C#
namespace System.IO.Compression {
|
|
using System;
|
|
using System.Diagnostics;
|
|
|
|
internal class FastEncoderWindow {
|
|
// Sliding-window storage. ResetWindow allocates it with
// 2 * FastEncoderWindowSize + MaxMatch + 4 bytes.
private byte[] window;
// Start index of the bytes that have not been compressed yet.
private int bufPos;
// End index of the uncompressed bytes (CopyBytes appends new input here).
private int bufEnd;

// Be very careful about increasing the window size; the code tables will have to
// be updated, since they assume that extra_distance_bits is never larger than a
// certain size.
private const int FastEncoderWindowSize = 8192;
private const int FastEncoderWindowMask = FastEncoderWindowSize - 1;

// Hashing parameters: each byte shifts the running hash left by FastEncoderHashShift
// bits, and the low bits selected by FastEncoderHashMask index the lookup table.
private const int FastEncoderHashShift = 4;
private const int FastEncoderHashtableSize = 2048;
private const int FastEncoderHashMask = FastEncoderHashtableSize - 1;

// FindMatch rejects matches of length 3 whose distance is at least this value.
// NOTE(review): this is larger than FastEncoderWindowSize, while FindMatch only
// follows links newer than bufPos - FastEncoderWindowSize - confirm the check can fire.
private const int FastEncoderMatch3DistThreshold = 16384;

// Shortest and longest match lengths handled by the encoder.
internal const int MinMatch = 3;
internal const int MaxMatch = 258;

// The following constants affect the search; they should become configurable
// if different compression levels are supported in the future.
// SearchDepth        : number of hash chain links examined per search.
// GoodLength         : once the match at X is at least this long, the lazy search
//                      at X+1 examines only SearchDepth / 4 links.
// NiceLength         : passed to FindMatch as its early-exit length.
// LazyMatchThreshold : matches longer than this are taken without checking X+1.
private const int SearchDepth = 32;
private const int GoodLength = 4;
private const int NiceLength = 32;
private const int LazyMatchThreshold = 6;

// Hash table structure. A stored position of 0 means "no entry".
// lookup : indexed by (hash & FastEncoderHashMask); most recent occurrence of
//          characters with that hash value.
// prev   : indexed by (position & FastEncoderWindowMask); next most recent
//          occurrence of characters with the same hash value.
private ushort[] lookup;
private ushort[] prev;
|
|
|
|
// Creates an empty window; all buffers and indices are set up by ResetWindow.
public FastEncoderWindow() {
    this.ResetWindow();
}
|
|
|
|
// Number of bytes in the window that have not been compressed yet (bufEnd - bufPos).
public int BytesAvailable {
    get {
        int pending = bufEnd - bufPos;
        Debug.Assert(pending >= 0, "Ending pointer can't be in front of starting pointer!");
        return pending;
    }
}
|
|
|
|
// Describes the not-yet-compressed region of the window as a DeflateInput.
// The returned object references the window array itself; no bytes are copied.
public DeflateInput UnprocessedInput {
    get {
        return new DeflateInput {
            Buffer = window,
            StartIndex = bufPos,
            Count = bufEnd - bufPos
        };
    }
}
|
|
|
|
// Discards all window contents and hash state by re-running ResetWindow.
public void FlushWindow() {
    this.ResetWindow();
}
|
|
|
|
// Allocates fresh (zeroed) window and hash tables and positions both buffer
// indices at FastEncoderWindowSize, i.e. at the start of the upper half.
private void ResetWindow() {
    // Room for two window halves plus MaxMatch + 4 spare bytes past the end.
    window = new byte[2 * FastEncoderWindowSize + MaxMatch + 4];
    prev = new ushort[FastEncoderWindowSize + MaxMatch];
    lookup = new ushort[FastEncoderHashtableSize];

    bufEnd = bufPos = FastEncoderWindowSize;
}
|
|
|
|
// Number of bytes that can still be appended before bufEnd reaches
// 2 * FastEncoderWindowSize.
public int FreeWindowSpace {
    get {
        int fillLimit = 2 * FastEncoderWindowSize;
        return fillLimit - bufEnd;
    }
}
|
|
|
|
// Appends 'count' bytes of inputBuffer, starting at startIndex, to the window at
// bufEnd and advances bufEnd past them. No range checks are made here beyond
// those Array.Copy itself performs.
public void CopyBytes(byte[] inputBuffer, int startIndex, int count) {
    int writeOffset = bufEnd;
    Array.Copy(inputBuffer, startIndex, window, writeOffset, count);
    // Only advance once the copy has succeeded.
    bufEnd = writeOffset + count;
}
|
|
|
|
// Slides the history window to the left by FastEncoderWindowSize bytes: the most
// recent FastEncoderWindowSize bytes become the lower half of the buffer, every
// stored hash position is rebased by the same amount, and bufPos/bufEnd restart
// at FastEncoderWindowSize.
public void MoveWindows() {
    Debug.Assert(bufPos == 2*FastEncoderWindowSize, "only call this at the end of the window");

    // Debug-only: check the hash chains before anything is moved.
    VerifyHashes();

    int historyStart = bufPos - FastEncoderWindowSize;
    Array.Copy(window, historyStart, window, 0, FastEncoderWindowSize);

    // Rebase the hash heads; anything that would land at or before 0 is now too
    // far away and becomes 0 ("no entry").
    for (int slot = 0; slot < FastEncoderHashtableSize; slot++) {
        int rebased = lookup[slot] - FastEncoderWindowSize;
        lookup[slot] = rebased > 0 ? (ushort) rebased : (ushort) 0;
    }

    // prev[] holds absolute positions, not relative ones, so the chain links are
    // rebased in exactly the same way.
    for (int slot = 0; slot < FastEncoderWindowSize; slot++) {
        int rebased = prev[slot] - FastEncoderWindowSize;
        prev[slot] = rebased > 0 ? (ushort) rebased : (ushort) 0;
    }

#if DEBUG
    // For debugging, wipe everything above the retained history so that a bad hash
    // pointer lands on bytes that no longer match its hash value and trips an assert.
    Array.Clear(window, FastEncoderWindowSize, window.Length - FastEncoderWindowSize);
#endif

    // Debug-only: check the hash chains again after rebasing.
    VerifyHashes();

    bufEnd = bufPos = FastEncoderWindowSize;
}
|
|
|
|
// Folds one more byte into a running hash: shift the old value left by
// FastEncoderHashShift bits, then XOR in the new byte.
private uint HashValue(uint hash, byte b) {
    uint shifted = hash << FastEncoderHashShift;
    return shifted ^ b;
}
|
|
|
|
// Inserts the string starting at bufPos into the hash table and returns the most
// recent earlier position stored for the same hash bucket (0 if there was none).
//
// 'hash' must already contain the bytes at bufPos and bufPos + 1; the byte at
// bufPos + 2 is folded in here. Only the low bits selected by FastEncoderHashMask
// (the table has 2048 entries) are used as the bucket index, so bits contributed
// by older bytes simply shift out of range. To hash the next position the caller
// just increments bufPos and calls this function again with the same 'hash'.
private uint InsertString(ref uint hash) {
    hash = HashValue(hash, window[bufPos + 2]);

    int bucket = (int) (hash & FastEncoderHashMask);
    uint previousHead = lookup[bucket];

    // The current position becomes the head of the chain; the old head is linked
    // behind it through prev[].
    lookup[bucket] = (ushort) bufPos;
    prev[bufPos & FastEncoderWindowMask] = (ushort) previousHead;
    return previousHead;
}
|
|
|
|
//
// Inserts a run of strings into the hash table, advancing bufPos past each one.
// Arguments:
//   hash     : initial hash value
//   matchLen : 1 + number of strings we need to insert.
// When no more than matchLen bytes remain before bufEnd, nothing is inserted and
// bufPos is simply advanced by matchLen - 1.
//
private void InsertStrings(ref uint hash, int matchLen) {
    Debug.Assert(matchLen > 0, "Invalid match Len!");

    int pending = matchLen - 1;
    if (bufEnd - bufPos <= matchLen) {
        bufPos += pending;
        return;
    }

    for (int done = 0; done < pending; done++) {
        InsertString(ref hash);
        bufPos++;
    }
}
|
|
|
|
//
// Decides what to emit next and stores it in 'match': a literal symbol, a
// distance/length pair, or a literal followed by a distance/length pair.
// bufPos is advanced past everything that was consumed, and the window is slid
// (MoveWindows) when bufPos reaches 2 * FastEncoderWindowSize.
// This method always returns true.
//
internal bool GetNextSymbolOrMatch(Match match) {
    Debug.Assert(bufPos >= FastEncoderWindowSize && bufPos < (2*FastEncoderWindowSize), "Invalid Buffer Position!");

    // Seed the hash with the first two bytes. It does not matter if bufPos and
    // bufPos + 1 hold no valid data (not enough input), because in that case the
    // hash is never used for an insertion.
    uint hash = HashValue(HashValue(0, window[bufPos]), window[bufPos + 1]);

    int matchLen = 0;
    int matchPos = 0;

    VerifyHashes();     // Debug only code

    // The hash is built from 3 characters, so it becomes corrupt within 3 characters
    // of the end of the input. In that case nothing is inserted and no match is allowed.
    if (bufEnd - bufPos > 3) {
        // Insert position X and fetch the most recent position with the same hash.
        int candidate = (int) InsertString(ref hash);

        if (candidate != 0) {
            matchLen = FindMatch(candidate, out matchPos, SearchDepth, NiceLength);

            // Truncate the match if it runs past the end of the input.
            if (bufPos + matchLen > bufEnd) {
                matchLen = bufEnd - bufPos;
            }
        }
    }

    if (matchLen < MinMatch) {
        // No usable match: emit the byte at X as a literal.
        match.State = MatchState.HasSymbol;
        match.Symbol = window[bufPos];
        bufPos++;
    }
    else if (matchLen > LazyMatchThreshold) {
        // The match at X is long enough to take without looking at X+1.
        bufPos++;       // now points to X+1; X has been inserted already

        match.State = MatchState.HasMatch;
        match.Position = matchPos;
        match.Length = matchLen;

        // Insert the remainder of the match at X.
        InsertStrings(ref hash, matchLen);
    }
    else {
        // Lazy evaluation: see whether X+1 offers a longer match.
        bufPos++;       // now points to X+1

        int lazyLen = 0;
        int lazyPos = 0;

        int candidate = (int) InsertString(ref hash);
        if (candidate != 0) {
            // Search less deeply when the match at X is already reasonably long.
            int depth = matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2);
            lazyLen = FindMatch(candidate, out lazyPos, depth, NiceLength);

            // Truncate if too close to the end of the input.
            // Note: lazyLen could now be < MinMatch.
            if (bufPos + lazyLen > bufEnd) {
                lazyLen = bufEnd - bufPos;
            }
        }

        // At this point X and X+1 have both been inserted.
        if (lazyLen > matchLen) {
            // The match at X+1 is better (and, being longer than matchLen, cannot be
            // shorter than MinMatch): emit X as a literal followed by that match.
            match.State = MatchState.HasSymbolAndMatch;
            match.Symbol = window[bufPos - 1];
            match.Position = lazyPos;
            match.Length = lazyLen;

            // Insert the rest of the second match. Example (* = already inserted):
            //
            //      X      X+1      X+2      X+3     X+4
            //      *       *
            //            bufPos
            //
            // With lazyLen == 3 two more insertions are wanted (X+2 and X+3),
            // but bufPos has to move on first.
            bufPos++;   // now points to X+2
            InsertStrings(ref hash, lazyLen);
        }
        else {
            // The match at X is at least as good, so take it.
            match.State = MatchState.HasMatch;
            match.Position = matchPos;
            match.Length = matchLen;

            // X and X+1 are already in the table, so one insertion fewer is needed:
            // for matchLen == 3 only X+2 remains.
            bufPos++;   // now points to X+2
            InsertStrings(ref hash, matchLen - 1);
        }
    }

    if (bufPos == 2*FastEncoderWindowSize) {
        MoveWindows();
    }
    return true;
}
|
|
|
|
//
// Walks the hash chain that starts at 'search' and returns the length of the
// longest match found for the bytes at bufPos.
// Arguments:
//   search      : where to start searching (an earlier position with the same hash)
//   matchPos    : receives the match distance, encoded as bufPos - position - 1
//   searchDepth : maximum number of chain links to examine
//   niceLength  : stop immediately once a match longer than niceLength is found
// Returns:
//   The best match length. It may be 0 or less than MinMatch, and it is NOT limited
//   to bufEnd (bytes are compared up to MaxMatch), so callers must truncate it.
//
int FindMatch(int search, out int matchPos, int searchDepth, int niceLength ) {
    Debug.Assert(bufPos >= 0 && bufPos < 2*FastEncoderWindowSize, "Invalid Buffer position!");
    Debug.Assert(search < bufPos, "Invalid starting search point!");
    Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos));

    int bestMatch = 0;      // best match length found so far
    int bestMatchPos = 0;   // absolute match position of best match found

    // Positions at or before this one are outside the history window.
    int earliest = bufPos - FastEncoderWindowSize;
    Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!");

    // The byte a candidate must have at offset bestMatch to be able to beat the
    // current best match.
    byte wantChar = window[bufPos];
    while (search > earliest) {
        // make sure all our hash links are valid
        Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos), "Corrupted hash link!");

        // Start by checking the character that would allow us to increase the match
        // length by one. This improves performance quite a bit.
        if (window[search + bestMatch] == wantChar) {
            int j;

            // Now make sure that all the other characters are correct
            for (j = 0; j < MaxMatch; j++) {
                if (window[bufPos+j] != window[search+j]) {
                    break;
                }
            }

            if (j > bestMatch) {
                bestMatch = j;
                bestMatchPos = search;  // absolute position

                // Honor the caller-supplied limit. Previously this compared against
                // the class constant NiceLength and ignored the parameter.
                if (j > niceLength) {
                    break;
                }
                wantChar = window[bufPos+j];
            }
        }

        if (--searchDepth == 0) {
            break;
        }

        Debug.Assert(prev[search & FastEncoderWindowMask] < search, "we should always go backwards!");

        search = prev[search & FastEncoderWindowMask];
    }

    // doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch
    matchPos = bufPos - bestMatchPos - 1;   // convert absolute to relative position

    // don't allow match length 3's which are too far away to be worthwhile
    if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold) {
        return 0;
    }

    Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize");
    return bestMatch;
}
|
|
|
|
|
|
// Debug-only consistency check of the hash table. For every bucket it follows the
// chain through prev[] while the positions stay within FastEncoderWindowSize of
// bufPos, asserting that each position really hashes to that bucket and that every
// link points to an earlier position.
[Conditional("DEBUG")]
void VerifyHashes() {
    for (int bucket = 0; bucket < FastEncoderHashtableSize; bucket++) {
        ushort current = lookup[bucket];

        while (current != 0 && bufPos - current < FastEncoderWindowSize) {
            Debug.Assert(RecalculateHash(current) == bucket, "Incorrect Hashcode!");

            ushort older = prev[current & FastEncoderWindowMask];
            if (bufPos - older >= FastEncoderWindowSize) {
                // The next link has fallen out of the window; stop following this chain.
                break;
            }

            Debug.Assert(older < current, "pointer is messed up!");
            current = older;
        }
    }
}
|
|
|
|
// Recomputes, from scratch, the masked hash of the three bytes starting at
// 'position'. Within this class it is only called from Debug.Assert arguments.
// It returns a value, so it can't carry the Conditional attribute itself.
uint RecalculateHash(int position) {
    int first = window[position] << (2 * FastEncoderHashShift);
    int second = window[position + 1] << FastEncoderHashShift;
    int third = window[position + 2];
    return (uint) ((first ^ second ^ third) & FastEncoderHashMask);
}
|
|
}
|
|
}
|
|
|
|
|