393 lines
16 KiB
C#
Raw Normal View History

namespace System.IO.Compression {
using System;
using System.Diagnostics;
internal class FastEncoderWindow {
// Input buffer. ResetWindow() allocates 2 * FastEncoderWindowSize + MaxMatch + 4 bytes;
// MoveWindows() copies the upper FastEncoderWindowSize bytes down to index 0 once
// bufPos reaches 2 * FastEncoderWindowSize.
private byte[] window; // complete bytes window
private int bufPos; // the start index of uncompressed bytes
private int bufEnd; // the end index of uncompressed bytes
// Be very careful about increasing the window size; the code tables will have to
// be updated, since they assume that extra_distance_bits is never larger than a
// certain size.
const int FastEncoderHashShift = 4; // left shift applied to the running hash for each new byte (see HashValue)
const int FastEncoderHashtableSize = 2048; // number of entries in lookup[]
const int FastEncoderHashMask = FastEncoderHashtableSize-1; // reduces a hash value to an index into lookup[]
const int FastEncoderWindowSize = 8192; // how far back FindMatch() is allowed to look
const int FastEncoderWindowMask = FastEncoderWindowSize - 1; // reduces a window position to an index into prev[]
// FindMatch() rejects length-3 matches whose relative position is >= this value.
// NOTE(review): FindMatch() only looks back FastEncoderWindowSize (8192) bytes, so this
// threshold looks unreachable with the current constants - confirm before relying on it.
const int FastEncoderMatch3DistThreshold = 16384;
internal const int MaxMatch = 258; // longest match FindMatch() will report
internal const int MinMatch = 3; // matches shorter than this are emitted as plain symbols
// Following constants affect the search,
// they should be modifiable if we support different compression levels in future.
const int SearchDepth = 32; // maximum number of hash chain links FindMatch() examines
const int GoodLength = 4; // a first match at least this long reduces the lazy search depth to SearchDepth >> 2
const int NiceLength = 32; // passed to FindMatch() as its niceLength argument
const int LazyMatchThreshold = 6; // matches longer than this are taken without checking the next position
// Hashtable structure
private ushort[] prev; // next most recent occurrence of chars with same hash value
private ushort[] lookup; // hash table to find most recent occurrence of chars with same hash value
// Creates a window whose buffers and hash tables are freshly allocated and empty.
public FastEncoderWindow() {
    this.ResetWindow();
}
// Number of uncompressed bytes sitting in the window that have not been consumed yet,
// i.e. the distance between bufPos and bufEnd.
public int BytesAvailable {
    get {
        int pending = bufEnd - bufPos;
        Debug.Assert(pending >= 0, "Ending pointer can't be in front of starting pointer!");
        return pending;
    }
}
// Describes the not-yet-consumed bytes as a DeflateInput that refers to the window
// array itself (no copy is made): it starts at bufPos and spans bufEnd - bufPos bytes.
public DeflateInput UnprocessedInput {
    get {
        int start = bufPos;
        int length = bufEnd - start;

        DeflateInput pending = new DeflateInput();
        pending.Buffer = window;
        pending.StartIndex = start;
        pending.Count = length;
        return pending;
    }
}
// Discards all buffered data and match history by re-creating the window and
// both hash tables.
public void FlushWindow() {
    this.ResetWindow();
}
// Allocates a fresh window and fresh hash tables, and places both buffer indices at
// FastEncoderWindowSize so that the window holds no unprocessed bytes.
private void ResetWindow() {
    // two window-sized halves plus slack after the second half
    window = new byte[FastEncoderWindowSize + FastEncoderWindowSize + MaxMatch + 4];
    prev = new ushort[FastEncoderWindowSize + MaxMatch];
    lookup = new ushort[FastEncoderHashtableSize];

    bufEnd = FastEncoderWindowSize;
    bufPos = bufEnd;
}
// Number of bytes that can still be appended before bufEnd reaches
// 2 * FastEncoderWindowSize.
public int FreeWindowSpace {
    get {
        int capacity = 2 * FastEncoderWindowSize;
        return capacity - bufEnd;
    }
}
// Appends 'count' bytes of inputBuffer, beginning at startIndex, to the window at
// bufEnd and advances bufEnd past them.
public void CopyBytes(byte[] inputBuffer, int startIndex, int count) {
    int destination = bufEnd;
    Array.Copy(inputBuffer, startIndex, window, destination, count);
    bufEnd = destination + count;
}
// Slide the history window to the left by FastEncoderWindowSize bytes.
//
// The upper half of the window is copied down to index 0, every position stored in
// lookup[] and prev[] is reduced by FastEncoderWindowSize (positions that would become
// <= 0 are replaced by 0, which the rest of the class treats as "no entry"), and
// bufPos/bufEnd are reset to FastEncoderWindowSize.
// Must only be called when bufPos == 2 * FastEncoderWindowSize.
public void MoveWindows() {
    Debug.Assert(bufPos == 2*FastEncoderWindowSize, "only call this at the end of the window");

    // verify that the hash table is correct before it is rewritten (debug builds only)
    VerifyHashes();

    Array.Copy(window, bufPos - FastEncoderWindowSize, window, 0, FastEncoderWindowSize);

    // move all the hash pointers back
    for (int i = 0; i < FastEncoderHashtableSize; i++) {
        int val = ((int) lookup[i]) - FastEncoderWindowSize;
        // too far away now? then set to zero
        lookup[i] = (val <= 0) ? (ushort) 0 : (ushort) val;
    }

    // prev[]'s are absolute pointers, not relative pointers, so we have to move them back too
    // making prev[]'s into relative pointers poses problems of its own
    for (int i = 0; i < FastEncoderWindowSize; i++) {
        int val = ((int) prev[i]) - FastEncoderWindowSize;
        prev[i] = (val <= 0) ? (ushort) 0 : (ushort) val;
    }

#if DEBUG
    // For debugging, wipe the window clean, so that if there is a bug in our hashing,
    // the hash pointers will now point to locations which are not valid for the hash value
    // (and will be caught by our ASSERTs).
    Array.Clear(window, FastEncoderWindowSize, window.Length - FastEncoderWindowSize);
#endif

    bufPos = FastEncoderWindowSize;
    bufEnd = bufPos;

    // Verify the relocated hash table (debug builds only). This has to happen after
    // bufPos is reset: VerifyHashes() only follows pointers that lie within
    // FastEncoderWindowSize of bufPos, and with bufPos still at the end of the window
    // none of the relocated pointers would qualify, so nothing would be checked.
    VerifyHashes();
}
// Folds one more byte into a running hash: the previous value is shifted left by
// FastEncoderHashShift bits and XOR-ed with the byte.
private uint HashValue(uint hash, byte b) {
    uint shifted = hash << FastEncoderHashShift;
    return shifted ^ b;
}
// Insert the string at bufPos into the hash table and return the most recent earlier
// location that had the same hash value (0 when the slot was empty).
//
// 'hash' must already cover window[bufPos] and window[bufPos+1]; this call mixes in
// window[bufPos+2]. Only the bits selected by FastEncoderHashMask (the lowest 11) are
// used to pick a slot, so the hash for the following position can be obtained by
// simply incrementing bufPos and calling this function again.
private uint InsertString(ref uint hash) {
    hash = HashValue(hash, window[bufPos + 2]);

    uint slot = hash & FastEncoderHashMask;
    uint previousHead = lookup[slot];

    // the current position becomes the head of the chain and links to the old head
    lookup[slot] = (ushort) bufPos;
    prev[bufPos & FastEncoderWindowMask] = (ushort) previousHead;

    return previousHead;
}
//
// Insert strings into the hashtable, advancing bufPos by matchLen - 1 in total.
// Arguments:
//      hash     : initial hash value
//      matchLen : 1 + number of strings we need to insert.
//
// When no more than matchLen bytes remain before bufEnd, nothing is inserted and
// bufPos is just moved forward; 'hash' is left untouched in that case.
//
private void InsertStrings(ref uint hash, int matchLen) {
    Debug.Assert(matchLen > 0, "Invalid match Len!");

    int insertions = matchLen - 1;

    if (bufEnd - bufPos <= matchLen) {
        bufPos += insertions;
        return;
    }

    for (int done = 0; done < insertions; done++) {
        InsertString(ref hash);
        bufPos++;
    }
}
//
// Find out what we should generate next. It can be a symbol, a distance/length pair
// or a symbol followed by distance/length pair
//
// The result is written into 'match': State tells which of the three cases applies,
// Symbol holds the literal byte, and Position/Length hold the values produced by
// FindMatch() (Position is the relative position FindMatch() returns through matchPos).
// bufPos is advanced past everything that was reported, and MoveWindows() is called
// when bufPos reaches the end of the window.
// Every path through this method returns true.
//
internal bool GetNextSymbolOrMatch(Match match) {
Debug.Assert(bufPos >= FastEncoderWindowSize && bufPos < (2*FastEncoderWindowSize), "Invalid Buffer Position!");
// initialise the value of the hash, no problem if locations bufPos, bufPos+1
// are invalid (not enough data), since we will never insert using that hash value
uint hash = HashValue( 0 , window[bufPos]);
hash = HashValue( hash , window[bufPos + 1]);
int matchLen;
int matchPos = 0;
VerifyHashes(); // Debug only code
if (bufEnd - bufPos <= 3) {
// The hash value becomes corrupt when we get within 3 characters of the end of the
// input window, since the hash value is based on 3 characters. We just stop
// inserting into the hash table at this point, and allow no matches.
matchLen = 0;
}
else {
// insert string into hash table and return most recent location of same hash value
int search = (int)InsertString(ref hash);
// did we find a recent location of this hash value?
if (search != 0) {
// yes, now find a match at what we'll call position X
matchLen = FindMatch(search, out matchPos, SearchDepth, NiceLength);
// truncate match if we're too close to the end of the input window
if (bufPos + matchLen > bufEnd)
matchLen = bufEnd - bufPos;
}
else {
// no most recent location found
matchLen = 0;
}
}
if (matchLen < MinMatch) {
// didn't find a match, so output unmatched char
match.State = MatchState.HasSymbol;
match.Symbol = window[bufPos];
bufPos++;
}
else {
// bufPos now points to X+1
bufPos++;
// Matches longer than LazyMatchThreshold are taken as they are (else branch below).
// Shorter ones are first compared against the match available at X+1.
if (matchLen <= LazyMatchThreshold) {
int nextMatchLen;
int nextMatchPos = 0;
// search at position X+1
int search = (int)InsertString(ref hash);
// look for a better match at X+1; when the match at X is already at least
// GoodLength long, only a quarter of the usual search depth is spent on it
if (search != 0) {
nextMatchLen = FindMatch(search, out nextMatchPos,
matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2),NiceLength);
// truncate match if we're too close to the end of the window
// note: nextMatchLen could now be < MinMatch
if (bufPos + nextMatchLen > bufEnd) {
nextMatchLen = bufEnd - bufPos;
}
} else {
nextMatchLen = 0;
}
// right now X and X+1 are both inserted into the search tree
if (nextMatchLen > matchLen) {
// since nextMatchLen > matchLen, it can't be < MinMatch here
// match at X+1 is better, so output unmatched char at X
match.State = MatchState.HasSymbolAndMatch;
match.Symbol = window[bufPos-1];
match.Position = nextMatchPos;
match.Length = nextMatchLen;
// insert remainder of second match into search tree
// example: (*=inserted already)
//
// X X+1 X+2 X+3 X+4
// * *
// nextmatchlen=3
// bufPos
//
// If nextMatchLen == 3, we want to perform 2
// insertions (at X+2 and X+3). However, first we must
// inc bufPos.
//
bufPos++; // now points to X+2
matchLen = nextMatchLen;
InsertStrings(ref hash, matchLen);
} else {
// match at X is better, so take it
match.State = MatchState.HasMatch;
match.Position = matchPos;
match.Length = matchLen;
// Insert remainder of first match into search tree, minus the first
// two locations, which were inserted by the InsertString() calls above.
//
// For example, if matchLen == 3, then we've inserted at X and X+1
// already (and bufPos is now pointing at X+1), and now we need to insert
// only at X+2.
//
matchLen--;
bufPos++; // now bufPos points to X+2
InsertStrings(ref hash, matchLen);
}
} else { // matchLen > LazyMatchThreshold
// in assertion: bufPos points to X+1, location X inserted already
// first match is so good that we're not even going to check at X+1
match.State = MatchState.HasMatch;
match.Position = matchPos;
match.Length = matchLen;
// insert remainder of match at X into search tree
InsertStrings(ref hash, matchLen);
}
}
// the whole window has been consumed: slide it so that more input can be appended
if (bufPos == 2*FastEncoderWindowSize) {
MoveWindows();
}
return true;
}
//
// Find a match for the string at bufPos by walking the hash chain that begins at
// 'search', and return the length of the best match found.
// Arguments:
//      search      : absolute window position where the search starts
//      matchPos    : receives the relative position of the best match,
//                    computed as bufPos - (absolute match position) - 1
//      searchDepth : maximum number of chain entries to examine
//      niceLength  : stop immediately if we find a match longer than niceLength
// Returns:
//      The best match length. It can be > 0 and < MinMatch, so callers must check it.
//      0 is returned for a length-3 match whose relative position is
//      >= FastEncoderMatch3DistThreshold.
//
int FindMatch(int search, out int matchPos, int searchDepth, int niceLength ) {
    Debug.Assert(bufPos >= 0 && bufPos < 2*FastEncoderWindowSize, "Invalid Buffer position!");
    Debug.Assert(search < bufPos, "Invalid starting search point!");
    Debug.Assert(RecalculateHash(search) == RecalculateHash(bufPos));

    int bestMatch = 0;      // best match length found so far
    int bestMatchPos = 0;   // absolute match position of best match found

    // the earliest we can look
    int earliest = bufPos - FastEncoderWindowSize;
    Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!");

    byte wantChar = window[bufPos];
    while (search > earliest) {
        // make sure all our hash links are valid
        Debug.Assert(RecalculateHash(search) == RecalculateHash(bufPos), "Corrupted hash link!");

        // Start by checking the character that would allow us to increase the match
        // length by one. This improves performance quite a bit.
        if (window[search + bestMatch] == wantChar) {
            int j;

            // Now make sure that all the other characters are correct
            for (j = 0; j < MaxMatch; j++) {
                if (window[bufPos+j] != window[search+j]) {
                    break;
                }
            }

            if (j > bestMatch) {
                bestMatch = j;
                bestMatchPos = search; // absolute position
                // honour the caller's limit rather than the class-wide NiceLength constant
                if (j > niceLength) {
                    break;
                }
                wantChar = window[bufPos+j];
            }
        }

        if (--searchDepth == 0) {
            break;
        }

        Debug.Assert(prev[search & FastEncoderWindowMask] < search, "we should always go backwards!");
        search = prev[search & FastEncoderWindowMask];
    }

    // doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch
    matchPos = bufPos - bestMatchPos - 1; // convert absolute to relative position

    // don't allow match length 3's which are too far away to be worthwhile
    if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold) {
        return 0;
    }

    Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize");
    return bestMatch;
}
// Debug-only consistency check: for every hash bucket, follow the chain of positions
// that lie within FastEncoderWindowSize of bufPos and assert that each one really
// hashes to that bucket and that the chain only ever moves to smaller positions.
[Conditional("DEBUG")]
void VerifyHashes() {
    for (int bucket = 0; bucket < FastEncoderHashtableSize; bucket++) {
        ushort current = lookup[bucket];

        while (current != 0 && bufPos - current < FastEncoderWindowSize) {
            Debug.Assert(RecalculateHash(current) == bucket, "Incorrect Hashcode!");

            ushort older = prev[current & FastEncoderWindowMask];
            if (bufPos - older >= FastEncoderWindowSize) {
                // the rest of the chain is outside the window
                break;
            }

            Debug.Assert(older < current, "pointer is messed up!");
            current = older;
        }
    }
}
// Computes from scratch the hash table index of the three bytes starting at 'position'.
// It returns a value, so it cannot carry the Conditional attribute (which requires a
// void method); it is only referenced from Debug.Assert expressions in this class.
uint RecalculateHash(int position) {
    int first = window[position] << (2*FastEncoderHashShift);
    int second = window[position+1] << FastEncoderHashShift;
    int third = window[position+2];

    return (uint)((first ^ second ^ third) & FastEncoderHashMask);
}
}
}