namespace System.IO.Compression { using System; using System.Diagnostics; internal class FastEncoderWindow { private byte[] window; // complete bytes window private int bufPos; // the start index of uncompressed bytes private int bufEnd; // the end index of uncompressed bytes // Be very careful about increasing the window size; the code tables will have to // be updated, since they assume that extra_distance_bits is never larger than a // certain size. const int FastEncoderHashShift = 4; const int FastEncoderHashtableSize = 2048; const int FastEncoderHashMask = FastEncoderHashtableSize-1; const int FastEncoderWindowSize = 8192; const int FastEncoderWindowMask = FastEncoderWindowSize - 1; const int FastEncoderMatch3DistThreshold = 16384; internal const int MaxMatch = 258; internal const int MinMatch = 3; // Following constants affect the search, // they should be modifiable if we support different compression levels in future. const int SearchDepth = 32; const int GoodLength = 4; const int NiceLength = 32; const int LazyMatchThreshold = 6; // Hashtable structure private ushort[] prev; // next most recent occurance of chars with same hash value private ushort[] lookup; // hash table to find most recent occurance of chars with same hash value public FastEncoderWindow() { ResetWindow(); } public int BytesAvailable { // uncompressed bytes get { Debug.Assert(bufEnd - bufPos >= 0, "Ending pointer can't be in front of starting pointer!"); return bufEnd - bufPos; } } public DeflateInput UnprocessedInput { get { DeflateInput input = new DeflateInput(); input.Buffer = window; input.StartIndex = bufPos; input.Count = bufEnd - bufPos; return input; } } public void FlushWindow() { ResetWindow(); } private void ResetWindow() { window = new byte[2 * FastEncoderWindowSize + MaxMatch + 4]; prev = new ushort[FastEncoderWindowSize + MaxMatch]; lookup = new ushort[FastEncoderHashtableSize]; bufPos = FastEncoderWindowSize; bufEnd = bufPos; } public int FreeWindowSpace { // Free space in the window get { return 2 * FastEncoderWindowSize - bufEnd; } } // copy bytes from input buffer into window public void CopyBytes(byte[] inputBuffer, int startIndex, int count) { Array.Copy(inputBuffer, startIndex, window, bufEnd, count); bufEnd += count; } // slide the history window to the left by FastEncoderWindowSize bytes public void MoveWindows() { int i; Debug.Assert(bufPos == 2*FastEncoderWindowSize, "only call this at the end of the window"); // verify that the hash table is correct VerifyHashes(); // Debug only code Array.Copy(window, bufPos - FastEncoderWindowSize, window, 0, FastEncoderWindowSize); // move all the hash pointers back for (i = 0; i < FastEncoderHashtableSize; i++) { int val = ((int) lookup[i]) - FastEncoderWindowSize; if (val <= 0) { // too far away now? then set to zero lookup[i] = (ushort) 0; } else { lookup[i] = (ushort) val; } } // prev[]'s are absolute pointers, not relative pointers, so we have to move them back too // making prev[]'s into relative pointers poses problems of its own for (i = 0; i < FastEncoderWindowSize; i++) { long val = ((long) prev[i]) - FastEncoderWindowSize; if (val <= 0) { prev[i] = (ushort) 0; } else { prev[i] = (ushort) val; } } #if DEBUG // For debugging, wipe the window clean, so that if there is a bug in our hashing, // the hash pointers will now point to locations which are not valid for the hash value // (and will be caught by our ASSERTs). Array.Clear(window, FastEncoderWindowSize, window.Length - FastEncoderWindowSize); #endif VerifyHashes(); // debug: verify hash table is correct bufPos = FastEncoderWindowSize; bufEnd = bufPos; } private uint HashValue(uint hash, byte b) { return(hash << FastEncoderHashShift) ^ b; } // insert string into hash table and return most recent location of same hash value private uint InsertString(ref uint hash) { // Note we only use the lowest 11 bits of the hash vallue (hash table size is 11). // This enables fast calculation of hash value for the input string. // If we want to get the next hash code starting at next position, // we can just increment bufPos and call this function. hash = HashValue( hash, window[bufPos+2] ); // Need to assert the hash value uint search = lookup[hash & FastEncoderHashMask]; lookup[hash & FastEncoderHashMask] = (ushort) bufPos; prev[bufPos & FastEncoderWindowMask] = (ushort) search; return search; } // // insert strings into hashtable // Arguments: // hash : intial hash value // matchLen : 1 + number of strings we need to insert. // private void InsertStrings(ref uint hash, int matchLen) { Debug.Assert(matchLen > 0, "Invalid match Len!"); if (bufEnd - bufPos <= matchLen) { bufPos += (matchLen-1); } else { while (--matchLen > 0) { InsertString(ref hash); bufPos++; } } } // // Find out what we should generate next. It can be a symbol, a distance/length pair // or a symbol followed by distance/length pair // internal bool GetNextSymbolOrMatch(Match match) { Debug.Assert(bufPos >= FastEncoderWindowSize && bufPos < (2*FastEncoderWindowSize), "Invalid Buffer Position!"); // initialise the value of the hash, no problem if locations bufPos, bufPos+1 // are invalid (not enough data), since we will never insert using that hash value uint hash = HashValue( 0 , window[bufPos]); hash = HashValue( hash , window[bufPos + 1]); int matchLen; int matchPos = 0; VerifyHashes(); // Debug only code if (bufEnd - bufPos <= 3) { // The hash value becomes corrupt when we get within 3 characters of the end of the // input window, since the hash value is based on 3 characters. We just stop // inserting into the hash table at this point, and allow no matches. matchLen = 0; } else { // insert string into hash table and return most recent location of same hash value int search = (int)InsertString(ref hash); // did we find a recent location of this hash value? if (search != 0) { // yes, now find a match at what we'll call position X matchLen = FindMatch(search, out matchPos, SearchDepth, NiceLength); // truncate match if we're too close to the end of the input window if (bufPos + matchLen > bufEnd) matchLen = bufEnd - bufPos; } else { // no most recent location found matchLen = 0; } } if (matchLen < MinMatch) { // didn't find a match, so output unmatched char match.State = MatchState.HasSymbol; match.Symbol = window[bufPos]; bufPos++; } else { // bufPos now points to X+1 bufPos++; // is this match so good (long) that we should take it automatically without // checking X+1 ? if (matchLen <= LazyMatchThreshold) { int nextMatchLen; int nextMatchPos = 0; // search at position X+1 int search = (int)InsertString(ref hash); // no, so check for a better match at X+1 if (search != 0) { nextMatchLen = FindMatch(search, out nextMatchPos, matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2),NiceLength); // truncate match if we're too close to the end of the window // note: nextMatchLen could now be < MinMatch if (bufPos + nextMatchLen > bufEnd) { nextMatchLen = bufEnd - bufPos; } } else { nextMatchLen = 0; } // right now X and X+1 are both inserted into the search tree if (nextMatchLen > matchLen) { // since nextMatchLen > matchLen, it can't be < MinMatch here // match at X+1 is better, so output unmatched char at X match.State = MatchState.HasSymbolAndMatch; match.Symbol = window[bufPos-1]; match.Position = nextMatchPos; match.Length = nextMatchLen; // insert remainder of second match into search tree // example: (*=inserted already) // // X X+1 X+2 X+3 X+4 // * * // nextmatchlen=3 // bufPos // // If nextMatchLen == 3, we want to perform 2 // insertions (at X+2 and X+3). However, first we must // inc bufPos. // bufPos++; // now points to X+2 matchLen = nextMatchLen; InsertStrings(ref hash, matchLen); } else { // match at X is better, so take it match.State = MatchState.HasMatch; match.Position = matchPos; match.Length = matchLen; // Insert remainder of first match into search tree, minus the first // two locations, which were inserted by the FindMatch() calls. // // For example, if matchLen == 3, then we've inserted at X and X+1 // already (and bufPos is now pointing at X+1), and now we need to insert // only at X+2. // matchLen--; bufPos++; // now bufPos points to X+2 InsertStrings(ref hash, matchLen); } } else { // match_length >= good_match // in assertion: bufPos points to X+1, location X inserted already // first match is so good that we're not even going to check at X+1 match.State = MatchState.HasMatch; match.Position = matchPos; match.Length = matchLen; // insert remainder of match at X into search tree InsertStrings(ref hash, matchLen); } } if (bufPos == 2*FastEncoderWindowSize) { MoveWindows(); } return true; } // // Find a match starting at specified position and return length of match // Arguments: // search : where to start searching // matchPos : return match position here // searchDepth : # links to traverse // NiceLength : stop immediately if we find a match >= NiceLength // int FindMatch(int search, out int matchPos, int searchDepth, int niceLength ) { Debug.Assert(bufPos >= 0 && bufPos < 2*FastEncoderWindowSize, "Invalid Buffer position!"); Debug.Assert(search < bufPos, "Invalid starting search point!"); Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos)); int bestMatch = 0; // best match length found so far int bestMatchPos = 0; // absolute match position of best match found // the earliest we can look int earliest = bufPos - FastEncoderWindowSize; Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!"); byte wantChar = window[bufPos]; while (search > earliest) { // make sure all our hash links are valid Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos), "Corrupted hash link!"); // Start by checking the character that would allow us to increase the match // length by one. This improves performance quite a bit. if (window[search + bestMatch] == wantChar) { int j; // Now make sure that all the other characters are correct for (j = 0; j < MaxMatch; j++) { if (window[bufPos+j] != window[search+j]) break; } if (j > bestMatch) { bestMatch = j; bestMatchPos = search; // absolute position if (j > NiceLength) break; wantChar = window[bufPos+j]; } } if (--searchDepth == 0) { break; } Debug.Assert(prev[search & FastEncoderWindowMask] < search, "we should always go backwards!"); search = prev[search & FastEncoderWindowMask]; } // doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch matchPos = bufPos - bestMatchPos - 1; // convert absolute to relative position // don't allow match length 3's which are too far away to be worthwhile if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold) { return 0; } Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize"); return bestMatch; } [Conditional("DEBUG")] void VerifyHashes() { for (int i = 0; i < FastEncoderHashtableSize; i++) { ushort where = lookup[i]; ushort nextWhere; while (where != 0 && bufPos - where < FastEncoderWindowSize) { Debug.Assert(RecalculateHash(where) == i, "Incorrect Hashcode!"); nextWhere = prev[where & FastEncoderWindowMask]; if (bufPos - nextWhere >= FastEncoderWindowSize) { break; } Debug.Assert(nextWhere < where, "pointer is messed up!"); where = nextWhere; } } } // can't use conditional attribute here. uint RecalculateHash(int position) { return (uint)(((window[position] << (2*FastEncoderHashShift)) ^ (window[position+1] << FastEncoderHashShift) ^ (window[position+2])) & FastEncoderHashMask); } } }