linux-packaging-mono/mcs/class/referencesource/System/sys/system/IO/compression/HuffmanTree.cs

namespace System.IO.Compression
{
    using System;
    using System.Diagnostics;
    
    // Strictly speaking this class is not a HuffmanTree, this class is 
    // a lookup table combined with a HuffmanTree. The idea is to speed up
    // the lookup for short symbols (they should appear more frequently ideally.)
    // However we don't want to create a huge table since it might take longer to 
    // build the table than decoding (Deflate usually generates new tables frequently.)  
    // 
    // Jean-loup Gailly and Mark Adler gave a very good explanation about this.
    // The full text (algorithm.txt) can be found inside 
    // ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip.
    //
    // Following paper explains decoding in details:
    //   Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
    //   Comm. ACM, 33,4, April 1990, pp. 449-459.
    //

    internal class HuffmanTree {
        internal const int   MaxLiteralTreeElements         = 288;
        internal const int   MaxDistTreeElements            =  32;
        internal const int   EndOfBlockCode                 = 256;
        internal const int   NumberOfCodeLengthTreeElements =  19;

        int          tableBits;                              
        short[]      table; 
        short[]      left; 
        short[]      right;
        byte[]       codeLengthArray;
#if DEBUG
        uint[]       codeArrayDebug;
#endif

        int tableMask;         

        // huffman tree for static block
        static HuffmanTree staticLiteralLengthTree;
        static HuffmanTree staticDistanceTree;

        static HuffmanTree() {
            // construct the static literal tree and distance tree
            staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength());            
            staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength());
        }

        static public HuffmanTree StaticLiteralLengthTree {
            get {
                return staticLiteralLengthTree;
            }
        }

        static public HuffmanTree StaticDistanceTree {
            get {
                return staticDistanceTree;
            }
        }

        public HuffmanTree(byte[] codeLengths) {
            Debug.Assert( codeLengths.Length == MaxLiteralTreeElements 
                          || codeLengths.Length == MaxDistTreeElements 
                          || codeLengths.Length == NumberOfCodeLengthTreeElements,
                          "we only expect three kinds of Length here");
            codeLengthArray = codeLengths;

            if (codeLengthArray.Length == MaxLiteralTreeElements) {  // bits for Literal/Length tree table
                tableBits = 9;
            }
            else {          // bits for distance tree table and code length tree table
                tableBits = 7;
            }
            tableMask = (1 << tableBits) -1;

            CreateTable();
        }


        // Generate the array contains huffman codes lengths for static huffman tree.
        // The data is in RFC 1951.
        static byte[] GetStaticLiteralTreeLength() {
            byte[] literalTreeLength = new byte[MaxLiteralTreeElements];
            for (int i = 0; i <= 143; i++)
                literalTreeLength[i] = 8;

            for (int i = 144; i <= 255; i++)
                literalTreeLength[i] = 9;

            for (int i = 256; i <= 279; i++)
                literalTreeLength[i] = 7;

            for (int i = 280; i <= 287; i++)
                literalTreeLength[i] = 8;

            return literalTreeLength;
        }

        static byte[] GetStaticDistanceTreeLength() {
            byte[]  staticDistanceTreeLength = new byte[MaxDistTreeElements];
            for (int i = 0; i < MaxDistTreeElements; i++) {
                staticDistanceTreeLength[i] = 5;
            }
            return staticDistanceTreeLength;
        }


        // Calculate the huffman code for each character based on the code length for each character.
        // This algorithm is described in standard RFC 1951
        uint[] CalculateHuffmanCode() {
            uint[]  bitLengthCount  = new uint[17];
            foreach( int codeLength in codeLengthArray) {
                bitLengthCount[codeLength]++;
            }
            bitLengthCount[0] = 0;  // clear count for length 0

            uint[] nextCode = new uint[17];
            uint tempCode   = 0;
            for (int bits = 1; bits <= 16; bits++) {
                tempCode = (tempCode + bitLengthCount[bits-1]) << 1;
                nextCode[bits] = tempCode;
            }

            uint[] code = new uint[MaxLiteralTreeElements];
            for (int i = 0; i < codeLengthArray.Length; i++) {
                int len = codeLengthArray[i];

                if (len > 0) {
                    code[i] = FastEncoderStatics.BitReverse(nextCode[len], len);
                    nextCode[len]++;
                }
            }       
            return code;
        }

        private void CreateTable() {

            uint[] codeArray = CalculateHuffmanCode();
            table = new short[ 1 << tableBits];
#if DEBUG            
            codeArrayDebug = codeArray;
#endif
            
            // I need to find proof that left and right array will always be 
            // enough. I think they are.
            left  = new short[2* codeLengthArray.Length];
            right = new short[2* codeLengthArray.Length];
            short avail = (short)codeLengthArray.Length;      

            for (int ch = 0; ch < codeLengthArray.Length; ch++) {
                // length of this code
                int len = codeLengthArray[ch];
                if (len > 0) {
                    // start value (bit reversed)
                    int start = (int)codeArray[ch];

                    if (len <= tableBits) {
                        // If a particular symbol is shorter than nine bits, 
                        // then that symbol's translation is duplicated
                        // in all those entries that start with that symbol's bits.  
                        // For example, if the symbol is four bits, then it's duplicated 
                        // 32 times in a nine-bit table. If a symbol is nine bits long, 
                        // it appears in the table once.
                        // 
                        // Make sure that in the loop below, code is always
                        // less than table_size.
                        //
                        // On last iteration we store at array index:
                        //    initial_start_at + (locs-1)*increment
                        //  = initial_start_at + locs*increment - increment
                        //  = initial_start_at + (1 << tableBits) - increment
                        //  = initial_start_at + table_size - increment
                        //
                        // Therefore we must ensure:
                        //     initial_start_at + table_size - increment < table_size
                        // or: initial_start_at < increment
                        //
                        int increment = 1 << len;
                        if (start >= increment) {
                            throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));
                        }

                        // Note the bits in the table are reverted.
                        int locs = 1 << (tableBits - len); 
                        for (int j = 0; j < locs; j++) {
                            table[start] = (short) ch;
                            start += increment;
                        }
                    } else {
                        // For any code which has length longer than num_elements,
                        // build a binary tree.

                        int overflowBits = len - tableBits;    // the nodes we need to respent the data.
                        int codeBitMask = 1 << tableBits;    // mask to get current bit (the bits can't fit in the table)  

                        // the left, right table is used to repesent the
                        // the rest bits. When we got the first part (number bits.) and look at
                        // tbe table, we will need to follow the tree to find the real character.
                        // This is in place to avoid bloating the table if there are
                        // a few ones with long code.
                        int index = start & ((1 << tableBits) -1);
                        short[] array = table;

                        do {
                            short value = array[index];

                            if (value == 0) {         // set up next pointer if this node is not used before.
                                array[index] = (short)-avail;  // use next available slot.
                                value = (short)-avail;
                                avail++;
                            }
                            
                            if (value > 0) {         // prevent an IndexOutOfRangeException from array[index]
                                throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));
                            }

                            Debug.Assert( value < 0, "CreateTable: Only negative numbers are used for tree pointers!");

                            if ((start & codeBitMask) == 0) {  // if current bit is 0, go change the left array
                                array  = left;
                            } else {                // if current bit is 1, set value in the right array
                                array = right;
                            }
                            index = -value;         // go to next node

                            codeBitMask <<= 1;
                            overflowBits--;
                        } while (overflowBits != 0);

                        array[index] = (short) ch;
                    }
                }
            }
        }

        //
        // This function will try to get enough bits from input and 
        // try to decode the bits.
        // If there are no enought bits in the input, this function will return -1.
        //
        public int GetNextSymbol(InputBuffer input) {
            // Try to load 16 bits into input buffer if possible and get the bitBuffer value.
            // If there aren't 16 bits available we will return all we have in the 
            // input buffer.
            uint bitBuffer = input.TryLoad16Bits();            
            if( input.AvailableBits == 0) {    // running out of input.
                return -1;
            }
            
            // decode an element 
            int symbol = table[bitBuffer & tableMask]; 
            if( symbol < 0) {       //  this will be the start of the binary tree
                // navigate the tree
                uint mask = (uint)1 << tableBits; 
                do 
                { 
                    symbol = -symbol; 
                    if ((bitBuffer & mask) == 0) 
                        symbol = left[symbol]; 
                    else 
                        symbol = right[symbol]; 
                    mask <<= 1; 
                } while (symbol < 0); 
            }

            int codeLength = codeLengthArray[symbol];
            
            // huffman code lengths must be at least 1 bit long
            if (codeLength <= 0)
            {
                throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));
            }

            //
            // If this code is longer than the # bits we had in the bit buffer (i.e.
            // we read only part of the code), we can hit the entry in the table or the tree
            // for another symbol. However the length of another symbol will not match the 
            // available bits count.
            if (codeLength > input.AvailableBits)
            {  
                // We already tried to load 16 bits and maximum length is 15, 
                // so this means we are running out of input. 
                return -1;      
            }

            input.SkipBits(codeLength);
            return symbol;
        }

    }
}
Imported Upstream version 4.6.0.125 Former-commit-id: a2155e9bd80020e49e72e86c44da02a8ac0e57a4 2016-08-03 10:59:49 +00:00			`namespace System.IO.Compression`
			`{`
			`using System;`
			`using System.Diagnostics;`

			`// Strictly speaking this class is not a HuffmanTree, this class is`
			`// a lookup table combined with a HuffmanTree. The idea is to speed up`
			`// the lookup for short symbols (they should appear more frequently ideally.)`
			`// However we don't want to create a huge table since it might take longer to`
			`// build the table than decoding (Deflate usually generates new tables frequently.)`
			`//`
			`// Jean-loup Gailly and Mark Adler gave a very good explanation about this.`
			`// The full text (algorithm.txt) can be found inside`
			`// ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip.`
			`//`
			`// Following paper explains decoding in details:`
			`// Hirschberg and Lelewer, "Efficient decoding of prefix codes,"`
			`// Comm. ACM, 33,4, April 1990, pp. 449-459.`
			`//`

			`internal class HuffmanTree {`
			`internal const int MaxLiteralTreeElements = 288;`
			`internal const int MaxDistTreeElements = 32;`
			`internal const int EndOfBlockCode = 256;`
			`internal const int NumberOfCodeLengthTreeElements = 19;`

			`int tableBits;`
			`short[] table;`
			`short[] left;`
			`short[] right;`
			`byte[] codeLengthArray;`
			`#if DEBUG`
			`uint[] codeArrayDebug;`
			`#endif`

			`int tableMask;`

			`// huffman tree for static block`
			`static HuffmanTree staticLiteralLengthTree;`
			`static HuffmanTree staticDistanceTree;`

			`static HuffmanTree() {`
			`// construct the static literal tree and distance tree`
			`staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength());`
			`staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength());`
			`}`

			`static public HuffmanTree StaticLiteralLengthTree {`
			`get {`
			`return staticLiteralLengthTree;`
			`}`
			`}`

			`static public HuffmanTree StaticDistanceTree {`
			`get {`
			`return staticDistanceTree;`
			`}`
			`}`

			`public HuffmanTree(byte[] codeLengths) {`
			`Debug.Assert( codeLengths.Length == MaxLiteralTreeElements`
			`\|\| codeLengths.Length == MaxDistTreeElements`
			`\|\| codeLengths.Length == NumberOfCodeLengthTreeElements,`
			`"we only expect three kinds of Length here");`
			`codeLengthArray = codeLengths;`

			`if (codeLengthArray.Length == MaxLiteralTreeElements) { // bits for Literal/Length tree table`
			`tableBits = 9;`
			`}`
			`else { // bits for distance tree table and code length tree table`
			`tableBits = 7;`
			`}`
			`tableMask = (1 << tableBits) -1;`

			`CreateTable();`
			`}`


			`// Generate the array contains huffman codes lengths for static huffman tree.`
			`// The data is in RFC 1951.`
			`static byte[] GetStaticLiteralTreeLength() {`
			`byte[] literalTreeLength = new byte[MaxLiteralTreeElements];`
			`for (int i = 0; i <= 143; i++)`
			`literalTreeLength[i] = 8;`

			`for (int i = 144; i <= 255; i++)`
			`literalTreeLength[i] = 9;`

			`for (int i = 256; i <= 279; i++)`
			`literalTreeLength[i] = 7;`

			`for (int i = 280; i <= 287; i++)`
			`literalTreeLength[i] = 8;`

			`return literalTreeLength;`
			`}`

			`static byte[] GetStaticDistanceTreeLength() {`
			`byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements];`
			`for (int i = 0; i < MaxDistTreeElements; i++) {`
			`staticDistanceTreeLength[i] = 5;`
			`}`
			`return staticDistanceTreeLength;`
			`}`


			`// Calculate the huffman code for each character based on the code length for each character.`
			`// This algorithm is described in standard RFC 1951`
			`uint[] CalculateHuffmanCode() {`
			`uint[] bitLengthCount = new uint[17];`
			`foreach( int codeLength in codeLengthArray) {`
			`bitLengthCount[codeLength]++;`
			`}`
			`bitLengthCount[0] = 0; // clear count for length 0`

			`uint[] nextCode = new uint[17];`
			`uint tempCode = 0;`
			`for (int bits = 1; bits <= 16; bits++) {`
			`tempCode = (tempCode + bitLengthCount[bits-1]) << 1;`
			`nextCode[bits] = tempCode;`
			`}`

			`uint[] code = new uint[MaxLiteralTreeElements];`
			`for (int i = 0; i < codeLengthArray.Length; i++) {`
			`int len = codeLengthArray[i];`

			`if (len > 0) {`
			`code[i] = FastEncoderStatics.BitReverse(nextCode[len], len);`
			`nextCode[len]++;`
			`}`
			`}`
			`return code;`
			`}`

			`private void CreateTable() {`

			`uint[] codeArray = CalculateHuffmanCode();`
			`table = new short[ 1 << tableBits];`
			`#if DEBUG`
			`codeArrayDebug = codeArray;`
			`#endif`

			`// I need to find proof that left and right array will always be`
			`// enough. I think they are.`
			`left = new short[2* codeLengthArray.Length];`
			`right = new short[2* codeLengthArray.Length];`
			`short avail = (short)codeLengthArray.Length;`

			`for (int ch = 0; ch < codeLengthArray.Length; ch++) {`
			`// length of this code`
			`int len = codeLengthArray[ch];`
			`if (len > 0) {`
			`// start value (bit reversed)`
			`int start = (int)codeArray[ch];`

			`if (len <= tableBits) {`
			`// If a particular symbol is shorter than nine bits,`
			`// then that symbol's translation is duplicated`
			`// in all those entries that start with that symbol's bits.`
			`// For example, if the symbol is four bits, then it's duplicated`
			`// 32 times in a nine-bit table. If a symbol is nine bits long,`
			`// it appears in the table once.`
			`//`
			`// Make sure that in the loop below, code is always`
			`// less than table_size.`
			`//`
			`// On last iteration we store at array index:`
			`// initial_start_at + (locs-1)*increment`
			`// = initial_start_at + locs*increment - increment`
			`// = initial_start_at + (1 << tableBits) - increment`
			`// = initial_start_at + table_size - increment`
			`//`
			`// Therefore we must ensure:`
			`// initial_start_at + table_size - increment < table_size`
			`// or: initial_start_at < increment`
			`//`
			`int increment = 1 << len;`
			`if (start >= increment) {`
			`throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));`
			`}`

			`// Note the bits in the table are reverted.`
			`int locs = 1 << (tableBits - len);`
			`for (int j = 0; j < locs; j++) {`
			`table[start] = (short) ch;`
			`start += increment;`
			`}`
			`} else {`
			`// For any code which has length longer than num_elements,`
			`// build a binary tree.`

			`int overflowBits = len - tableBits; // the nodes we need to respent the data.`
			`int codeBitMask = 1 << tableBits; // mask to get current bit (the bits can't fit in the table)`

			`// the left, right table is used to repesent the`
			`// the rest bits. When we got the first part (number bits.) and look at`
			`// tbe table, we will need to follow the tree to find the real character.`
			`// This is in place to avoid bloating the table if there are`
			`// a few ones with long code.`
			`int index = start & ((1 << tableBits) -1);`
			`short[] array = table;`

			`do {`
			`short value = array[index];`

			`if (value == 0) { // set up next pointer if this node is not used before.`
			`array[index] = (short)-avail; // use next available slot.`
			`value = (short)-avail;`
			`avail++;`
			`}`

			`if (value > 0) { // prevent an IndexOutOfRangeException from array[index]`
			`throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));`
			`}`

			`Debug.Assert( value < 0, "CreateTable: Only negative numbers are used for tree pointers!");`

			`if ((start & codeBitMask) == 0) { // if current bit is 0, go change the left array`
			`array = left;`
			`} else { // if current bit is 1, set value in the right array`
			`array = right;`
			`}`
			`index = -value; // go to next node`

			`codeBitMask <<= 1;`
			`overflowBits--;`
			`} while (overflowBits != 0);`

			`array[index] = (short) ch;`
			`}`
			`}`
			`}`
			`}`

			`//`
			`// This function will try to get enough bits from input and`
			`// try to decode the bits.`
			`// If there are no enought bits in the input, this function will return -1.`
			`//`
			`public int GetNextSymbol(InputBuffer input) {`
			`// Try to load 16 bits into input buffer if possible and get the bitBuffer value.`
			`// If there aren't 16 bits available we will return all we have in the`
			`// input buffer.`
			`uint bitBuffer = input.TryLoad16Bits();`
			`if( input.AvailableBits == 0) { // running out of input.`
			`return -1;`
			`}`

			`// decode an element`
			`int symbol = table[bitBuffer & tableMask];`
			`if( symbol < 0) { // this will be the start of the binary tree`
			`// navigate the tree`
			`uint mask = (uint)1 << tableBits;`
			`do`
			`{`
			`symbol = -symbol;`
			`if ((bitBuffer & mask) == 0)`
			`symbol = left[symbol];`
			`else`
			`symbol = right[symbol];`
			`mask <<= 1;`
			`} while (symbol < 0);`
			`}`

			`int codeLength = codeLengthArray[symbol];`

			`// huffman code lengths must be at least 1 bit long`
			`if (codeLength <= 0)`
			`{`
			`throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));`
			`}`

			`//`
			`// If this code is longer than the # bits we had in the bit buffer (i.e.`
			`// we read only part of the code), we can hit the entry in the table or the tree`
			`// for another symbol. However the length of another symbol will not match the`
			`// available bits count.`
			`if (codeLength > input.AvailableBits)`
			`{`
			`// We already tried to load 16 bits and maximum length is 15,`
			`// so this means we are running out of input.`
			`return -1;`
			`}`

			`input.SkipBits(codeLength);`
			`return symbol;`
			`}`

			`}`
			`}`