001/*
002 * Copyright 2003-2008 by Paulo Soares.
003 *
004 * This code was originally released in 2001 by SUN (see class
005 * com.sun.media.imageio.plugins.tiff.TIFFDirectory.java)
006 * using the BSD license in a specific wording. In a mail dating from
007 * January 23, 2008, Brian Burkhalter (@sun.com) gave us permission
008 * to use the code under the following version of the BSD license:
009 *
010 * Copyright (c) 2005 Sun Microsystems, Inc. All  Rights Reserved.
011 * 
012 * Redistribution and use in source and binary forms, with or without
013 * modification, are permitted provided that the following conditions
014 * are met: 
015 * 
016 * - Redistribution of source code must retain the above copyright 
017 *   notice, this  list of conditions and the following disclaimer.
018 * 
019 * - Redistribution in binary form must reproduce the above copyright
020 *   notice, this list of conditions and the following disclaimer in 
021 *   the documentation and/or other materials provided with the
022 *   distribution.
023 * 
024 * Neither the name of Sun Microsystems, Inc. or the names of 
025 * contributors may be used to endorse or promote products derived 
026 * from this software without specific prior written permission.
027 * 
028 * This software is provided "AS IS," without a warranty of any 
029 * kind. ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND 
030 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, 
031 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY
032 * EXCLUDED. SUN MIDROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL 
033 * NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF 
034 * USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
035 * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR 
036 * ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL,
037 * CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND
038 * REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR
039 * INABILITY TO USE THIS SOFTWARE, EVEN IF SUN HAS BEEN ADVISED OF THE
040 * POSSIBILITY OF SUCH DAMAGES. 
041 * 
042 * You acknowledge that this software is not designed or intended for 
043 * use in the design, construction, operation or maintenance of any 
044 * nuclear facility. 
045 *
046 * $Revision: 4540 $
047 * $Date: 2010-07-13 05:57:54 -0700 (Tue, 13 Jul 2010) $
048 * $State: Exp $
049 */
050
051package com.itextpdf.text.pdf.codec;
052
053import java.io.PrintStream;
054
055/** 
056 * General purpose LZW String Table.
057 * Extracted from GIFEncoder by Adam Doppelt
058 * Comments added by Robin Luiten
059 * <code>expandCode</code> added by Robin Luiten
060 * The strLen_ table to give quick access to the lenght of an expanded
061 * code for use by the <code>expandCode</code> method added by Robin.
062 * @since 5.0.2
063 */
064public class LZWStringTable
065{
066    /** codesize + Reserved Codes */
067    private     final static int RES_CODES = 2;
068
069    private     final static short HASH_FREE = (short)0xFFFF;
070    private     final static short NEXT_FIRST = (short)0xFFFF;
071
072    private     final static int MAXBITS = 12;
073    private     final static int MAXSTR = (1 << MAXBITS);
074
075    private     final static short HASHSIZE     = 9973;
076    private     final static short HASHSTEP     = 2039;
077
078    byte[]  strChr_;            // after predecessor character
079    short[] strNxt_;            // predecessor string 
080    short[] strHsh_;            // hash table to find  predecessor + char pairs
081    short numStrings_;          // next code if adding new prestring + char
082
083    /**
084         * each entry corresponds to a code and contains the length of data
085         * that the code expands to when decoded.
086         **/
087    int[] strLen_;
088
089    /**
090         * Constructor allocate memory for string store data
091         **/
092    public LZWStringTable()
093    {
094        strChr_ = new byte[MAXSTR];
095        strNxt_ = new short[MAXSTR];
096        strLen_ = new int[MAXSTR];
097        strHsh_ = new short[HASHSIZE];    
098    }
099
100    /**
101         * @param index value of -1 indicates no predecessor [used in initialization]
102         * @param b the byte [character] to add to the string store which follows
103         * the predecessor string specified the index.
104         * @return 0xFFFF if no space in table left for addition of predecessor
105         * index and byte b. Else return the code allocated for combination index + b.
106         **/
107    public int AddCharString(short index, byte b)
108    {
109        int     hshidx;
110
111        if (numStrings_ >= MAXSTR)      // if used up all codes
112            {
113                return 0xFFFF;
114            }
115                
116        hshidx = Hash(index, b);
117        while (strHsh_[hshidx] != HASH_FREE)
118            hshidx = (hshidx + HASHSTEP) % HASHSIZE;
119                
120        strHsh_[hshidx] = numStrings_;
121        strChr_[numStrings_] = b;
122        if (index == HASH_FREE)
123            {
124                strNxt_[numStrings_] = NEXT_FIRST;
125                strLen_[numStrings_] = 1;
126            }
127        else
128            {
129                strNxt_[numStrings_] = index;
130                strLen_[numStrings_] = strLen_[index] + 1;
131            }
132
133        return numStrings_++;   // return the code and inc for next code
134    }
135
136    /**
137         * @param index index to prefix string
138         * @param b the character that follws the index prefix
139         * @return b if param index is HASH_FREE. Else return the code
140         * for this prefix and byte successor
141         **/    
142    public short FindCharString(short index, byte b)
143    {
144        int     hshidx, nxtidx;
145
146        if (index == HASH_FREE)
147            return (short)(b & 0xFF);    // Rob fixed used to sign extend
148
149        hshidx = Hash(index, b);
150        while ((nxtidx = strHsh_[hshidx]) != HASH_FREE) // search
151            {
152                if (strNxt_[nxtidx]     == index &&     strChr_[nxtidx] == b)
153                    return (short)nxtidx;
154                hshidx = (hshidx + HASHSTEP) % HASHSIZE;
155            }
156
157        return (short)0xFFFF;
158    }
159
160    /**
161         * @param codesize the size of code to be preallocated for the
162         * string store.
163         **/
164    public void ClearTable(int codesize)
165    {
166        numStrings_     = 0;
167
168        for     (int q = 0;     q <     HASHSIZE; q++)
169            strHsh_[q] = HASH_FREE;
170
171        int     w =     (1 << codesize) + RES_CODES;
172        for     (int q = 0;     q <     w; q++)
173            AddCharString((short)0xFFFF, (byte)q);      // init with no prefix
174    }
175
176    static public int Hash(short index, byte lastbyte)
177    {
178        return ((int)((short)(lastbyte << 8) ^ index) & 0xFFFF) % HASHSIZE;
179    }
180
181    /**
182         * If expanded data doesn't fit into array only what will fit is written
183         * to buf and the return value indicates how much of the expanded code has
184         * been written to the buf. The next call to expandCode() should be with 
185         * the same code and have the skip parameter set the negated value of the 
186         * previous return. Successive negative return values should be negated and
187         * added together for next skip parameter value with same code.
188         *
189         * @param buf buffer to place expanded data into
190         * @param offset offset to place expanded data
191         * @param code the code to expand to the byte array it represents.
192         * PRECONDITION This code must already be in the LZSS
193         * @param skipHead is the number of bytes at the start of the expanded code to 
194         * be skipped before data is written to buf. It is possible that skipHead is
195         * equal to codeLen.
196         * @return the length of data expanded into buf. If the expanded code is longer
197         * than space left in buf then the value returned is a negative number which when
198         * negated is equal to the number of bytes that were used of the code being expanded.
199         * This negative value also indicates the buffer is full.
200         **/
201    public int expandCode(byte[] buf, int offset, short code, int skipHead)
202    {
203        if (offset == -2) 
204            {
205                if (skipHead == 1) skipHead = 0;
206            }
207        if (code == (short)0xFFFF ||                            // just in case
208            skipHead == strLen_[code])                          // DONE no more unpacked
209            return 0;
210
211        int expandLen;                                                  // how much data we are actually expanding
212        int codeLen = strLen_[code] - skipHead; // length of expanded code left
213        int bufSpace = buf.length - offset;             // how much space left
214        if (bufSpace > codeLen)
215            expandLen = codeLen;                                // only got this many to unpack
216        else
217            expandLen = bufSpace;
218
219        int skipTail = codeLen - expandLen;             // only > 0 if codeLen > bufSpace [left overs]
220
221        int idx = offset + expandLen;                   // initialise to exclusive end address of buffer area
222
223        // NOTE: data unpacks in reverse direction and we are placing the
224        // unpacked data directly into the array in the correct location.
225        while ((idx > offset) && (code != (short)0xFFFF))
226            {
227                if (--skipTail < 0)                                     // skip required of expanded data
228                    {
229                        buf[--idx] = strChr_[code];
230                    }
231                code = strNxt_[code];                           // to predecessor code
232            }
233
234        if (codeLen > expandLen)
235            return -expandLen;                                  // indicate what part of codeLen used
236        else
237            return expandLen;                                   // indicate length of dat unpacked
238    }
239
240    public void dump(PrintStream out)
241    {
242        int i;
243        for (i = 258; i < numStrings_; ++i)
244            out.println(  " strNxt_[" + i + "] = " + strNxt_[i]
245                          + " strChr_ " + Integer.toHexString(strChr_[i] & 0xFF)
246                          + " strLen_ " + Integer.toHexString(strLen_[i]));
247    }
248}