001/*
002 * $Id: PdfSmartCopy.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf;
045
046import java.io.IOException;
047import java.io.OutputStream;
048import java.security.MessageDigest;
049import java.util.Arrays;
050import java.util.HashMap;
051
052import com.itextpdf.text.Document;
053import com.itextpdf.text.DocumentException;
054import com.itextpdf.text.ExceptionConverter;
055
056/**
057 * PdfSmartCopy has the same functionality as PdfCopy,
058 * but when resources (such as fonts, images,...) are
059 * encountered, a reference to these resources is saved
060 * in a cache, so that they can be reused.
061 * This requires more memory, but reduces the file size
062 * of the resulting PDF document.
063 */
064
065public class PdfSmartCopy extends PdfCopy {
066
067        /** the cache with the streams and references. */
068    private HashMap<ByteStore, PdfIndirectReference> streamMap = null;
069
070    /** Creates a PdfSmartCopy instance. */
071    public PdfSmartCopy(Document document, OutputStream os) throws DocumentException {
072        super(document, os);
073        this.streamMap = new HashMap<ByteStore, PdfIndirectReference>();
074    }
075    /**
076     * Translate a PRIndirectReference to a PdfIndirectReference
077     * In addition, translates the object numbers, and copies the
078     * referenced object to the output file if it wasn't available
079     * in the cache yet. If it's in the cache, the reference to
080     * the already used stream is returned.
081     *
082     * NB: PRIndirectReferences (and PRIndirectObjects) really need to know what
083     * file they came from, because each file has its own namespace. The translation
084     * we do from their namespace to ours is *at best* heuristic, and guaranteed to
085     * fail under some circumstances.
086     */
087    @Override
088    protected PdfIndirectReference copyIndirect(PRIndirectReference in) throws IOException, BadPdfFormatException {
089        PdfObject srcObj = PdfReader.getPdfObjectRelease(in);
090        ByteStore streamKey = null;
091        boolean validStream = false;
092        if (srcObj.isStream()) {
093            streamKey = new ByteStore((PRStream)srcObj);
094            validStream = true;
095            PdfIndirectReference streamRef = streamMap.get(streamKey);
096            if (streamRef != null) {
097                return streamRef;
098            }
099        }
100
101        PdfIndirectReference theRef;
102        RefKey key = new RefKey(in);
103        IndirectReferences iRef = indirects.get(key);
104        if (iRef != null) {
105            theRef = iRef.getRef();
106            if (iRef.getCopied()) {
107                return theRef;
108            }
109        } else {
110            theRef = body.getPdfIndirectReference();
111            iRef = new IndirectReferences(theRef);
112            indirects.put(key, iRef);
113        }
114        if (srcObj.isDictionary()) {
115            PdfObject type = PdfReader.getPdfObjectRelease(((PdfDictionary)srcObj).get(PdfName.TYPE));
116            if (type != null && PdfName.PAGE.equals(type)) {
117                return theRef;
118            }
119        }
120        iRef.setCopied();
121
122        if (validStream) {
123            streamMap.put(streamKey, theRef);
124        }
125
126        PdfObject obj = copyObject(srcObj);
127        addToBody(obj, theRef);
128        return theRef;
129    }
130
131    static class ByteStore {
132        private byte[] b;
133        private int hash;
134        private MessageDigest md5;
135
136        private void serObject(PdfObject obj, int level, ByteBuffer bb) throws IOException {
137            if (level <= 0)
138                return;
139            if (obj == null) {
140                bb.append("$Lnull");
141                return;
142            }
143            obj = PdfReader.getPdfObject(obj);
144            if (obj.isStream()) {
145                bb.append("$B");
146                serDic((PdfDictionary)obj, level - 1, bb);
147                if (level > 0) {
148                    md5.reset();
149                    bb.append(md5.digest(PdfReader.getStreamBytesRaw((PRStream)obj)));
150                }
151            }
152            else if (obj.isDictionary()) {
153                serDic((PdfDictionary)obj, level - 1, bb);
154            }
155            else if (obj.isArray()) {
156                serArray((PdfArray)obj, level - 1, bb);
157            }
158            else if (obj.isString()) {
159                bb.append("$S").append(obj.toString());
160            }
161            else if (obj.isName()) {
162                bb.append("$N").append(obj.toString());
163            }
164            else
165                bb.append("$L").append(obj.toString());
166        }
167
168        private void serDic(PdfDictionary dic, int level, ByteBuffer bb) throws IOException {
169            bb.append("$D");
170            if (level <= 0)
171                return;
172            Object[] keys = dic.getKeys().toArray();
173            Arrays.sort(keys);
174            for (int k = 0; k < keys.length; ++k) {
175                serObject((PdfObject)keys[k], level, bb);
176                serObject(dic.get((PdfName)keys[k]), level, bb);
177            }
178        }
179
180        private void serArray(PdfArray array, int level, ByteBuffer bb) throws IOException {
181            bb.append("$A");
182            if (level <= 0)
183                return;
184            for (int k = 0; k < array.size(); ++k) {
185                serObject(array.getPdfObject(k), level, bb);
186            }
187        }
188
189        ByteStore(PRStream str) throws IOException {
190            try {
191                md5 = MessageDigest.getInstance("MD5");
192            }
193            catch (Exception e) {
194                throw new ExceptionConverter(e);
195            }
196            ByteBuffer bb = new ByteBuffer();
197            int level = 100;
198            serObject(str, level, bb);
199            this.b = bb.toByteArray();
200            md5 = null;
201        }
202
203        @Override
204        public boolean equals(Object obj) {
205            if (!(obj instanceof ByteStore))
206                return false;
207            if (hashCode() != obj.hashCode())
208                return false;
209            return Arrays.equals(b, ((ByteStore)obj).b);
210        }
211
212        @Override
213        public int hashCode() {
214            if (hash == 0) {
215                int len = b.length;
216                for (int k = 0; k < len; ++k) {
217                    hash = hash * 31 + (b[k] & 0xff);
218                }
219            }
220            return hash;
221        }
222    }
223}