/*
 * $Id: PdfSmartCopy.java 4784 2011-03-15 08:33:00Z blowagie $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2011 1T3XT BVBA
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.pdf;

import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.ExceptionConverter;

/**
 * PdfSmartCopy has the same functionality as PdfCopy,
 * but when resources (such as fonts, images,...) are
 * encountered, a reference to these resources is saved
 * in a cache, so that they can be reused.
 * This requires more memory, but reduces the file size
 * of the resulting PDF document.
 */

public class PdfSmartCopy extends PdfCopy {

    /** the cache with the streams and references. */
    private final HashMap<ByteStore, PdfIndirectReference> streamMap;

    /**
     * Creates a PdfSmartCopy instance.
     *
     * @param document the document passed on to the PdfCopy constructor
     * @param os the output stream passed on to the PdfCopy constructor
     * @throws DocumentException if the superclass constructor fails
     */
    public PdfSmartCopy(Document document, OutputStream os) throws DocumentException {
        super(document, os);
        this.streamMap = new HashMap<ByteStore, PdfIndirectReference>();
    }

    /**
     * Translate a PRIndirectReference to a PdfIndirectReference
     * In addition, translates the object numbers, and copies the
     * referenced object to the output file if it wasn't available
     * in the cache yet. If it's in the cache, the reference to
     * the already used stream is returned.
     *
     * NB: PRIndirectReferences (and PRIndirectObjects) really need to know what
     * file they came from, because each file has its own namespace. The translation
     * we do from their namespace to ours is *at best* heuristic, and guaranteed to
     * fail under some circumstances.
     *
     * @param in the reference in the source document
     * @return the reference to use in the output: the cached one when an
     *         equal stream was copied before, the already assigned one when
     *         this source reference was seen before, or a newly created one
     * @throws IOException if reading or serializing the source object fails
     * @throws BadPdfFormatException declared by the overridden method;
     *         presumably raised while copying the object
     */
    @Override
    protected PdfIndirectReference copyIndirect(PRIndirectReference in) throws IOException, BadPdfFormatException {
        // The resolved object is null-checked before every use below, so a
        // reference that resolves to nothing no longer fails with a
        // NullPointerException in this method.
        PdfObject srcObj = PdfReader.getPdfObjectRelease(in);
        ByteStore streamKey = null;
        if (srcObj != null && srcObj.isStream()) {
            streamKey = new ByteStore((PRStream) srcObj);
            PdfIndirectReference streamRef = streamMap.get(streamKey);
            if (streamRef != null) {
                // An equal stream was already written: reuse its reference.
                return streamRef;
            }
        }

        PdfIndirectReference theRef;
        RefKey key = new RefKey(in);
        IndirectReferences iRef = indirects.get(key);
        if (iRef != null) {
            theRef = iRef.getRef();
            if (iRef.getCopied()) {
                return theRef;
            }
        } else {
            theRef = body.getPdfIndirectReference();
            iRef = new IndirectReferences(theRef);
            indirects.put(key, iRef);
        }
        if (srcObj != null && srcObj.isDictionary()) {
            PdfObject type = PdfReader.getPdfObjectRelease(((PdfDictionary) srcObj).get(PdfName.TYPE));
            if (type != null && PdfName.PAGE.equals(type)) {
                // Page dictionaries are not copied here; only the reference is handed back.
                return theRef;
            }
        }
        iRef.setCopied();

        // Register the stream before copying it, so that an equal stream met
        // while copying this object's contents resolves to the same reference.
        if (streamKey != null) {
            streamMap.put(streamKey, theRef);
        }

        // NOTE(review): when srcObj is null this relies on copyObject accepting
        // null, as PdfCopy's own copyIndirect appears to - confirm against PdfCopy.
        PdfObject obj = copyObject(srcObj);
        addToBody(obj, theRef);
        return theRef;
    }

    /**
     * Cache key for a stream: a byte serialization of the stream dictionary
     * (keys sorted, nested objects resolved and followed up to a fixed depth)
     * followed by an MD5 digest of the raw stream bytes. Two keys are equal
     * when these serializations are byte-for-byte equal.
     */
    static class ByteStore {
        /** Maximum nesting depth followed while serializing. */
        private static final int MAX_LEVEL = 100;

        /** The serialized form that is compared in {@link #equals(Object)}. */
        private final byte[] b;
        /** Lazily computed hash of {@link #b}; 0 means "not computed yet". */
        private int hash;
        /** Digest used during construction only; cleared afterwards. */
        private MessageDigest md5;

        /**
         * Appends the serialized form of an object to the buffer.
         *
         * @param obj the object to serialize; may be null or an indirect reference
         * @param level the remaining depth; nothing is written when it is not positive
         * @param bb the buffer to append to
         * @throws IOException if the raw stream bytes cannot be read
         */
        private void serObject(PdfObject obj, int level, ByteBuffer bb) throws IOException {
            if (level <= 0) {
                return;
            }
            if (obj == null) {
                bb.append("$Lnull");
                return;
            }
            obj = PdfReader.getPdfObject(obj);
            if (obj == null) {
                // A reference that resolves to nothing is serialized like a
                // null entry instead of failing with a NullPointerException.
                bb.append("$Lnull");
                return;
            }
            if (obj.isStream()) {
                bb.append("$B");
                serDic((PdfDictionary) obj, level - 1, bb);
                // level is known to be positive here, so the digest is always added.
                md5.reset();
                bb.append(md5.digest(PdfReader.getStreamBytesRaw((PRStream) obj)));
            } else if (obj.isDictionary()) {
                serDic((PdfDictionary) obj, level - 1, bb);
            } else if (obj.isArray()) {
                serArray((PdfArray) obj, level - 1, bb);
            } else if (obj.isString()) {
                bb.append("$S").append(obj.toString());
            } else if (obj.isName()) {
                bb.append("$N").append(obj.toString());
            } else {
                bb.append("$L").append(obj.toString());
            }
        }

        /**
         * Appends a dictionary marker and, depth permitting, every key/value
         * pair with the keys in sorted order so that the entry order of the
         * dictionary does not influence the result.
         *
         * @param dic the dictionary to serialize
         * @param level the remaining depth; only the marker is written when it is not positive
         * @param bb the buffer to append to
         * @throws IOException if a nested stream cannot be read
         */
        private void serDic(PdfDictionary dic, int level, ByteBuffer bb) throws IOException {
            bb.append("$D");
            if (level <= 0) {
                return;
            }
            Object[] keys = dic.getKeys().toArray();
            Arrays.sort(keys);
            for (int k = 0; k < keys.length; ++k) {
                serObject((PdfObject) keys[k], level, bb);
                serObject(dic.get((PdfName) keys[k]), level, bb);
            }
        }

        /**
         * Appends an array marker and, depth permitting, every element in order.
         *
         * @param array the array to serialize
         * @param level the remaining depth; only the marker is written when it is not positive
         * @param bb the buffer to append to
         * @throws IOException if a nested stream cannot be read
         */
        private void serArray(PdfArray array, int level, ByteBuffer bb) throws IOException {
            bb.append("$A");
            if (level <= 0) {
                return;
            }
            for (int k = 0; k < array.size(); ++k) {
                serObject(array.getPdfObject(k), level, bb);
            }
        }

        /**
         * Builds the key for a stream.
         *
         * @param str the stream to serialize
         * @throws IOException if the stream bytes cannot be read
         */
        ByteStore(PRStream str) throws IOException {
            try {
                md5 = MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException e) {
                throw new ExceptionConverter(e);
            }
            ByteBuffer bb = new ByteBuffer();
            serObject(str, MAX_LEVEL, bb);
            this.b = bb.toByteArray();
            // The digest is only needed while serializing; drop it afterwards.
            md5 = null;
        }

        /**
         * Two stores are equal when their serialized bytes are equal.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ByteStore)) {
                return false;
            }
            // Cheap rejection on the cached hash before comparing the arrays.
            if (hashCode() != obj.hashCode()) {
                return false;
            }
            return Arrays.equals(b, ((ByteStore) obj).b);
        }

        /**
         * Returns a hash over the serialized bytes, computed on first use and
         * cached in {@link #hash}.
         */
        @Override
        public int hashCode() {
            if (hash == 0) {
                int len = b.length;
                for (int k = 0; k < len; ++k) {
                    hash = hash * 31 + (b[k] & 0xff);
                }
            }
            return hash;
        }
    }
}