001/* 002 * $Id: PdfReader.java 4883 2011-05-24 19:17:29Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. 
Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.pdf; 045 046import java.io.ByteArrayInputStream; 047import java.io.ByteArrayOutputStream; 048import java.io.DataInputStream; 049import java.io.IOException; 050import java.io.InputStream; 051import java.net.URL; 052import java.security.Key; 053import java.security.MessageDigest; 054import java.security.cert.Certificate; 055import java.util.ArrayList; 056import java.util.Arrays; 057import java.util.Collections; 058import java.util.HashMap; 059import java.util.Iterator; 060import java.util.List; 061import java.util.Map; 062import java.util.Set; 063import java.util.Stack; 064import java.util.zip.InflaterInputStream; 065 066import org.bouncycastle.cms.CMSEnvelopedData; 067import org.bouncycastle.cms.RecipientInformation; 068 069import com.itextpdf.text.ExceptionConverter; 070import com.itextpdf.text.PageSize; 071import com.itextpdf.text.Rectangle; 072import com.itextpdf.text.error_messages.MessageLocalization; 073import com.itextpdf.text.exceptions.BadPasswordException; 074import com.itextpdf.text.exceptions.InvalidPdfException; 075import com.itextpdf.text.exceptions.UnsupportedPdfException; 076import com.itextpdf.text.pdf.PRTokeniser.TokenType; 077import com.itextpdf.text.pdf.codec.TIFFConstants; 078import com.itextpdf.text.pdf.codec.TIFFFaxDecoder; 079import com.itextpdf.text.pdf.codec.TIFFFaxDecompressor; 080import com.itextpdf.text.pdf.interfaces.PdfViewerPreferences; 081import com.itextpdf.text.pdf.internal.PdfViewerPreferencesImp; 082 083/** Reads a PDF 
document. 084 * @author Paulo Soares 085 * @author Kazuya Ujihara 086 */ 087public class PdfReader implements PdfViewerPreferences { 088 089 /** 090 * The iText developers are not responsible if you decide to change the 091 * value of this static parameter. 092 * @since 5.0.2 093 */ 094 public static boolean unethicalreading = false; 095 096 static final PdfName pageInhCandidates[] = { 097 PdfName.MEDIABOX, PdfName.ROTATE, PdfName.RESOURCES, PdfName.CROPBOX 098 }; 099 100 static final byte endstream[] = PdfEncodings.convertToBytes("endstream", null); 101 static final byte endobj[] = PdfEncodings.convertToBytes("endobj", null); 102 protected PRTokeniser tokens; 103 // Each xref pair is a position 104 // type 0 -> -1, 0 105 // type 1 -> offset, 0 106 // type 2 -> index, obj num 107 protected int xref[]; 108 protected HashMap<Integer, IntHashtable> objStmMark; 109 protected IntHashtable objStmToOffset; 110 protected boolean newXrefType; 111 private ArrayList<PdfObject> xrefObj; 112 PdfDictionary rootPages; 113 protected PdfDictionary trailer; 114 protected PdfDictionary catalog; 115 protected PageRefs pageRefs; 116 protected PRAcroForm acroForm = null; 117 protected boolean acroFormParsed = false; 118 protected boolean encrypted = false; 119 protected boolean rebuilt = false; 120 protected int freeXref; 121 protected boolean tampered = false; 122 protected int lastXref; 123 protected int eofPos; 124 protected char pdfVersion; 125 protected PdfEncryption decrypt; 126 protected byte password[] = null; //added by ujihara for decryption 127 protected Key certificateKey = null; //added by Aiken Sam for certificate decryption 128 protected Certificate certificate = null; //added by Aiken Sam for certificate decryption 129 protected String certificateKeyProvider = null; //added by Aiken Sam for certificate decryption 130 private boolean ownerPasswordUsed; 131 protected ArrayList<PdfString> strings = new ArrayList<PdfString>(); 132 protected boolean sharedStreams = true; 133 
protected boolean consolidateNamedDestinations = false; 134 protected boolean remoteToLocalNamedDestinations = false; 135 protected int rValue; 136 protected int pValue; 137 private int objNum; 138 private int objGen; 139 private int fileLength; 140 private boolean hybridXref; 141 private int lastXrefPartial = -1; 142 private boolean partial; 143 144 private PRIndirectReference cryptoRef; 145 private final PdfViewerPreferencesImp viewerPreferences = new PdfViewerPreferencesImp(); 146 private boolean encryptionError; 147 148 /** 149 * Holds value of property appendable. 150 */ 151 private boolean appendable; 152 153 protected PdfReader() { 154 } 155 156 /** Reads and parses a PDF document. 157 * @param filename the file name of the document 158 * @throws IOException on error 159 */ 160 public PdfReader(final String filename) throws IOException { 161 this(filename, null); 162 } 163 164 /** Reads and parses a PDF document. 165 * @param filename the file name of the document 166 * @param ownerPassword the password to read the document 167 * @throws IOException on error 168 */ 169 public PdfReader(final String filename, final byte ownerPassword[]) throws IOException { 170 password = ownerPassword; 171 tokens = new PRTokeniser(filename); 172 readPdf(); 173 } 174 175 /** Reads and parses a PDF document. 176 * @param pdfIn the byte array with the document 177 * @throws IOException on error 178 */ 179 public PdfReader(final byte pdfIn[]) throws IOException { 180 this(pdfIn, null); 181 } 182 183 /** Reads and parses a PDF document. 184 * @param pdfIn the byte array with the document 185 * @param ownerPassword the password to read the document 186 * @throws IOException on error 187 */ 188 public PdfReader(final byte pdfIn[], final byte ownerPassword[]) throws IOException { 189 password = ownerPassword; 190 tokens = new PRTokeniser(pdfIn); 191 readPdf(); 192 } 193 194 /** Reads and parses a PDF document. 
195 * @param filename the file name of the document 196 * @param certificate the certificate to read the document 197 * @param certificateKey the private key of the certificate 198 * @param certificateKeyProvider the security provider for certificateKey 199 * @throws IOException on error 200 */ 201 public PdfReader(final String filename, final Certificate certificate, final Key certificateKey, final String certificateKeyProvider) throws IOException { 202 this.certificate = certificate; 203 this.certificateKey = certificateKey; 204 this.certificateKeyProvider = certificateKeyProvider; 205 tokens = new PRTokeniser(filename); 206 readPdf(); 207 } 208 209 /** Reads and parses a PDF document. 210 * @param url the URL of the document 211 * @throws IOException on error 212 */ 213 public PdfReader(final URL url) throws IOException { 214 this(url, null); 215 } 216 217 /** Reads and parses a PDF document. 218 * @param url the URL of the document 219 * @param ownerPassword the password to read the document 220 * @throws IOException on error 221 */ 222 public PdfReader(final URL url, final byte ownerPassword[]) throws IOException { 223 password = ownerPassword; 224 tokens = new PRTokeniser(new RandomAccessFileOrArray(url)); 225 readPdf(); 226 } 227 228 /** 229 * Reads and parses a PDF document. 230 * @param is the <CODE>InputStream</CODE> containing the document. The stream is read to the 231 * end but is not closed 232 * @param ownerPassword the password to read the document 233 * @throws IOException on error 234 */ 235 public PdfReader(final InputStream is, final byte ownerPassword[]) throws IOException { 236 password = ownerPassword; 237 tokens = new PRTokeniser(new RandomAccessFileOrArray(is)); 238 readPdf(); 239 } 240 241 /** 242 * Reads and parses a PDF document. 243 * @param is the <CODE>InputStream</CODE> containing the document. 
The stream is read to the 244 * end but is not closed 245 * @throws IOException on error 246 */ 247 public PdfReader(final InputStream is) throws IOException { 248 this(is, null); 249 } 250 251 /** 252 * Reads and parses a pdf document. Contrary to the other constructors only the xref is read 253 * into memory. The reader is said to be working in "partial" mode as only parts of the pdf 254 * are read as needed. The pdf is left open but may be closed at any time with 255 * <CODE>PdfReader.close()</CODE>, reopen is automatic. 256 * @param raf the document location 257 * @param ownerPassword the password or <CODE>null</CODE> for no password 258 * @throws IOException on error 259 */ 260 public PdfReader(final RandomAccessFileOrArray raf, final byte ownerPassword[]) throws IOException { 261 password = ownerPassword; 262 partial = true; 263 tokens = new PRTokeniser(raf); 264 readPdfPartial(); 265 } 266 267 /** Creates an independent duplicate. 268 * @param reader the <CODE>PdfReader</CODE> to duplicate 269 */ 270 public PdfReader(final PdfReader reader) { 271 this.appendable = reader.appendable; 272 this.consolidateNamedDestinations = reader.consolidateNamedDestinations; 273 this.encrypted = reader.encrypted; 274 this.rebuilt = reader.rebuilt; 275 this.sharedStreams = reader.sharedStreams; 276 this.tampered = reader.tampered; 277 this.password = reader.password; 278 this.pdfVersion = reader.pdfVersion; 279 this.eofPos = reader.eofPos; 280 this.freeXref = reader.freeXref; 281 this.lastXref = reader.lastXref; 282 this.tokens = new PRTokeniser(reader.tokens.getSafeFile()); 283 if (reader.decrypt != null) 284 this.decrypt = new PdfEncryption(reader.decrypt); 285 this.pValue = reader.pValue; 286 this.rValue = reader.rValue; 287 this.xrefObj = new ArrayList<PdfObject>(reader.xrefObj); 288 for (int k = 0; k < reader.xrefObj.size(); ++k) { 289 this.xrefObj.set(k, duplicatePdfObject(reader.xrefObj.get(k), this)); 290 } 291 this.pageRefs = new PageRefs(reader.pageRefs, this); 292 
this.trailer = (PdfDictionary)duplicatePdfObject(reader.trailer, this); 293 this.catalog = trailer.getAsDict(PdfName.ROOT); 294 this.rootPages = catalog.getAsDict(PdfName.PAGES); 295 this.fileLength = reader.fileLength; 296 this.partial = reader.partial; 297 this.hybridXref = reader.hybridXref; 298 this.objStmToOffset = reader.objStmToOffset; 299 this.xref = reader.xref; 300 this.cryptoRef = (PRIndirectReference)duplicatePdfObject(reader.cryptoRef, this); 301 this.ownerPasswordUsed = reader.ownerPasswordUsed; 302 } 303 304 /** Gets a new file instance of the original PDF 305 * document. 306 * @return a new file instance of the original PDF document 307 */ 308 public RandomAccessFileOrArray getSafeFile() { 309 return tokens.getSafeFile(); 310 } 311 312 protected PdfReaderInstance getPdfReaderInstance(final PdfWriter writer) { 313 return new PdfReaderInstance(this, writer); 314 } 315 316 /** Gets the number of pages in the document. 317 * @return the number of pages in the document 318 */ 319 public int getNumberOfPages() { 320 return pageRefs.size(); 321 } 322 323 /** Returns the document's catalog. This dictionary is not a copy, 324 * any changes will be reflected in the catalog. 325 * @return the document's catalog 326 */ 327 public PdfDictionary getCatalog() { 328 return catalog; 329 } 330 331 /** Returns the document's acroform, if it has one. 332 * @return the document's acroform 333 */ 334 public PRAcroForm getAcroForm() { 335 if (!acroFormParsed) { 336 acroFormParsed = true; 337 PdfObject form = catalog.get(PdfName.ACROFORM); 338 if (form != null) { 339 try { 340 acroForm = new PRAcroForm(this); 341 acroForm.readAcroForm((PdfDictionary)getPdfObject(form)); 342 } 343 catch (Exception e) { 344 acroForm = null; 345 } 346 } 347 } 348 return acroForm; 349 } 350 /** 351 * Gets the page rotation. This value can be 0, 90, 180 or 270. 352 * @param index the page number. 
The first page is 1 353 * @return the page rotation 354 */ 355 public int getPageRotation(final int index) { 356 return getPageRotation(pageRefs.getPageNRelease(index)); 357 } 358 359 int getPageRotation(final PdfDictionary page) { 360 PdfNumber rotate = page.getAsNumber(PdfName.ROTATE); 361 if (rotate == null) 362 return 0; 363 else { 364 int n = rotate.intValue(); 365 n %= 360; 366 return n < 0 ? n + 360 : n; 367 } 368 } 369 /** Gets the page size, taking rotation into account. This 370 * is a <CODE>Rectangle</CODE> with the value of the /MediaBox and the /Rotate key. 371 * @param index the page number. The first page is 1 372 * @return a <CODE>Rectangle</CODE> 373 */ 374 public Rectangle getPageSizeWithRotation(final int index) { 375 return getPageSizeWithRotation(pageRefs.getPageNRelease(index)); 376 } 377 378 /** 379 * Gets the rotated page from a page dictionary. 380 * @param page the page dictionary 381 * @return the rotated page 382 */ 383 public Rectangle getPageSizeWithRotation(final PdfDictionary page) { 384 Rectangle rect = getPageSize(page); 385 int rotation = getPageRotation(page); 386 while (rotation > 0) { 387 rect = rect.rotate(); 388 rotation -= 90; 389 } 390 return rect; 391 } 392 393 /** Gets the page size without taking rotation into account. This 394 * is the value of the /MediaBox key. 395 * @param index the page number. The first page is 1 396 * @return the page size 397 */ 398 public Rectangle getPageSize(final int index) { 399 return getPageSize(pageRefs.getPageNRelease(index)); 400 } 401 402 /** 403 * Gets the page from a page dictionary 404 * @param page the page dictionary 405 * @return the page 406 */ 407 public Rectangle getPageSize(final PdfDictionary page) { 408 PdfArray mediaBox = page.getAsArray(PdfName.MEDIABOX); 409 return getNormalizedRectangle(mediaBox); 410 } 411 412 /** Gets the crop box without taking rotation into account. This 413 * is the value of the /CropBox key. 
The crop box is the part 414 * of the document to be displayed or printed. It usually is the same 415 * as the media box but may be smaller. If the page doesn't have a crop 416 * box the page size will be returned. 417 * @param index the page number. The first page is 1 418 * @return the crop box 419 */ 420 public Rectangle getCropBox(final int index) { 421 PdfDictionary page = pageRefs.getPageNRelease(index); 422 PdfArray cropBox = (PdfArray)getPdfObjectRelease(page.get(PdfName.CROPBOX)); 423 if (cropBox == null) 424 return getPageSize(page); 425 return getNormalizedRectangle(cropBox); 426 } 427 428 /** Gets the box size. Allowed names are: "crop", "trim", "art", "bleed" and "media". 429 * @param index the page number. The first page is 1 430 * @param boxName the box name 431 * @return the box rectangle or null 432 */ 433 public Rectangle getBoxSize(final int index, final String boxName) { 434 PdfDictionary page = pageRefs.getPageNRelease(index); 435 PdfArray box = null; 436 if (boxName.equals("trim")) 437 box = (PdfArray)getPdfObjectRelease(page.get(PdfName.TRIMBOX)); 438 else if (boxName.equals("art")) 439 box = (PdfArray)getPdfObjectRelease(page.get(PdfName.ARTBOX)); 440 else if (boxName.equals("bleed")) 441 box = (PdfArray)getPdfObjectRelease(page.get(PdfName.BLEEDBOX)); 442 else if (boxName.equals("crop")) 443 box = (PdfArray)getPdfObjectRelease(page.get(PdfName.CROPBOX)); 444 else if (boxName.equals("media")) 445 box = (PdfArray)getPdfObjectRelease(page.get(PdfName.MEDIABOX)); 446 if (box == null) 447 return null; 448 return getNormalizedRectangle(box); 449 } 450 451 /** Returns the content of the document information dictionary as a <CODE>HashMap</CODE> 452 * of <CODE>String</CODE>. 
453 * @return content of the document information dictionary 454 */ 455 public HashMap<String, String> getInfo() { 456 HashMap<String, String> map = new HashMap<String, String>(); 457 PdfDictionary info = trailer.getAsDict(PdfName.INFO); 458 if (info == null) 459 return map; 460 for (Object element : info.getKeys()) { 461 PdfName key = (PdfName)element; 462 PdfObject obj = getPdfObject(info.get(key)); 463 if (obj == null) 464 continue; 465 String value = obj.toString(); 466 switch (obj.type()) { 467 case PdfObject.STRING: { 468 value = ((PdfString)obj).toUnicodeString(); 469 break; 470 } 471 case PdfObject.NAME: { 472 value = PdfName.decodeName(value); 473 break; 474 } 475 } 476 map.put(PdfName.decodeName(key.toString()), value); 477 } 478 return map; 479 } 480 481 /** Normalizes a <CODE>Rectangle</CODE> so that llx and lly are smaller than urx and ury. 482 * @param box the original rectangle 483 * @return a normalized <CODE>Rectangle</CODE> 484 */ 485 public static Rectangle getNormalizedRectangle(final PdfArray box) { 486 float llx = ((PdfNumber)getPdfObjectRelease(box.getPdfObject(0))).floatValue(); 487 float lly = ((PdfNumber)getPdfObjectRelease(box.getPdfObject(1))).floatValue(); 488 float urx = ((PdfNumber)getPdfObjectRelease(box.getPdfObject(2))).floatValue(); 489 float ury = ((PdfNumber)getPdfObjectRelease(box.getPdfObject(3))).floatValue(); 490 return new Rectangle(Math.min(llx, urx), Math.min(lly, ury), 491 Math.max(llx, urx), Math.max(lly, ury)); 492 } 493 494 protected void readPdf() throws IOException { 495 try { 496 fileLength = tokens.getFile().length(); 497 pdfVersion = tokens.checkPdfHeader(); 498 try { 499 readXref(); 500 } 501 catch (Exception e) { 502 try { 503 rebuilt = true; 504 rebuildXref(); 505 lastXref = -1; 506 } 507 catch (Exception ne) { 508 throw new InvalidPdfException(MessageLocalization.getComposedMessage("rebuild.failed.1.original.message.2", ne.getMessage(), e.getMessage())); 509 } 510 } 511 try { 512 readDocObj(); 513 } 514 catch 
(Exception e) { 515 if (e instanceof BadPasswordException) 516 throw new BadPasswordException(e.getMessage()); 517 if (rebuilt || encryptionError) 518 throw new InvalidPdfException(e.getMessage()); 519 rebuilt = true; 520 encrypted = false; 521 rebuildXref(); 522 lastXref = -1; 523 readDocObj(); 524 } 525 526 strings.clear(); 527 readPages(); 528 eliminateSharedStreams(); 529 removeUnusedObjects(); 530 } 531 finally { 532 try { 533 tokens.close(); 534 } 535 catch (Exception e) { 536 // empty on purpose 537 } 538 } 539 } 540 541 protected void readPdfPartial() throws IOException { 542 try { 543 fileLength = tokens.getFile().length(); 544 pdfVersion = tokens.checkPdfHeader(); 545 try { 546 readXref(); 547 } 548 catch (Exception e) { 549 try { 550 rebuilt = true; 551 rebuildXref(); 552 lastXref = -1; 553 } 554 catch (Exception ne) { 555 throw new InvalidPdfException(MessageLocalization.getComposedMessage("rebuild.failed.1.original.message.2", ne.getMessage(), e.getMessage())); 556 } 557 } 558 readDocObjPartial(); 559 readPages(); 560 } 561 catch (IOException e) { 562 try{tokens.close();}catch(Exception ee){} 563 throw e; 564 } 565 } 566 567 private boolean equalsArray(final byte ar1[], final byte ar2[], final int size) { 568 for (int k = 0; k < size; ++k) { 569 if (ar1[k] != ar2[k]) 570 return false; 571 } 572 return true; 573 } 574 575 /** 576 * @throws IOException 577 */ 578 @SuppressWarnings("unchecked") 579 private void readDecryptedDocObj() throws IOException { 580 if (encrypted) 581 return; 582 PdfObject encDic = trailer.get(PdfName.ENCRYPT); 583 if (encDic == null || encDic.toString().equals("null")) 584 return; 585 encryptionError = true; 586 byte[] encryptionKey = null; 587 encrypted = true; 588 PdfDictionary enc = (PdfDictionary)getPdfObject(encDic); 589 590 String s; 591 PdfObject o; 592 593 PdfArray documentIDs = trailer.getAsArray(PdfName.ID); 594 byte documentID[] = null; 595 if (documentIDs != null) { 596 o = documentIDs.getPdfObject(0); 597 
strings.remove(o); 598 s = o.toString(); 599 documentID = com.itextpdf.text.DocWriter.getISOBytes(s); 600 if (documentIDs.size() > 1) 601 strings.remove(documentIDs.getPdfObject(1)); 602 } 603 // just in case we have a broken producer 604 if (documentID == null) 605 documentID = new byte[0]; 606 byte uValue[] = null; 607 byte oValue[] = null; 608 int cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; 609 int lengthValue = 0; 610 611 PdfObject filter = getPdfObjectRelease(enc.get(PdfName.FILTER)); 612 613 if (filter.equals(PdfName.STANDARD)) { 614 s = enc.get(PdfName.U).toString(); 615 strings.remove(enc.get(PdfName.U)); 616 uValue = com.itextpdf.text.DocWriter.getISOBytes(s); 617 s = enc.get(PdfName.O).toString(); 618 strings.remove(enc.get(PdfName.O)); 619 oValue = com.itextpdf.text.DocWriter.getISOBytes(s); 620 if (enc.contains(PdfName.OE)) 621 strings.remove(enc.get(PdfName.OE)); 622 if (enc.contains(PdfName.UE)) 623 strings.remove(enc.get(PdfName.UE)); 624 if (enc.contains(PdfName.PERMS)) 625 strings.remove(enc.get(PdfName.PERMS)); 626 627 o = enc.get(PdfName.P); 628 if (!o.isNumber()) 629 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.p.value")); 630 pValue = ((PdfNumber)o).intValue(); 631 632 o = enc.get(PdfName.R); 633 if (!o.isNumber()) 634 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.r.value")); 635 rValue = ((PdfNumber)o).intValue(); 636 637 switch (rValue) { 638 case 2: 639 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; 640 break; 641 case 3: 642 o = enc.get(PdfName.LENGTH); 643 if (!o.isNumber()) 644 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.length.value")); 645 lengthValue = ( (PdfNumber) o).intValue(); 646 if (lengthValue > 128 || lengthValue < 40 || lengthValue % 8 != 0) 647 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.length.value")); 648 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; 649 break; 650 case 4: 651 
PdfDictionary dic = (PdfDictionary)enc.get(PdfName.CF); 652 if (dic == null) 653 throw new InvalidPdfException(MessageLocalization.getComposedMessage("cf.not.found.encryption")); 654 dic = (PdfDictionary)dic.get(PdfName.STDCF); 655 if (dic == null) 656 throw new InvalidPdfException(MessageLocalization.getComposedMessage("stdcf.not.found.encryption")); 657 if (PdfName.V2.equals(dic.get(PdfName.CFM))) 658 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; 659 else if (PdfName.AESV2.equals(dic.get(PdfName.CFM))) 660 cryptoMode = PdfWriter.ENCRYPTION_AES_128; 661 else 662 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("no.compatible.encryption.found")); 663 PdfObject em = enc.get(PdfName.ENCRYPTMETADATA); 664 if (em != null && em.toString().equals("false")) 665 cryptoMode |= PdfWriter.DO_NOT_ENCRYPT_METADATA; 666 break; 667 case 5: 668 cryptoMode = PdfWriter.ENCRYPTION_AES_256; 669 PdfObject em5 = enc.get(PdfName.ENCRYPTMETADATA); 670 if (em5 != null && em5.toString().equals("false")) 671 cryptoMode |= PdfWriter.DO_NOT_ENCRYPT_METADATA; 672 break; 673 default: 674 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("unknown.encryption.type.r.eq.1", rValue)); 675 } 676 } 677 else if (filter.equals(PdfName.PUBSEC)) { 678 boolean foundRecipient = false; 679 byte[] envelopedData = null; 680 PdfArray recipients = null; 681 682 o = enc.get(PdfName.V); 683 if (!o.isNumber()) 684 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.v.value")); 685 int vValue = ((PdfNumber)o).intValue(); 686 switch(vValue) { 687 case 1: 688 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_40; 689 lengthValue = 40; 690 recipients = (PdfArray)enc.get(PdfName.RECIPIENTS); 691 break; 692 case 2: 693 o = enc.get(PdfName.LENGTH); 694 if (!o.isNumber()) 695 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.length.value")); 696 lengthValue = ( (PdfNumber) o).intValue(); 697 if (lengthValue > 128 || lengthValue < 
40 || lengthValue % 8 != 0) 698 throw new InvalidPdfException(MessageLocalization.getComposedMessage("illegal.length.value")); 699 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; 700 recipients = (PdfArray)enc.get(PdfName.RECIPIENTS); 701 break; 702 case 4: 703 PdfDictionary dic = (PdfDictionary)enc.get(PdfName.CF); 704 if (dic == null) 705 throw new InvalidPdfException(MessageLocalization.getComposedMessage("cf.not.found.encryption")); 706 dic = (PdfDictionary)dic.get(PdfName.DEFAULTCRYPTFILTER); 707 if (dic == null) 708 throw new InvalidPdfException(MessageLocalization.getComposedMessage("defaultcryptfilter.not.found.encryption")); 709 if (PdfName.V2.equals(dic.get(PdfName.CFM))) { 710 cryptoMode = PdfWriter.STANDARD_ENCRYPTION_128; 711 lengthValue = 128; 712 } 713 else if (PdfName.AESV2.equals(dic.get(PdfName.CFM))) { 714 cryptoMode = PdfWriter.ENCRYPTION_AES_128; 715 lengthValue = 128; 716 } 717 else 718 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("no.compatible.encryption.found")); 719 PdfObject em = dic.get(PdfName.ENCRYPTMETADATA); 720 if (em != null && em.toString().equals("false")) 721 cryptoMode |= PdfWriter.DO_NOT_ENCRYPT_METADATA; 722 723 recipients = (PdfArray)dic.get(PdfName.RECIPIENTS); 724 break; 725 default: 726 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("unknown.encryption.type.v.eq.1", rValue)); 727 } 728 for (int i = 0; i<recipients.size(); i++) { 729 PdfObject recipient = recipients.getPdfObject(i); 730 strings.remove(recipient); 731 732 CMSEnvelopedData data = null; 733 try { 734 data = new CMSEnvelopedData(recipient.getBytes()); 735 736 Iterator<RecipientInformation> recipientCertificatesIt = data.getRecipientInfos().getRecipients().iterator(); 737 738 while (recipientCertificatesIt.hasNext()) { 739 RecipientInformation recipientInfo = recipientCertificatesIt.next(); 740 741 if (recipientInfo.getRID().match(certificate) && !foundRecipient) { 742 envelopedData = 
recipientInfo.getContent(certificateKey, certificateKeyProvider); 743 foundRecipient = true; 744 } 745 } 746 } 747 catch (Exception f) { 748 throw new ExceptionConverter(f); 749 } 750 } 751 752 if(!foundRecipient || envelopedData == null) { 753 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("bad.certificate.and.key")); 754 } 755 756 MessageDigest md = null; 757 758 try { 759 md = MessageDigest.getInstance("SHA-1"); 760 md.update(envelopedData, 0, 20); 761 for (int i = 0; i<recipients.size(); i++) { 762 byte[] encodedRecipient = recipients.getPdfObject(i).getBytes(); 763 md.update(encodedRecipient); 764 } 765 if ((cryptoMode & PdfWriter.DO_NOT_ENCRYPT_METADATA) != 0) 766 md.update(new byte[]{(byte)255, (byte)255, (byte)255, (byte)255}); 767 encryptionKey = md.digest(); 768 } 769 catch (Exception f) { 770 throw new ExceptionConverter(f); 771 } 772 } 773 774 775 decrypt = new PdfEncryption(); 776 decrypt.setCryptoMode(cryptoMode, lengthValue); 777 778 if (filter.equals(PdfName.STANDARD)) { 779 if (rValue == 5) { 780 ownerPasswordUsed = decrypt.readKey(enc, password); 781 pValue = decrypt.getPermissions(); 782 } 783 else { 784 //check by owner password 785 decrypt.setupByOwnerPassword(documentID, password, uValue, oValue, pValue); 786 if (!equalsArray(uValue, decrypt.userKey, rValue == 3 || rValue == 4 ? 16 : 32)) { 787 //check by user password 788 decrypt.setupByUserPassword(documentID, password, oValue, pValue); 789 if (!equalsArray(uValue, decrypt.userKey, rValue == 3 || rValue == 4 ? 
16 : 32)) { 790 throw new BadPasswordException(MessageLocalization.getComposedMessage("bad.user.password")); 791 } 792 } 793 else 794 ownerPasswordUsed = true; 795 } 796 } 797 else if (filter.equals(PdfName.PUBSEC)) { 798 decrypt.setupByEncryptionKey(encryptionKey, lengthValue); 799 ownerPasswordUsed = true; 800 } 801 802 for (int k = 0; k < strings.size(); ++k) { 803 PdfString str = strings.get(k); 804 str.decrypt(this); 805 } 806 807 if (encDic.isIndirect()) { 808 cryptoRef = (PRIndirectReference)encDic; 809 xrefObj.set(cryptoRef.getNumber(), null); 810 } 811 encryptionError = false; 812 } 813 814 /** 815 * @param obj 816 * @return a PdfObject 817 */ 818 public static PdfObject getPdfObjectRelease(final PdfObject obj) { 819 PdfObject obj2 = getPdfObject(obj); 820 releaseLastXrefPartial(obj); 821 return obj2; 822 } 823 824 825 /** 826 * Reads a <CODE>PdfObject</CODE> resolving an indirect reference 827 * if needed. 828 * @param obj the <CODE>PdfObject</CODE> to read 829 * @return the resolved <CODE>PdfObject</CODE> 830 */ 831 public static PdfObject getPdfObject(PdfObject obj) { 832 if (obj == null) 833 return null; 834 if (!obj.isIndirect()) 835 return obj; 836 try { 837 PRIndirectReference ref = (PRIndirectReference)obj; 838 int idx = ref.getNumber(); 839 boolean appendable = ref.getReader().appendable; 840 obj = ref.getReader().getPdfObject(idx); 841 if (obj == null) { 842 return null; 843 } 844 else { 845 if (appendable) { 846 switch (obj.type()) { 847 case PdfObject.NULL: 848 obj = new PdfNull(); 849 break; 850 case PdfObject.BOOLEAN: 851 obj = new PdfBoolean(((PdfBoolean)obj).booleanValue()); 852 break; 853 case PdfObject.NAME: 854 obj = new PdfName(obj.getBytes()); 855 break; 856 } 857 obj.setIndRef(ref); 858 } 859 return obj; 860 } 861 } 862 catch (Exception e) { 863 throw new ExceptionConverter(e); 864 } 865 } 866 867 /** 868 * Reads a <CODE>PdfObject</CODE> resolving an indirect reference 869 * if needed. 
If the reader was opened in partial mode the object will be released 870 * to save memory. 871 * @param obj the <CODE>PdfObject</CODE> to read 872 * @param parent 873 * @return a PdfObject 874 */ 875 public static PdfObject getPdfObjectRelease(final PdfObject obj, final PdfObject parent) { 876 PdfObject obj2 = getPdfObject(obj, parent); 877 releaseLastXrefPartial(obj); 878 return obj2; 879 } 880 881 /** 882 * @param obj 883 * @param parent 884 * @return a PdfObject 885 */ 886 public static PdfObject getPdfObject(PdfObject obj, final PdfObject parent) { 887 if (obj == null) 888 return null; 889 if (!obj.isIndirect()) { 890 PRIndirectReference ref = null; 891 if (parent != null && (ref = parent.getIndRef()) != null && ref.getReader().isAppendable()) { 892 switch (obj.type()) { 893 case PdfObject.NULL: 894 obj = new PdfNull(); 895 break; 896 case PdfObject.BOOLEAN: 897 obj = new PdfBoolean(((PdfBoolean)obj).booleanValue()); 898 break; 899 case PdfObject.NAME: 900 obj = new PdfName(obj.getBytes()); 901 break; 902 } 903 obj.setIndRef(ref); 904 } 905 return obj; 906 } 907 return getPdfObject(obj); 908 } 909 910 /** 911 * @param idx 912 * @return a PdfObject 913 */ 914 public PdfObject getPdfObjectRelease(final int idx) { 915 PdfObject obj = getPdfObject(idx); 916 releaseLastXrefPartial(); 917 return obj; 918 } 919 920 /** 921 * @param idx 922 * @return aPdfObject 923 */ 924 public PdfObject getPdfObject(final int idx) { 925 try { 926 lastXrefPartial = -1; 927 if (idx < 0 || idx >= xrefObj.size()) 928 return null; 929 PdfObject obj = xrefObj.get(idx); 930 if (!partial || obj != null) 931 return obj; 932 if (idx * 2 >= xref.length) 933 return null; 934 obj = readSingleObject(idx); 935 lastXrefPartial = -1; 936 if (obj != null) 937 lastXrefPartial = idx; 938 return obj; 939 } 940 catch (Exception e) { 941 throw new ExceptionConverter(e); 942 } 943 } 944 945 /** 946 * 947 */ 948 public void resetLastXrefPartial() { 949 lastXrefPartial = -1; 950 } 951 952 /** 953 * 954 */ 955 
public void releaseLastXrefPartial() { 956 if (partial && lastXrefPartial != -1) { 957 xrefObj.set(lastXrefPartial, null); 958 lastXrefPartial = -1; 959 } 960 } 961 962 /** 963 * @param obj 964 */ 965 public static void releaseLastXrefPartial(final PdfObject obj) { 966 if (obj == null) 967 return; 968 if (!obj.isIndirect()) 969 return; 970 if (!(obj instanceof PRIndirectReference)) 971 return; 972 973 PRIndirectReference ref = (PRIndirectReference)obj; 974 PdfReader reader = ref.getReader(); 975 if (reader.partial && reader.lastXrefPartial != -1 && reader.lastXrefPartial == ref.getNumber()) { 976 reader.xrefObj.set(reader.lastXrefPartial, null); 977 } 978 reader.lastXrefPartial = -1; 979 } 980 981 private void setXrefPartialObject(final int idx, final PdfObject obj) { 982 if (!partial || idx < 0) 983 return; 984 xrefObj.set(idx, obj); 985 } 986 987 /** 988 * @param obj 989 * @return an indirect reference 990 */ 991 public PRIndirectReference addPdfObject(final PdfObject obj) { 992 xrefObj.add(obj); 993 return new PRIndirectReference(this, xrefObj.size() - 1); 994 } 995 996 protected void readPages() throws IOException { 997 catalog = trailer.getAsDict(PdfName.ROOT); 998 rootPages = catalog.getAsDict(PdfName.PAGES); 999 pageRefs = new PageRefs(this); 1000 } 1001 1002 protected void readDocObjPartial() throws IOException { 1003 xrefObj = new ArrayList<PdfObject>(xref.length / 2); 1004 xrefObj.addAll(Collections.<PdfObject>nCopies(xref.length / 2, null)); 1005 readDecryptedDocObj(); 1006 if (objStmToOffset != null) { 1007 int keys[] = objStmToOffset.getKeys(); 1008 for (int k = 0; k < keys.length; ++k) { 1009 int n = keys[k]; 1010 objStmToOffset.put(n, xref[n * 2]); 1011 xref[n * 2] = -1; 1012 } 1013 } 1014 } 1015 1016 protected PdfObject readSingleObject(final int k) throws IOException { 1017 strings.clear(); 1018 int k2 = k * 2; 1019 int pos = xref[k2]; 1020 if (pos < 0) 1021 return null; 1022 if (xref[k2 + 1] > 0) 1023 pos = objStmToOffset.get(xref[k2 + 1]); 1024 
if (pos == 0) 1025 return null; 1026 tokens.seek(pos); 1027 tokens.nextValidToken(); 1028 if (tokens.getTokenType() != TokenType.NUMBER) 1029 tokens.throwError(MessageLocalization.getComposedMessage("invalid.object.number")); 1030 objNum = tokens.intValue(); 1031 tokens.nextValidToken(); 1032 if (tokens.getTokenType() != TokenType.NUMBER) 1033 tokens.throwError(MessageLocalization.getComposedMessage("invalid.generation.number")); 1034 objGen = tokens.intValue(); 1035 tokens.nextValidToken(); 1036 if (!tokens.getStringValue().equals("obj")) 1037 tokens.throwError(MessageLocalization.getComposedMessage("token.obj.expected")); 1038 PdfObject obj; 1039 try { 1040 obj = readPRObject(); 1041 for (int j = 0; j < strings.size(); ++j) { 1042 PdfString str = strings.get(j); 1043 str.decrypt(this); 1044 } 1045 if (obj.isStream()) { 1046 checkPRStreamLength((PRStream)obj); 1047 } 1048 } 1049 catch (Exception e) { 1050 obj = null; 1051 } 1052 if (xref[k2 + 1] > 0) { 1053 obj = readOneObjStm((PRStream)obj, xref[k2]); 1054 } 1055 xrefObj.set(k, obj); 1056 return obj; 1057 } 1058 1059 protected PdfObject readOneObjStm(final PRStream stream, int idx) throws IOException { 1060 int first = stream.getAsNumber(PdfName.FIRST).intValue(); 1061 byte b[] = getStreamBytes(stream, tokens.getFile()); 1062 PRTokeniser saveTokens = tokens; 1063 tokens = new PRTokeniser(b); 1064 try { 1065 int address = 0; 1066 boolean ok = true; 1067 ++idx; 1068 for (int k = 0; k < idx; ++k) { 1069 ok = tokens.nextToken(); 1070 if (!ok) 1071 break; 1072 if (tokens.getTokenType() != TokenType.NUMBER) { 1073 ok = false; 1074 break; 1075 } 1076 ok = tokens.nextToken(); 1077 if (!ok) 1078 break; 1079 if (tokens.getTokenType() != TokenType.NUMBER) { 1080 ok = false; 1081 break; 1082 } 1083 address = tokens.intValue() + first; 1084 } 1085 if (!ok) 1086 throw new InvalidPdfException(MessageLocalization.getComposedMessage("error.reading.objstm")); 1087 tokens.seek(address); 1088 tokens.nextToken(); 1089 PdfObject obj; 
1090 if (tokens.getTokenType() == PRTokeniser.TokenType.NUMBER) { 1091 obj = new PdfNumber(tokens.getStringValue()); 1092 } 1093 else { 1094 tokens.seek(address); 1095 obj = readPRObject(); 1096 } 1097 return obj; 1098 //return readPRObject(); 1099 } 1100 finally { 1101 tokens = saveTokens; 1102 } 1103 } 1104 1105 /** 1106 * @return the percentage of the cross reference table that has been read 1107 */ 1108 public double dumpPerc() { 1109 int total = 0; 1110 for (int k = 0; k < xrefObj.size(); ++k) { 1111 if (xrefObj.get(k) != null) 1112 ++total; 1113 } 1114 return total * 100.0 / xrefObj.size(); 1115 } 1116 1117 protected void readDocObj() throws IOException { 1118 ArrayList<PRStream> streams = new ArrayList<PRStream>(); 1119 xrefObj = new ArrayList<PdfObject>(xref.length / 2); 1120 xrefObj.addAll(Collections.<PdfObject>nCopies(xref.length / 2, null)); 1121 for (int k = 2; k < xref.length; k += 2) { 1122 int pos = xref[k]; 1123 if (pos <= 0 || xref[k + 1] > 0) 1124 continue; 1125 tokens.seek(pos); 1126 tokens.nextValidToken(); 1127 if (tokens.getTokenType() != TokenType.NUMBER) 1128 tokens.throwError(MessageLocalization.getComposedMessage("invalid.object.number")); 1129 objNum = tokens.intValue(); 1130 tokens.nextValidToken(); 1131 if (tokens.getTokenType() != TokenType.NUMBER) 1132 tokens.throwError(MessageLocalization.getComposedMessage("invalid.generation.number")); 1133 objGen = tokens.intValue(); 1134 tokens.nextValidToken(); 1135 if (!tokens.getStringValue().equals("obj")) 1136 tokens.throwError(MessageLocalization.getComposedMessage("token.obj.expected")); 1137 PdfObject obj; 1138 try { 1139 obj = readPRObject(); 1140 if (obj.isStream()) { 1141 streams.add((PRStream)obj); 1142 } 1143 } 1144 catch (Exception e) { 1145 obj = null; 1146 } 1147 xrefObj.set(k / 2, obj); 1148 } 1149 for (int k = 0; k < streams.size(); ++k) { 1150 checkPRStreamLength(streams.get(k)); 1151 } 1152 readDecryptedDocObj(); 1153 if (objStmMark != null) { 1154 for (Map.Entry<Integer, 
IntHashtable>entry: objStmMark.entrySet()) { 1155 int n = entry.getKey().intValue(); 1156 IntHashtable h = entry.getValue(); 1157 readObjStm((PRStream)xrefObj.get(n), h); 1158 xrefObj.set(n, null); 1159 } 1160 objStmMark = null; 1161 } 1162 xref = null; 1163 } 1164 1165 private void checkPRStreamLength(final PRStream stream) throws IOException { 1166 int fileLength = tokens.length(); 1167 int start = stream.getOffset(); 1168 boolean calc = false; 1169 int streamLength = 0; 1170 PdfObject obj = getPdfObjectRelease(stream.get(PdfName.LENGTH)); 1171 if (obj != null && obj.type() == PdfObject.NUMBER) { 1172 streamLength = ((PdfNumber)obj).intValue(); 1173 if (streamLength + start > fileLength - 20) 1174 calc = true; 1175 else { 1176 tokens.seek(start + streamLength); 1177 String line = tokens.readString(20); 1178 if (!line.startsWith("\nendstream") && 1179 !line.startsWith("\r\nendstream") && 1180 !line.startsWith("\rendstream") && 1181 !line.startsWith("endstream")) 1182 calc = true; 1183 } 1184 } 1185 else 1186 calc = true; 1187 if (calc) { 1188 byte tline[] = new byte[16]; 1189 tokens.seek(start); 1190 while (true) { 1191 int pos = tokens.getFilePointer(); 1192 if (!tokens.readLineSegment(tline)) 1193 break; 1194 if (equalsn(tline, endstream)) { 1195 streamLength = pos - start; 1196 break; 1197 } 1198 if (equalsn(tline, endobj)) { 1199 tokens.seek(pos - 16); 1200 String s = tokens.readString(16); 1201 int index = s.indexOf("endstream"); 1202 if (index >= 0) 1203 pos = pos - 16 + index; 1204 streamLength = pos - start; 1205 break; 1206 } 1207 } 1208 } 1209 stream.setLength(streamLength); 1210 } 1211 1212 protected void readObjStm(final PRStream stream, final IntHashtable map) throws IOException { 1213 int first = stream.getAsNumber(PdfName.FIRST).intValue(); 1214 int n = stream.getAsNumber(PdfName.N).intValue(); 1215 byte b[] = getStreamBytes(stream, tokens.getFile()); 1216 PRTokeniser saveTokens = tokens; 1217 tokens = new PRTokeniser(b); 1218 try { 1219 int 
address[] = new int[n]; 1220 int objNumber[] = new int[n]; 1221 boolean ok = true; 1222 for (int k = 0; k < n; ++k) { 1223 ok = tokens.nextToken(); 1224 if (!ok) 1225 break; 1226 if (tokens.getTokenType() != TokenType.NUMBER) { 1227 ok = false; 1228 break; 1229 } 1230 objNumber[k] = tokens.intValue(); 1231 ok = tokens.nextToken(); 1232 if (!ok) 1233 break; 1234 if (tokens.getTokenType() != TokenType.NUMBER) { 1235 ok = false; 1236 break; 1237 } 1238 address[k] = tokens.intValue() + first; 1239 } 1240 if (!ok) 1241 throw new InvalidPdfException(MessageLocalization.getComposedMessage("error.reading.objstm")); 1242 for (int k = 0; k < n; ++k) { 1243 if (map.containsKey(k)) { 1244 tokens.seek(address[k]); 1245 tokens.nextToken(); 1246 PdfObject obj; 1247 if (tokens.getTokenType() == PRTokeniser.TokenType.NUMBER) { 1248 obj = new PdfNumber(tokens.getStringValue()); 1249 } 1250 else { 1251 tokens.seek(address[k]); 1252 obj = readPRObject(); 1253 } 1254 xrefObj.set(objNumber[k], obj); 1255 } 1256 } 1257 } 1258 finally { 1259 tokens = saveTokens; 1260 } 1261 } 1262 1263 /** 1264 * Eliminates the reference to the object freeing the memory used by it and clearing 1265 * the xref entry. 1266 * @param obj the object. 
If it's an indirect reference it will be eliminated 1267 * @return the object or the already erased dereferenced object 1268 */ 1269 public static PdfObject killIndirect(final PdfObject obj) { 1270 if (obj == null || obj.isNull()) 1271 return null; 1272 PdfObject ret = getPdfObjectRelease(obj); 1273 if (obj.isIndirect()) { 1274 PRIndirectReference ref = (PRIndirectReference)obj; 1275 PdfReader reader = ref.getReader(); 1276 int n = ref.getNumber(); 1277 reader.xrefObj.set(n, null); 1278 if (reader.partial) 1279 reader.xref[n * 2] = -1; 1280 } 1281 return ret; 1282 } 1283 1284 private void ensureXrefSize(final int size) { 1285 if (size == 0) 1286 return; 1287 if (xref == null) 1288 xref = new int[size]; 1289 else { 1290 if (xref.length < size) { 1291 int xref2[] = new int[size]; 1292 System.arraycopy(xref, 0, xref2, 0, xref.length); 1293 xref = xref2; 1294 } 1295 } 1296 } 1297 1298 protected void readXref() throws IOException { 1299 hybridXref = false; 1300 newXrefType = false; 1301 tokens.seek(tokens.getStartxref(1024)); 1302 tokens.nextToken(); 1303 if (!tokens.getStringValue().equals("startxref")) 1304 throw new InvalidPdfException(MessageLocalization.getComposedMessage("startxref.not.found")); 1305 tokens.nextToken(); 1306 if (tokens.getTokenType() != TokenType.NUMBER) 1307 throw new InvalidPdfException(MessageLocalization.getComposedMessage("startxref.is.not.followed.by.a.number")); 1308 int startxref = tokens.intValue(); 1309 lastXref = startxref; 1310 eofPos = tokens.getFilePointer(); 1311 try { 1312 if (readXRefStream(startxref)) { 1313 newXrefType = true; 1314 return; 1315 } 1316 } 1317 catch (Exception e) {} 1318 xref = null; 1319 tokens.seek(startxref); 1320 trailer = readXrefSection(); 1321 PdfDictionary trailer2 = trailer; 1322 while (true) { 1323 PdfNumber prev = (PdfNumber)trailer2.get(PdfName.PREV); 1324 if (prev == null) 1325 break; 1326 tokens.seek(prev.intValue()); 1327 trailer2 = readXrefSection(); 1328 } 1329 } 1330 1331 protected PdfDictionary 
readXrefSection() throws IOException { 1332 tokens.nextValidToken(); 1333 if (!tokens.getStringValue().equals("xref")) 1334 tokens.throwError(MessageLocalization.getComposedMessage("xref.subsection.not.found")); 1335 int start = 0; 1336 int end = 0; 1337 int pos = 0; 1338 int gen = 0; 1339 while (true) { 1340 tokens.nextValidToken(); 1341 if (tokens.getStringValue().equals("trailer")) 1342 break; 1343 if (tokens.getTokenType() != TokenType.NUMBER) 1344 tokens.throwError(MessageLocalization.getComposedMessage("object.number.of.the.first.object.in.this.xref.subsection.not.found")); 1345 start = tokens.intValue(); 1346 tokens.nextValidToken(); 1347 if (tokens.getTokenType() != TokenType.NUMBER) 1348 tokens.throwError(MessageLocalization.getComposedMessage("number.of.entries.in.this.xref.subsection.not.found")); 1349 end = tokens.intValue() + start; 1350 if (start == 1) { // fix incorrect start number 1351 int back = tokens.getFilePointer(); 1352 tokens.nextValidToken(); 1353 pos = tokens.intValue(); 1354 tokens.nextValidToken(); 1355 gen = tokens.intValue(); 1356 if (pos == 0 && gen == PdfWriter.GENERATION_MAX) { 1357 --start; 1358 --end; 1359 } 1360 tokens.seek(back); 1361 } 1362 ensureXrefSize(end * 2); 1363 for (int k = start; k < end; ++k) { 1364 tokens.nextValidToken(); 1365 pos = tokens.intValue(); 1366 tokens.nextValidToken(); 1367 gen = tokens.intValue(); 1368 tokens.nextValidToken(); 1369 int p = k * 2; 1370 if (tokens.getStringValue().equals("n")) { 1371 if (xref[p] == 0 && xref[p + 1] == 0) { 1372// if (pos == 0) 1373// tokens.throwError(MessageLocalization.getComposedMessage("file.position.0.cross.reference.entry.in.this.xref.subsection")); 1374 xref[p] = pos; 1375 } 1376 } 1377 else if (tokens.getStringValue().equals("f")) { 1378 if (xref[p] == 0 && xref[p + 1] == 0) 1379 xref[p] = -1; 1380 } 1381 else 1382 tokens.throwError(MessageLocalization.getComposedMessage("invalid.cross.reference.entry.in.this.xref.subsection")); 1383 } 1384 } 1385 PdfDictionary 
trailer = (PdfDictionary)readPRObject(); 1386 PdfNumber xrefSize = (PdfNumber)trailer.get(PdfName.SIZE); 1387 ensureXrefSize(xrefSize.intValue() * 2); 1388 PdfObject xrs = trailer.get(PdfName.XREFSTM); 1389 if (xrs != null && xrs.isNumber()) { 1390 int loc = ((PdfNumber)xrs).intValue(); 1391 try { 1392 readXRefStream(loc); 1393 newXrefType = true; 1394 hybridXref = true; 1395 } 1396 catch (IOException e) { 1397 xref = null; 1398 throw e; 1399 } 1400 } 1401 return trailer; 1402 } 1403 1404 protected boolean readXRefStream(final int ptr) throws IOException { 1405 tokens.seek(ptr); 1406 int thisStream = 0; 1407 if (!tokens.nextToken()) 1408 return false; 1409 if (tokens.getTokenType() != TokenType.NUMBER) 1410 return false; 1411 thisStream = tokens.intValue(); 1412 if (!tokens.nextToken() || tokens.getTokenType() != TokenType.NUMBER) 1413 return false; 1414 if (!tokens.nextToken() || !tokens.getStringValue().equals("obj")) 1415 return false; 1416 PdfObject object = readPRObject(); 1417 PRStream stm = null; 1418 if (object.isStream()) { 1419 stm = (PRStream)object; 1420 if (!PdfName.XREF.equals(stm.get(PdfName.TYPE))) 1421 return false; 1422 } 1423 else 1424 return false; 1425 if (trailer == null) { 1426 trailer = new PdfDictionary(); 1427 trailer.putAll(stm); 1428 } 1429 stm.setLength(((PdfNumber)stm.get(PdfName.LENGTH)).intValue()); 1430 int size = ((PdfNumber)stm.get(PdfName.SIZE)).intValue(); 1431 PdfArray index; 1432 PdfObject obj = stm.get(PdfName.INDEX); 1433 if (obj == null) { 1434 index = new PdfArray(); 1435 index.add(new int[]{0, size}); 1436 } 1437 else 1438 index = (PdfArray)obj; 1439 PdfArray w = (PdfArray)stm.get(PdfName.W); 1440 int prev = -1; 1441 obj = stm.get(PdfName.PREV); 1442 if (obj != null) 1443 prev = ((PdfNumber)obj).intValue(); 1444 // Each xref pair is a position 1445 // type 0 -> -1, 0 1446 // type 1 -> offset, 0 1447 // type 2 -> index, obj num 1448 ensureXrefSize(size * 2); 1449 if (objStmMark == null && !partial) 1450 objStmMark = new 
HashMap<Integer, IntHashtable>(); 1451 if (objStmToOffset == null && partial) 1452 objStmToOffset = new IntHashtable(); 1453 byte b[] = getStreamBytes(stm, tokens.getFile()); 1454 int bptr = 0; 1455 int wc[] = new int[3]; 1456 for (int k = 0; k < 3; ++k) 1457 wc[k] = w.getAsNumber(k).intValue(); 1458 for (int idx = 0; idx < index.size(); idx += 2) { 1459 int start = index.getAsNumber(idx).intValue(); 1460 int length = index.getAsNumber(idx + 1).intValue(); 1461 ensureXrefSize((start + length) * 2); 1462 while (length-- > 0) { 1463 int type = 1; 1464 if (wc[0] > 0) { 1465 type = 0; 1466 for (int k = 0; k < wc[0]; ++k) 1467 type = (type << 8) + (b[bptr++] & 0xff); 1468 } 1469 int field2 = 0; 1470 for (int k = 0; k < wc[1]; ++k) 1471 field2 = (field2 << 8) + (b[bptr++] & 0xff); 1472 int field3 = 0; 1473 for (int k = 0; k < wc[2]; ++k) 1474 field3 = (field3 << 8) + (b[bptr++] & 0xff); 1475 int base = start * 2; 1476 if (xref[base] == 0 && xref[base + 1] == 0) { 1477 switch (type) { 1478 case 0: 1479 xref[base] = -1; 1480 break; 1481 case 1: 1482 xref[base] = field2; 1483 break; 1484 case 2: 1485 xref[base] = field3; 1486 xref[base + 1] = field2; 1487 if (partial) { 1488 objStmToOffset.put(field2, 0); 1489 } 1490 else { 1491 Integer on = Integer.valueOf(field2); 1492 IntHashtable seq = objStmMark.get(on); 1493 if (seq == null) { 1494 seq = new IntHashtable(); 1495 seq.put(field3, 1); 1496 objStmMark.put(on, seq); 1497 } 1498 else 1499 seq.put(field3, 1); 1500 } 1501 break; 1502 } 1503 } 1504 ++start; 1505 } 1506 } 1507 thisStream *= 2; 1508 if (thisStream < xref.length) 1509 xref[thisStream] = -1; 1510 1511 if (prev == -1) 1512 return true; 1513 return readXRefStream(prev); 1514 } 1515 1516 protected void rebuildXref() throws IOException { 1517 hybridXref = false; 1518 newXrefType = false; 1519 tokens.seek(0); 1520 int xr[][] = new int[1024][]; 1521 int top = 0; 1522 trailer = null; 1523 byte line[] = new byte[64]; 1524 for (;;) { 1525 int pos = tokens.getFilePointer(); 
1526 if (!tokens.readLineSegment(line)) 1527 break; 1528 if (line[0] == 't') { 1529 if (!PdfEncodings.convertToString(line, null).startsWith("trailer")) 1530 continue; 1531 tokens.seek(pos); 1532 tokens.nextToken(); 1533 pos = tokens.getFilePointer(); 1534 try { 1535 PdfDictionary dic = (PdfDictionary)readPRObject(); 1536 if (dic.get(PdfName.ROOT) != null) 1537 trailer = dic; 1538 else 1539 tokens.seek(pos); 1540 } 1541 catch (Exception e) { 1542 tokens.seek(pos); 1543 } 1544 } 1545 else if (line[0] >= '0' && line[0] <= '9') { 1546 int obj[] = PRTokeniser.checkObjectStart(line); 1547 if (obj == null) 1548 continue; 1549 int num = obj[0]; 1550 int gen = obj[1]; 1551 if (num >= xr.length) { 1552 int newLength = num * 2; 1553 int xr2[][] = new int[newLength][]; 1554 System.arraycopy(xr, 0, xr2, 0, top); 1555 xr = xr2; 1556 } 1557 if (num >= top) 1558 top = num + 1; 1559 if (xr[num] == null || gen >= xr[num][1]) { 1560 obj[0] = pos; 1561 xr[num] = obj; 1562 } 1563 } 1564 } 1565 if (trailer == null) 1566 throw new InvalidPdfException(MessageLocalization.getComposedMessage("trailer.not.found")); 1567 xref = new int[top * 2]; 1568 for (int k = 0; k < top; ++k) { 1569 int obj[] = xr[k]; 1570 if (obj != null) 1571 xref[k * 2] = obj[0]; 1572 } 1573 } 1574 1575 protected PdfDictionary readDictionary() throws IOException { 1576 PdfDictionary dic = new PdfDictionary(); 1577 while (true) { 1578 tokens.nextValidToken(); 1579 if (tokens.getTokenType() == TokenType.END_DIC) 1580 break; 1581 if (tokens.getTokenType() != TokenType.NAME) 1582 tokens.throwError(MessageLocalization.getComposedMessage("dictionary.key.is.not.a.name")); 1583 PdfName name = new PdfName(tokens.getStringValue(), false); 1584 PdfObject obj = readPRObject(); 1585 int type = obj.type(); 1586 if (-type == TokenType.END_DIC.ordinal()) 1587 tokens.throwError(MessageLocalization.getComposedMessage("unexpected.gt.gt")); 1588 if (-type == TokenType.END_ARRAY.ordinal()) 1589 
tokens.throwError(MessageLocalization.getComposedMessage("unexpected.close.bracket")); 1590 dic.put(name, obj); 1591 } 1592 return dic; 1593 } 1594 1595 protected PdfArray readArray() throws IOException { 1596 PdfArray array = new PdfArray(); 1597 while (true) { 1598 PdfObject obj = readPRObject(); 1599 int type = obj.type(); 1600 if (-type == TokenType.END_ARRAY.ordinal()) 1601 break; 1602 if (-type == TokenType.END_DIC.ordinal()) 1603 tokens.throwError(MessageLocalization.getComposedMessage("unexpected.gt.gt")); 1604 array.add(obj); 1605 } 1606 return array; 1607 } 1608 1609 // Track how deeply nested the current object is, so 1610 // we know when to return an individual null or boolean, or 1611 // reuse one of the static ones. 1612 private int readDepth = 0; 1613 1614 protected PdfObject readPRObject() throws IOException { 1615 tokens.nextValidToken(); 1616 TokenType type = tokens.getTokenType(); 1617 switch (type) { 1618 case START_DIC: { 1619 ++readDepth; 1620 PdfDictionary dic = readDictionary(); 1621 --readDepth; 1622 int pos = tokens.getFilePointer(); 1623 // be careful in the trailer. May not be a "next" token. 
1624 boolean hasNext; 1625 do { 1626 hasNext = tokens.nextToken(); 1627 } while (hasNext && tokens.getTokenType() == TokenType.COMMENT); 1628 1629 if (hasNext && tokens.getStringValue().equals("stream")) { 1630 //skip whitespaces 1631 int ch; 1632 do { 1633 ch = tokens.read(); 1634 } while (ch == 32 || ch == 9 || ch == 0 || ch == 12); 1635 if (ch != '\n') 1636 ch = tokens.read(); 1637 if (ch != '\n') 1638 tokens.backOnePosition(ch); 1639 PRStream stream = new PRStream(this, tokens.getFilePointer()); 1640 stream.putAll(dic); 1641 // crypto handling 1642 stream.setObjNum(objNum, objGen); 1643 1644 return stream; 1645 } 1646 else { 1647 tokens.seek(pos); 1648 return dic; 1649 } 1650 } 1651 case START_ARRAY: { 1652 ++readDepth; 1653 PdfArray arr = readArray(); 1654 --readDepth; 1655 return arr; 1656 } 1657 case NUMBER: 1658 return new PdfNumber(tokens.getStringValue()); 1659 case STRING: 1660 PdfString str = new PdfString(tokens.getStringValue(), null).setHexWriting(tokens.isHexString()); 1661 // crypto handling 1662 str.setObjNum(objNum, objGen); 1663 if (strings != null) 1664 strings.add(str); 1665 1666 return str; 1667 case NAME: { 1668 PdfName cachedName = PdfName.staticNames.get( tokens.getStringValue() ); 1669 if (readDepth > 0 && cachedName != null) { 1670 return cachedName; 1671 } else { 1672 // an indirect name (how odd...), or a non-standard one 1673 return new PdfName(tokens.getStringValue(), false); 1674 } 1675 } 1676 case REF: 1677 int num = tokens.getReference(); 1678 PRIndirectReference ref = new PRIndirectReference(this, num, tokens.getGeneration()); 1679 return ref; 1680 case ENDOFFILE: 1681 throw new IOException(MessageLocalization.getComposedMessage("unexpected.end.of.file")); 1682 default: 1683 String sv = tokens.getStringValue(); 1684 if ("null".equals(sv)) { 1685 if (readDepth == 0) { 1686 return new PdfNull(); 1687 } //else 1688 return PdfNull.PDFNULL; 1689 } 1690 else if ("true".equals(sv)) { 1691 if (readDepth == 0) { 1692 return new 
PdfBoolean( true ); 1693 } //else 1694 return PdfBoolean.PDFTRUE; 1695 } 1696 else if ("false".equals(sv)) { 1697 if (readDepth == 0) { 1698 return new PdfBoolean( false ); 1699 } //else 1700 return PdfBoolean.PDFFALSE; 1701 } 1702 return new PdfLiteral(-type.ordinal(), tokens.getStringValue()); 1703 } 1704 } 1705 1706 /** Decodes a stream that has the FlateDecode filter. 1707 * @param in the input data 1708 * @return the decoded data 1709 */ 1710 public static byte[] FlateDecode(final byte in[]) { 1711 byte b[] = FlateDecode(in, true); 1712 if (b == null) 1713 return FlateDecode(in, false); 1714 return b; 1715 } 1716 1717 /** 1718 * @param in 1719 * @param dicPar 1720 * @return a byte array 1721 */ 1722 public static byte[] decodePredictor(final byte in[], final PdfObject dicPar) { 1723 if (dicPar == null || !dicPar.isDictionary()) 1724 return in; 1725 PdfDictionary dic = (PdfDictionary)dicPar; 1726 PdfObject obj = getPdfObject(dic.get(PdfName.PREDICTOR)); 1727 if (obj == null || !obj.isNumber()) 1728 return in; 1729 int predictor = ((PdfNumber)obj).intValue(); 1730 if (predictor < 10) 1731 return in; 1732 int width = 1; 1733 obj = getPdfObject(dic.get(PdfName.COLUMNS)); 1734 if (obj != null && obj.isNumber()) 1735 width = ((PdfNumber)obj).intValue(); 1736 int colors = 1; 1737 obj = getPdfObject(dic.get(PdfName.COLORS)); 1738 if (obj != null && obj.isNumber()) 1739 colors = ((PdfNumber)obj).intValue(); 1740 int bpc = 8; 1741 obj = getPdfObject(dic.get(PdfName.BITSPERCOMPONENT)); 1742 if (obj != null && obj.isNumber()) 1743 bpc = ((PdfNumber)obj).intValue(); 1744 DataInputStream dataStream = new DataInputStream(new ByteArrayInputStream(in)); 1745 ByteArrayOutputStream fout = new ByteArrayOutputStream(in.length); 1746 int bytesPerPixel = colors * bpc / 8; 1747 int bytesPerRow = (colors*width*bpc + 7)/8; 1748 byte[] curr = new byte[bytesPerRow]; 1749 byte[] prior = new byte[bytesPerRow]; 1750 1751 // Decode the (sub)image row-by-row 1752 while (true) { 1753 // Read 
the filter type byte and a row of data 1754 int filter = 0; 1755 try { 1756 filter = dataStream.read(); 1757 if (filter < 0) { 1758 return fout.toByteArray(); 1759 } 1760 dataStream.readFully(curr, 0, bytesPerRow); 1761 } catch (Exception e) { 1762 return fout.toByteArray(); 1763 } 1764 1765 switch (filter) { 1766 case 0: //PNG_FILTER_NONE 1767 break; 1768 case 1: //PNG_FILTER_SUB 1769 for (int i = bytesPerPixel; i < bytesPerRow; i++) { 1770 curr[i] += curr[i - bytesPerPixel]; 1771 } 1772 break; 1773 case 2: //PNG_FILTER_UP 1774 for (int i = 0; i < bytesPerRow; i++) { 1775 curr[i] += prior[i]; 1776 } 1777 break; 1778 case 3: //PNG_FILTER_AVERAGE 1779 for (int i = 0; i < bytesPerPixel; i++) { 1780 curr[i] += prior[i] / 2; 1781 } 1782 for (int i = bytesPerPixel; i < bytesPerRow; i++) { 1783 curr[i] += ((curr[i - bytesPerPixel] & 0xff) + (prior[i] & 0xff))/2; 1784 } 1785 break; 1786 case 4: //PNG_FILTER_PAETH 1787 for (int i = 0; i < bytesPerPixel; i++) { 1788 curr[i] += prior[i]; 1789 } 1790 1791 for (int i = bytesPerPixel; i < bytesPerRow; i++) { 1792 int a = curr[i - bytesPerPixel] & 0xff; 1793 int b = prior[i] & 0xff; 1794 int c = prior[i - bytesPerPixel] & 0xff; 1795 1796 int p = a + b - c; 1797 int pa = Math.abs(p - a); 1798 int pb = Math.abs(p - b); 1799 int pc = Math.abs(p - c); 1800 1801 int ret; 1802 1803 if (pa <= pb && pa <= pc) { 1804 ret = a; 1805 } else if (pb <= pc) { 1806 ret = b; 1807 } else { 1808 ret = c; 1809 } 1810 curr[i] += (byte)ret; 1811 } 1812 break; 1813 default: 1814 // Error -- unknown filter type 1815 throw new RuntimeException(MessageLocalization.getComposedMessage("png.filter.unknown")); 1816 } 1817 try { 1818 fout.write(curr); 1819 } 1820 catch (IOException ioe) { 1821 // Never happens 1822 } 1823 1824 // Swap curr and prior 1825 byte[] tmp = prior; 1826 prior = curr; 1827 curr = tmp; 1828 } 1829 } 1830 1831 /** A helper to FlateDecode. 1832 * @param in the input data 1833 * @param strict <CODE>true</CODE> to read a correct stream. 
<CODE>false</CODE> 1834 * to try to read a corrupted stream 1835 * @return the decoded data 1836 */ 1837 public static byte[] FlateDecode(final byte in[], final boolean strict) { 1838 ByteArrayInputStream stream = new ByteArrayInputStream(in); 1839 InflaterInputStream zip = new InflaterInputStream(stream); 1840 ByteArrayOutputStream out = new ByteArrayOutputStream(); 1841 byte b[] = new byte[strict ? 4092 : 1]; 1842 try { 1843 int n; 1844 while ((n = zip.read(b)) >= 0) { 1845 out.write(b, 0, n); 1846 } 1847 zip.close(); 1848 out.close(); 1849 return out.toByteArray(); 1850 } 1851 catch (Exception e) { 1852 if (strict) 1853 return null; 1854 return out.toByteArray(); 1855 } 1856 } 1857 1858 /** Decodes a stream that has the ASCIIHexDecode filter. 1859 * @param in the input data 1860 * @return the decoded data 1861 */ 1862 public static byte[] ASCIIHexDecode(final byte in[]) { 1863 ByteArrayOutputStream out = new ByteArrayOutputStream(); 1864 boolean first = true; 1865 int n1 = 0; 1866 for (int k = 0; k < in.length; ++k) { 1867 int ch = in[k] & 0xff; 1868 if (ch == '>') 1869 break; 1870 if (PRTokeniser.isWhitespace(ch)) 1871 continue; 1872 int n = PRTokeniser.getHex(ch); 1873 if (n == -1) 1874 throw new RuntimeException(MessageLocalization.getComposedMessage("illegal.character.in.asciihexdecode")); 1875 if (first) 1876 n1 = n; 1877 else 1878 out.write((byte)((n1 << 4) + n)); 1879 first = !first; 1880 } 1881 if (!first) 1882 out.write((byte)(n1 << 4)); 1883 return out.toByteArray(); 1884 } 1885 1886 /** Decodes a stream that has the ASCII85Decode filter. 
1887 * @param in the input data 1888 * @return the decoded data 1889 */ 1890 public static byte[] ASCII85Decode(final byte in[]) { 1891 ByteArrayOutputStream out = new ByteArrayOutputStream(); 1892 int state = 0; 1893 int chn[] = new int[5]; 1894 for (int k = 0; k < in.length; ++k) { 1895 int ch = in[k] & 0xff; 1896 if (ch == '~') 1897 break; 1898 if (PRTokeniser.isWhitespace(ch)) 1899 continue; 1900 if (ch == 'z' && state == 0) { 1901 out.write(0); 1902 out.write(0); 1903 out.write(0); 1904 out.write(0); 1905 continue; 1906 } 1907 if (ch < '!' || ch > 'u') 1908 throw new RuntimeException(MessageLocalization.getComposedMessage("illegal.character.in.ascii85decode")); 1909 chn[state] = ch - '!'; 1910 ++state; 1911 if (state == 5) { 1912 state = 0; 1913 int r = 0; 1914 for (int j = 0; j < 5; ++j) 1915 r = r * 85 + chn[j]; 1916 out.write((byte)(r >> 24)); 1917 out.write((byte)(r >> 16)); 1918 out.write((byte)(r >> 8)); 1919 out.write((byte)r); 1920 } 1921 } 1922 int r = 0; 1923 // We'll ignore the next two lines for the sake of perpetuating broken PDFs 1924// if (state == 1) 1925// throw new RuntimeException(MessageLocalization.getComposedMessage("illegal.length.in.ascii85decode")); 1926 if (state == 2) { 1927 r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + 85 * 85 * 85 + 85 * 85 + 85; 1928 out.write((byte)(r >> 24)); 1929 } 1930 else if (state == 3) { 1931 r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + chn[2] * 85 * 85 + 85 * 85 + 85; 1932 out.write((byte)(r >> 24)); 1933 out.write((byte)(r >> 16)); 1934 } 1935 else if (state == 4) { 1936 r = chn[0] * 85 * 85 * 85 * 85 + chn[1] * 85 * 85 * 85 + chn[2] * 85 * 85 + chn[3] * 85 + 85; 1937 out.write((byte)(r >> 24)); 1938 out.write((byte)(r >> 16)); 1939 out.write((byte)(r >> 8)); 1940 } 1941 return out.toByteArray(); 1942 } 1943 1944 /** Decodes a stream that has the LZWDecode filter. 
1945 * @param in the input data 1946 * @return the decoded data 1947 */ 1948 public static byte[] LZWDecode(final byte in[]) { 1949 ByteArrayOutputStream out = new ByteArrayOutputStream(); 1950 LZWDecoder lzw = new LZWDecoder(); 1951 lzw.decode(in, out); 1952 return out.toByteArray(); 1953 } 1954 1955 /** Checks if the document had errors and was rebuilt. 1956 * @return true if rebuilt. 1957 * 1958 */ 1959 public boolean isRebuilt() { 1960 return this.rebuilt; 1961 } 1962 1963 /** Gets the dictionary that represents a page. 1964 * @param pageNum the page number. 1 is the first 1965 * @return the page dictionary 1966 */ 1967 public PdfDictionary getPageN(final int pageNum) { 1968 PdfDictionary dic = pageRefs.getPageN(pageNum); 1969 if (dic == null) 1970 return null; 1971 if (appendable) 1972 dic.setIndRef(pageRefs.getPageOrigRef(pageNum)); 1973 return dic; 1974 } 1975 1976 /** 1977 * @param pageNum 1978 * @return a Dictionary object 1979 */ 1980 public PdfDictionary getPageNRelease(final int pageNum) { 1981 PdfDictionary dic = getPageN(pageNum); 1982 pageRefs.releasePage(pageNum); 1983 return dic; 1984 } 1985 1986 /** 1987 * @param pageNum 1988 */ 1989 public void releasePage(final int pageNum) { 1990 pageRefs.releasePage(pageNum); 1991 } 1992 1993 /** 1994 * 1995 */ 1996 public void resetReleasePage() { 1997 pageRefs.resetReleasePage(); 1998 } 1999 2000 /** Gets the page reference to this page. 2001 * @param pageNum the page number. 1 is the first 2002 * @return the page reference 2003 */ 2004 public PRIndirectReference getPageOrigRef(final int pageNum) { 2005 return pageRefs.getPageOrigRef(pageNum); 2006 } 2007 2008 /** Gets the contents of the page. 2009 * @param pageNum the page number. 
1 is the first 2010 * @param file the location of the PDF document 2011 * @throws IOException on error 2012 * @return the content 2013 */ 2014 public byte[] getPageContent(final int pageNum, final RandomAccessFileOrArray file) throws IOException{ 2015 PdfDictionary page = getPageNRelease(pageNum); 2016 if (page == null) 2017 return null; 2018 PdfObject contents = getPdfObjectRelease(page.get(PdfName.CONTENTS)); 2019 if (contents == null) 2020 return new byte[0]; 2021 ByteArrayOutputStream bout = null; 2022 if (contents.isStream()) { 2023 return getStreamBytes((PRStream)contents, file); 2024 } 2025 else if (contents.isArray()) { 2026 PdfArray array = (PdfArray)contents; 2027 bout = new ByteArrayOutputStream(); 2028 for (int k = 0; k < array.size(); ++k) { 2029 PdfObject item = getPdfObjectRelease(array.getPdfObject(k)); 2030 if (item == null || !item.isStream()) 2031 continue; 2032 byte[] b = getStreamBytes((PRStream)item, file); 2033 bout.write(b); 2034 if (k != array.size() - 1) 2035 bout.write('\n'); 2036 } 2037 return bout.toByteArray(); 2038 } 2039 else 2040 return new byte[0]; 2041 } 2042 2043 /** Gets the content from the page dictionary. 
2044 * @param page the page dictionary 2045 * @throws IOException on error 2046 * @return the content 2047 * @since 5.0.6 2048 */ 2049 public static byte[] getPageContent(final PdfDictionary page) throws IOException{ 2050 if (page == null) 2051 return null; 2052 RandomAccessFileOrArray rf = null; 2053 try { 2054 PdfObject contents = getPdfObjectRelease(page.get(PdfName.CONTENTS)); 2055 if (contents == null) 2056 return new byte[0]; 2057 if (contents.isStream()) { 2058 if (rf == null) { 2059 rf = ((PRStream)contents).getReader().getSafeFile(); 2060 rf.reOpen(); 2061 } 2062 return getStreamBytes((PRStream)contents, rf); 2063 } 2064 else if (contents.isArray()) { 2065 PdfArray array = (PdfArray)contents; 2066 ByteArrayOutputStream bout = new ByteArrayOutputStream(); 2067 for (int k = 0; k < array.size(); ++k) { 2068 PdfObject item = getPdfObjectRelease(array.getPdfObject(k)); 2069 if (item == null || !item.isStream()) 2070 continue; 2071 if (rf == null) { 2072 rf = ((PRStream)item).getReader().getSafeFile(); 2073 rf.reOpen(); 2074 } 2075 byte[] b = getStreamBytes((PRStream)item, rf); 2076 bout.write(b); 2077 if (k != array.size() - 1) 2078 bout.write('\n'); 2079 } 2080 return bout.toByteArray(); 2081 } 2082 else 2083 return new byte[0]; 2084 } 2085 finally { 2086 try { 2087 if (rf != null) 2088 rf.close(); 2089 }catch(Exception e){} 2090 } 2091 } 2092 2093 /** 2094 * Retrieve the given page's resource dictionary 2095 * @param pageNum 1-based page number from which to retrieve the resource dictionary 2096 * @return The page's resources, or 'null' if the page has none. 2097 * @since 5.1 2098 */ 2099 public PdfDictionary getPageResources(final int pageNum) { 2100 return getPageResources(getPageN(pageNum)); 2101 } 2102 2103 /** 2104 * Retrieve the given page's resource dictionary 2105 * @param pageDict the given page 2106 * @return The page's resources, or 'null' if the page has none. 
2107 * @since 5.1 2108 */ 2109 public PdfDictionary getPageResources(final PdfDictionary pageDict) { 2110 return pageDict.getAsDict(PdfName.RESOURCES); 2111 } 2112 2113 /** Gets the contents of the page. 2114 * @param pageNum the page number. 1 is the first 2115 * @throws IOException on error 2116 * @return the content 2117 */ 2118 public byte[] getPageContent(final int pageNum) throws IOException{ 2119 RandomAccessFileOrArray rf = getSafeFile(); 2120 try { 2121 rf.reOpen(); 2122 return getPageContent(pageNum, rf); 2123 } 2124 finally { 2125 try{rf.close();}catch(Exception e){} 2126 } 2127 } 2128 2129 protected void killXref(PdfObject obj) { 2130 if (obj == null) 2131 return; 2132 if (obj instanceof PdfIndirectReference && !obj.isIndirect()) 2133 return; 2134 switch (obj.type()) { 2135 case PdfObject.INDIRECT: { 2136 int xr = ((PRIndirectReference)obj).getNumber(); 2137 obj = xrefObj.get(xr); 2138 xrefObj.set(xr, null); 2139 freeXref = xr; 2140 killXref(obj); 2141 break; 2142 } 2143 case PdfObject.ARRAY: { 2144 PdfArray t = (PdfArray)obj; 2145 for (int i = 0; i < t.size(); ++i) 2146 killXref(t.getPdfObject(i)); 2147 break; 2148 } 2149 case PdfObject.STREAM: 2150 case PdfObject.DICTIONARY: { 2151 PdfDictionary dic = (PdfDictionary)obj; 2152 for (Object element : dic.getKeys()) { 2153 killXref(dic.get((PdfName)element)); 2154 } 2155 break; 2156 } 2157 } 2158 } 2159 2160 /** Sets the contents of the page. 2161 * @param content the new page content 2162 * @param pageNum the page number. 1 is the first 2163 */ 2164 public void setPageContent(final int pageNum, final byte content[]) { 2165 setPageContent(pageNum, content, PdfStream.DEFAULT_COMPRESSION); 2166 } 2167 /** Sets the contents of the page. 2168 * @param content the new page content 2169 * @param pageNum the page number. 
1 is the first 2170 * @param compressionLevel the compressionLevel 2171 * @since 2.1.3 (the method already existed without param compressionLevel) 2172 */ 2173 public void setPageContent(final int pageNum, final byte content[], final int compressionLevel) { 2174 PdfDictionary page = getPageN(pageNum); 2175 if (page == null) 2176 return; 2177 PdfObject contents = page.get(PdfName.CONTENTS); 2178 freeXref = -1; 2179 killXref(contents); 2180 if (freeXref == -1) { 2181 xrefObj.add(null); 2182 freeXref = xrefObj.size() - 1; 2183 } 2184 page.put(PdfName.CONTENTS, new PRIndirectReference(this, freeXref)); 2185 xrefObj.set(freeXref, new PRStream(this, content, compressionLevel)); 2186 } 2187 2188 /** 2189 * Decode a byte[] applying the filters specified in the provided dictionary. 2190 * @param b the bytes to decode 2191 * @param streamDictionary the dictionary that contains filter information 2192 * @return the decoded bytes 2193 * @throws IOException if there are any problems decoding the bytes 2194 * @since 5.0.4 2195 */ 2196 public static byte[] decodeBytes(byte[] b, final PdfDictionary streamDictionary) throws IOException { 2197 PdfObject filter = getPdfObjectRelease(streamDictionary.get(PdfName.FILTER)); 2198 2199 ArrayList<PdfObject> filters = new ArrayList<PdfObject>(); 2200 if (filter != null) { 2201 if (filter.isName()) 2202 filters.add(filter); 2203 else if (filter.isArray()) 2204 filters = ((PdfArray)filter).getArrayList(); 2205 } 2206 ArrayList<PdfObject> dp = new ArrayList<PdfObject>(); 2207 PdfObject dpo = getPdfObjectRelease(streamDictionary.get(PdfName.DECODEPARMS)); 2208 if (dpo == null || !dpo.isDictionary() && !dpo.isArray()) 2209 dpo = getPdfObjectRelease(streamDictionary.get(PdfName.DP)); 2210 if (dpo != null) { 2211 if (dpo.isDictionary()) 2212 dp.add(dpo); 2213 else if (dpo.isArray()) 2214 dp = ((PdfArray)dpo).getArrayList(); 2215 } 2216 PdfName name; 2217 for (int j = 0; j < filters.size(); ++j) { 2218 name = 
(PdfName)getPdfObjectRelease(filters.get(j)); 2219 if (PdfName.FLATEDECODE.equals(name) || PdfName.FL.equals(name)) { 2220 b = FlateDecode(b); 2221 PdfObject dicParam = null; 2222 if (j < dp.size()) { 2223 dicParam = dp.get(j); 2224 b = decodePredictor(b, dicParam); 2225 } 2226 } 2227 else if (PdfName.ASCIIHEXDECODE.equals(name) || PdfName.AHX.equals(name)) 2228 b = ASCIIHexDecode(b); 2229 else if (PdfName.ASCII85DECODE.equals(name) || PdfName.A85.equals(name)) 2230 b = ASCII85Decode(b); 2231 else if (PdfName.LZWDECODE.equals(name)) { 2232 b = LZWDecode(b); 2233 PdfObject dicParam = null; 2234 if (j < dp.size()) { 2235 dicParam = dp.get(j); 2236 b = decodePredictor(b, dicParam); 2237 } 2238 } 2239 else if (PdfName.CCITTFAXDECODE.equals(name)) { 2240 PdfNumber wn = (PdfNumber)getPdfObjectRelease(streamDictionary.get(PdfName.WIDTH)); 2241 PdfNumber hn = (PdfNumber)getPdfObjectRelease(streamDictionary.get(PdfName.HEIGHT)); 2242 if (wn == null || hn == null) 2243 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("filter.ccittfaxdecode.is.only.supported.for.images")); 2244 int width = wn.intValue(); 2245 int height = hn.intValue(); 2246 PdfDictionary param = null; 2247 if (j < dp.size()) { 2248 PdfObject objParam = getPdfObjectRelease(dp.get(j)); 2249 if (objParam != null && (objParam instanceof PdfDictionary)) 2250 param = (PdfDictionary)objParam; 2251 } 2252 int k = 0; 2253 boolean blackIs1 = false; 2254 boolean byteAlign = false; 2255 if (param != null) { 2256 PdfNumber kn = param.getAsNumber(PdfName.K); 2257 if (kn != null) 2258 k = kn.intValue(); 2259 PdfBoolean bo = param.getAsBoolean(PdfName.BLACKIS1); 2260 if (bo != null) 2261 blackIs1 = bo.booleanValue(); 2262 bo = param.getAsBoolean(PdfName.ENCODEDBYTEALIGN); 2263 if (bo != null) 2264 byteAlign = bo.booleanValue(); 2265 } 2266 byte[] outBuf = new byte[(width + 7) / 8 * height]; 2267 TIFFFaxDecompressor decoder = new TIFFFaxDecompressor(); 2268 if (k == 0 || k > 0) { 2269 int 
tiffT4Options = k > 0 ? TIFFConstants.GROUP3OPT_2DENCODING : 0; 2270 tiffT4Options |= byteAlign ? TIFFConstants.GROUP3OPT_FILLBITS : 0; 2271 decoder.SetOptions(1, TIFFConstants.COMPRESSION_CCITTFAX3, tiffT4Options, 0); 2272 decoder.decodeRaw(outBuf, b, width, height); 2273 if (decoder.fails > 0) { 2274 byte[] outBuf2 = new byte[(width + 7) / 8 * height]; 2275 int oldFails = decoder.fails; 2276 decoder.SetOptions(1, TIFFConstants.COMPRESSION_CCITTRLE, tiffT4Options, 0); 2277 decoder.decodeRaw(outBuf2, b, width, height); 2278 if (decoder.fails < oldFails) { 2279 outBuf = outBuf2; 2280 } 2281 } 2282 } 2283 else { 2284 TIFFFaxDecoder deca = new TIFFFaxDecoder(1, width, height); 2285 deca.decodeT6(outBuf, b, 0, height, 0); 2286 } 2287 if (!blackIs1) { 2288 int len = outBuf.length; 2289 for (int t = 0; t < len; ++t) { 2290 outBuf[t] ^= 0xff; 2291 } 2292 } 2293 b = outBuf; 2294 } 2295 else if (PdfName.CRYPT.equals(name)) { 2296 } 2297 else 2298 throw new UnsupportedPdfException(MessageLocalization.getComposedMessage("the.filter.1.is.not.supported", name)); 2299 } 2300 return b; 2301 } 2302 2303 /** Get the content from a stream applying the required filters. 2304 * @param stream the stream 2305 * @param file the location where the stream is 2306 * @throws IOException on error 2307 * @return the stream content 2308 */ 2309 public static byte[] getStreamBytes(final PRStream stream, final RandomAccessFileOrArray file) throws IOException { 2310 byte[] b = getStreamBytesRaw(stream, file); 2311 return decodeBytes(b, stream); 2312 } 2313 2314 /** Get the content from a stream applying the required filters. 
2315 * @param stream the stream 2316 * @throws IOException on error 2317 * @return the stream content 2318 */ 2319 public static byte[] getStreamBytes(final PRStream stream) throws IOException { 2320 RandomAccessFileOrArray rf = stream.getReader().getSafeFile(); 2321 try { 2322 rf.reOpen(); 2323 return getStreamBytes(stream, rf); 2324 } 2325 finally { 2326 try{rf.close();}catch(Exception e){} 2327 } 2328 } 2329 2330 /** Get the content from a stream as it is without applying any filter. 2331 * @param stream the stream 2332 * @param file the location where the stream is 2333 * @throws IOException on error 2334 * @return the stream content 2335 */ 2336 public static byte[] getStreamBytesRaw(final PRStream stream, final RandomAccessFileOrArray file) throws IOException { 2337 PdfReader reader = stream.getReader(); 2338 byte b[]; 2339 if (stream.getOffset() < 0) 2340 b = stream.getBytes(); 2341 else { 2342 b = new byte[stream.getLength()]; 2343 file.seek(stream.getOffset()); 2344 file.readFully(b); 2345 PdfEncryption decrypt = reader.getDecrypt(); 2346 if (decrypt != null) { 2347 PdfObject filter = getPdfObjectRelease(stream.get(PdfName.FILTER)); 2348 ArrayList<PdfObject> filters = new ArrayList<PdfObject>(); 2349 if (filter != null) { 2350 if (filter.isName()) 2351 filters.add(filter); 2352 else if (filter.isArray()) 2353 filters = ((PdfArray)filter).getArrayList(); 2354 } 2355 boolean skip = false; 2356 for (int k = 0; k < filters.size(); ++k) { 2357 PdfObject obj = getPdfObjectRelease(filters.get(k)); 2358 if (obj != null && obj.toString().equals("/Crypt")) { 2359 skip = true; 2360 break; 2361 } 2362 } 2363 if (!skip) { 2364 decrypt.setHashKey(stream.getObjNum(), stream.getObjGen()); 2365 b = decrypt.decryptByteArray(b); 2366 } 2367 } 2368 } 2369 return b; 2370 } 2371 2372 /** Get the content from a stream as it is without applying any filter. 
2373 * @param stream the stream 2374 * @throws IOException on error 2375 * @return the stream content 2376 */ 2377 public static byte[] getStreamBytesRaw(final PRStream stream) throws IOException { 2378 RandomAccessFileOrArray rf = stream.getReader().getSafeFile(); 2379 try { 2380 rf.reOpen(); 2381 return getStreamBytesRaw(stream, rf); 2382 } 2383 finally { 2384 try{rf.close();}catch(Exception e){} 2385 } 2386 } 2387 2388 /** Eliminates shared streams if they exist. */ 2389 public void eliminateSharedStreams() { 2390 if (!sharedStreams) 2391 return; 2392 sharedStreams = false; 2393 if (pageRefs.size() == 1) 2394 return; 2395 ArrayList<PRIndirectReference> newRefs = new ArrayList<PRIndirectReference>(); 2396 ArrayList<PRStream> newStreams = new ArrayList<PRStream>(); 2397 IntHashtable visited = new IntHashtable(); 2398 for (int k = 1; k <= pageRefs.size(); ++k) { 2399 PdfDictionary page = pageRefs.getPageN(k); 2400 if (page == null) 2401 continue; 2402 PdfObject contents = getPdfObject(page.get(PdfName.CONTENTS)); 2403 if (contents == null) 2404 continue; 2405 if (contents.isStream()) { 2406 PRIndirectReference ref = (PRIndirectReference)page.get(PdfName.CONTENTS); 2407 if (visited.containsKey(ref.getNumber())) { 2408 // need to duplicate 2409 newRefs.add(ref); 2410 newStreams.add(new PRStream((PRStream)contents, null)); 2411 } 2412 else 2413 visited.put(ref.getNumber(), 1); 2414 } 2415 else if (contents.isArray()) { 2416 PdfArray array = (PdfArray)contents; 2417 for (int j = 0; j < array.size(); ++j) { 2418 PRIndirectReference ref = (PRIndirectReference)array.getPdfObject(j); 2419 if (visited.containsKey(ref.getNumber())) { 2420 // need to duplicate 2421 newRefs.add(ref); 2422 newStreams.add(new PRStream((PRStream)getPdfObject(ref), null)); 2423 } 2424 else 2425 visited.put(ref.getNumber(), 1); 2426 } 2427 } 2428 } 2429 if (newStreams.isEmpty()) 2430 return; 2431 for (int k = 0; k < newStreams.size(); ++k) { 2432 xrefObj.add(newStreams.get(k)); 2433 
PRIndirectReference ref = newRefs.get(k); 2434 ref.setNumber(xrefObj.size() - 1, 0); 2435 } 2436 } 2437 2438 /** Checks if the document was changed. 2439 * @return <CODE>true</CODE> if the document was changed, 2440 * <CODE>false</CODE> otherwise 2441 */ 2442 public boolean isTampered() { 2443 return tampered; 2444 } 2445 2446 /** 2447 * Sets the tampered state. A tampered PdfReader cannot be reused in PdfStamper. 2448 * @param tampered the tampered state 2449 */ 2450 public void setTampered(final boolean tampered) { 2451 this.tampered = tampered; 2452 pageRefs.keepPages(); 2453 } 2454 2455 /** Gets the XML metadata. 2456 * @throws IOException on error 2457 * @return the XML metadata 2458 */ 2459 public byte[] getMetadata() throws IOException { 2460 PdfObject obj = getPdfObject(catalog.get(PdfName.METADATA)); 2461 if (!(obj instanceof PRStream)) 2462 return null; 2463 RandomAccessFileOrArray rf = getSafeFile(); 2464 byte b[] = null; 2465 try { 2466 rf.reOpen(); 2467 b = getStreamBytes((PRStream)obj, rf); 2468 } 2469 finally { 2470 try { 2471 rf.close(); 2472 } 2473 catch (Exception e) { 2474 // empty on purpose 2475 } 2476 } 2477 return b; 2478 } 2479 2480 /** 2481 * Gets the byte address of the last xref table. 2482 * @return the byte address of the last xref table 2483 */ 2484 public int getLastXref() { 2485 return lastXref; 2486 } 2487 2488 /** 2489 * Gets the number of xref objects. 2490 * @return the number of xref objects 2491 */ 2492 public int getXrefSize() { 2493 return xrefObj.size(); 2494 } 2495 2496 /** 2497 * Gets the byte address of the %%EOF marker. 2498 * @return the byte address of the %%EOF marker 2499 */ 2500 public int getEofPos() { 2501 return eofPos; 2502 } 2503 2504 /** 2505 * Gets the PDF version. Only the last version char is returned. For example 2506 * version 1.4 is returned as '4'. 
2507 * @return the PDF version 2508 */ 2509 public char getPdfVersion() { 2510 return pdfVersion; 2511 } 2512 2513 /** 2514 * Returns <CODE>true</CODE> if the PDF is encrypted. 2515 * @return <CODE>true</CODE> if the PDF is encrypted 2516 */ 2517 public boolean isEncrypted() { 2518 return encrypted; 2519 } 2520 2521 /** 2522 * Gets the encryption permissions. It can be used directly in 2523 * <CODE>PdfWriter.setEncryption()</CODE>. 2524 * @return the encryption permissions 2525 */ 2526 public int getPermissions() { 2527 return pValue; 2528 } 2529 2530 /** 2531 * Returns <CODE>true</CODE> if the PDF has a 128 bit key encryption. 2532 * @return <CODE>true</CODE> if the PDF has a 128 bit key encryption 2533 */ 2534 public boolean is128Key() { 2535 return rValue == 3; 2536 } 2537 2538 /** 2539 * Gets the trailer dictionary 2540 * @return the trailer dictionary 2541 */ 2542 public PdfDictionary getTrailer() { 2543 return trailer; 2544 } 2545 2546 PdfEncryption getDecrypt() { 2547 return decrypt; 2548 } 2549 2550 static boolean equalsn(final byte a1[], final byte a2[]) { 2551 int length = a2.length; 2552 for (int k = 0; k < length; ++k) { 2553 if (a1[k] != a2[k]) 2554 return false; 2555 } 2556 return true; 2557 } 2558 2559 static boolean existsName(final PdfDictionary dic, final PdfName key, final PdfName value) { 2560 PdfObject type = getPdfObjectRelease(dic.get(key)); 2561 if (type == null || !type.isName()) 2562 return false; 2563 PdfName name = (PdfName)type; 2564 return name.equals(value); 2565 } 2566 2567 static String getFontName(final PdfDictionary dic) { 2568 if (dic == null) 2569 return null; 2570 PdfObject type = getPdfObjectRelease(dic.get(PdfName.BASEFONT)); 2571 if (type == null || !type.isName()) 2572 return null; 2573 return PdfName.decodeName(type.toString()); 2574 } 2575 2576 static String getSubsetPrefix(final PdfDictionary dic) { 2577 if (dic == null) 2578 return null; 2579 String s = getFontName(dic); 2580 if (s == null) 2581 return null; 2582 if 
(s.length() < 8 || s.charAt(6) != '+') 2583 return null; 2584 for (int k = 0; k < 6; ++k) { 2585 char c = s.charAt(k); 2586 if (c < 'A' || c > 'Z') 2587 return null; 2588 } 2589 return s; 2590 } 2591 2592 /** Finds all the font subsets and changes the prefixes to some 2593 * random values. 2594 * @return the number of font subsets altered 2595 */ 2596 public int shuffleSubsetNames() { 2597 int total = 0; 2598 for (int k = 1; k < xrefObj.size(); ++k) { 2599 PdfObject obj = getPdfObjectRelease(k); 2600 if (obj == null || !obj.isDictionary()) 2601 continue; 2602 PdfDictionary dic = (PdfDictionary)obj; 2603 if (!existsName(dic, PdfName.TYPE, PdfName.FONT)) 2604 continue; 2605 if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE1) 2606 || existsName(dic, PdfName.SUBTYPE, PdfName.MMTYPE1) 2607 || existsName(dic, PdfName.SUBTYPE, PdfName.TRUETYPE)) { 2608 String s = getSubsetPrefix(dic); 2609 if (s == null) 2610 continue; 2611 String ns = BaseFont.createSubsetPrefix() + s.substring(7); 2612 PdfName newName = new PdfName(ns); 2613 dic.put(PdfName.BASEFONT, newName); 2614 setXrefPartialObject(k, dic); 2615 ++total; 2616 PdfDictionary fd = dic.getAsDict(PdfName.FONTDESCRIPTOR); 2617 if (fd == null) 2618 continue; 2619 fd.put(PdfName.FONTNAME, newName); 2620 } 2621 else if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE0)) { 2622 String s = getSubsetPrefix(dic); 2623 PdfArray arr = dic.getAsArray(PdfName.DESCENDANTFONTS); 2624 if (arr == null) 2625 continue; 2626 if (arr.isEmpty()) 2627 continue; 2628 PdfDictionary desc = arr.getAsDict(0); 2629 String sde = getSubsetPrefix(desc); 2630 if (sde == null) 2631 continue; 2632 String ns = BaseFont.createSubsetPrefix(); 2633 if (s != null) 2634 dic.put(PdfName.BASEFONT, new PdfName(ns + s.substring(7))); 2635 setXrefPartialObject(k, dic); 2636 PdfName newName = new PdfName(ns + sde.substring(7)); 2637 desc.put(PdfName.BASEFONT, newName); 2638 ++total; 2639 PdfDictionary fd = desc.getAsDict(PdfName.FONTDESCRIPTOR); 2640 if (fd == null) 
2641 continue; 2642 fd.put(PdfName.FONTNAME, newName); 2643 } 2644 } 2645 return total; 2646 } 2647 2648 /** Finds all the fonts not subset but embedded and marks them as subset. 2649 * @return the number of fonts altered 2650 */ 2651 public int createFakeFontSubsets() { 2652 int total = 0; 2653 for (int k = 1; k < xrefObj.size(); ++k) { 2654 PdfObject obj = getPdfObjectRelease(k); 2655 if (obj == null || !obj.isDictionary()) 2656 continue; 2657 PdfDictionary dic = (PdfDictionary)obj; 2658 if (!existsName(dic, PdfName.TYPE, PdfName.FONT)) 2659 continue; 2660 if (existsName(dic, PdfName.SUBTYPE, PdfName.TYPE1) 2661 || existsName(dic, PdfName.SUBTYPE, PdfName.MMTYPE1) 2662 || existsName(dic, PdfName.SUBTYPE, PdfName.TRUETYPE)) { 2663 String s = getSubsetPrefix(dic); 2664 if (s != null) 2665 continue; 2666 s = getFontName(dic); 2667 if (s == null) 2668 continue; 2669 String ns = BaseFont.createSubsetPrefix() + s; 2670 PdfDictionary fd = (PdfDictionary)getPdfObjectRelease(dic.get(PdfName.FONTDESCRIPTOR)); 2671 if (fd == null) 2672 continue; 2673 if (fd.get(PdfName.FONTFILE) == null && fd.get(PdfName.FONTFILE2) == null 2674 && fd.get(PdfName.FONTFILE3) == null) 2675 continue; 2676 fd = dic.getAsDict(PdfName.FONTDESCRIPTOR); 2677 PdfName newName = new PdfName(ns); 2678 dic.put(PdfName.BASEFONT, newName); 2679 fd.put(PdfName.FONTNAME, newName); 2680 setXrefPartialObject(k, dic); 2681 ++total; 2682 } 2683 } 2684 return total; 2685 } 2686 2687 private static PdfArray getNameArray(PdfObject obj) { 2688 if (obj == null) 2689 return null; 2690 obj = getPdfObjectRelease(obj); 2691 if (obj == null) 2692 return null; 2693 if (obj.isArray()) 2694 return (PdfArray)obj; 2695 else if (obj.isDictionary()) { 2696 PdfObject arr2 = getPdfObjectRelease(((PdfDictionary)obj).get(PdfName.D)); 2697 if (arr2 != null && arr2.isArray()) 2698 return (PdfArray)arr2; 2699 } 2700 return null; 2701 } 2702 2703 /** 2704 * Gets all the named destinations as an <CODE>HashMap</CODE>. 
The key is the name 2705 * and the value is the destinations array. 2706 * @return gets all the named destinations 2707 */ 2708 public HashMap<Object, PdfObject> getNamedDestination() { 2709 return getNamedDestination(false); 2710 } 2711 2712 /** 2713 * Gets all the named destinations as an <CODE>HashMap</CODE>. The key is the name 2714 * and the value is the destinations array. 2715 * @param keepNames true if you want the keys to be real PdfNames instead of Strings 2716 * @return gets all the named destinations 2717 * @since 2.1.6 2718 */ 2719 public HashMap<Object, PdfObject> getNamedDestination(final boolean keepNames) { 2720 HashMap<Object, PdfObject> names = getNamedDestinationFromNames(keepNames); 2721 names.putAll(getNamedDestinationFromStrings()); 2722 return names; 2723 } 2724 2725 /** 2726 * Gets the named destinations from the /Dests key in the catalog as an <CODE>HashMap</CODE>. The key is the name 2727 * and the value is the destinations array. 2728 * @return gets the named destinations 2729 * @since 5.0.1 (generic type in signature) 2730 */ 2731 @SuppressWarnings("unchecked") 2732 public HashMap<String, PdfObject> getNamedDestinationFromNames() { 2733 return new HashMap(getNamedDestinationFromNames(false)); 2734 } 2735 2736 /** 2737 * Gets the named destinations from the /Dests key in the catalog as an <CODE>HashMap</CODE>. The key is the name 2738 * and the value is the destinations array. 
2739 * @param keepNames true if you want the keys to be real PdfNames instead of Strings 2740 * @return gets the named destinations 2741 * @since 2.1.6 2742 */ 2743 public HashMap<Object, PdfObject> getNamedDestinationFromNames(final boolean keepNames) { 2744 HashMap<Object, PdfObject> names = new HashMap<Object, PdfObject>(); 2745 if (catalog.get(PdfName.DESTS) != null) { 2746 PdfDictionary dic = (PdfDictionary)getPdfObjectRelease(catalog.get(PdfName.DESTS)); 2747 if (dic == null) 2748 return names; 2749 Set<PdfName> keys = dic.getKeys(); 2750 for (PdfName key : keys) { 2751 PdfArray arr = getNameArray(dic.get(key)); 2752 if (arr == null) 2753 continue; 2754 if (keepNames) { 2755 names.put(key, arr); 2756 } 2757 else { 2758 String name = PdfName.decodeName(key.toString()); 2759 names.put(name, arr); 2760 } 2761 } 2762 } 2763 return names; 2764 } 2765 2766 /** 2767 * Gets the named destinations from the /Names key in the catalog as an <CODE>HashMap</CODE>. The key is the name 2768 * and the value is the destinations array. 2769 * @return gets the named destinations 2770 */ 2771 public HashMap<String, PdfObject> getNamedDestinationFromStrings() { 2772 if (catalog.get(PdfName.NAMES) != null) { 2773 PdfDictionary dic = (PdfDictionary)getPdfObjectRelease(catalog.get(PdfName.NAMES)); 2774 if (dic != null) { 2775 dic = (PdfDictionary)getPdfObjectRelease(dic.get(PdfName.DESTS)); 2776 if (dic != null) { 2777 HashMap<String, PdfObject> names = PdfNameTree.readTree(dic); 2778 for (Iterator<Map.Entry<String, PdfObject>> it = names.entrySet().iterator(); it.hasNext();) { 2779 Map.Entry<String, PdfObject> entry = it.next(); 2780 PdfArray arr = getNameArray(entry.getValue()); 2781 if (arr != null) 2782 entry.setValue(arr); 2783 else 2784 it.remove(); 2785 } 2786 return names; 2787 } 2788 } 2789 } 2790 return new HashMap<String, PdfObject>(); 2791 } 2792 2793 /** 2794 * Removes all the fields from the document. 
2795 */ 2796 public void removeFields() { 2797 pageRefs.resetReleasePage(); 2798 for (int k = 1; k <= pageRefs.size(); ++k) { 2799 PdfDictionary page = pageRefs.getPageN(k); 2800 PdfArray annots = page.getAsArray(PdfName.ANNOTS); 2801 if (annots == null) { 2802 pageRefs.releasePage(k); 2803 continue; 2804 } 2805 for (int j = 0; j < annots.size(); ++j) { 2806 PdfObject obj = getPdfObjectRelease(annots.getPdfObject(j)); 2807 if (obj == null || !obj.isDictionary()) 2808 continue; 2809 PdfDictionary annot = (PdfDictionary)obj; 2810 if (PdfName.WIDGET.equals(annot.get(PdfName.SUBTYPE))) 2811 annots.remove(j--); 2812 } 2813 if (annots.isEmpty()) 2814 page.remove(PdfName.ANNOTS); 2815 else 2816 pageRefs.releasePage(k); 2817 } 2818 catalog.remove(PdfName.ACROFORM); 2819 pageRefs.resetReleasePage(); 2820 } 2821 2822 /** 2823 * Removes all the annotations and fields from the document. 2824 */ 2825 public void removeAnnotations() { 2826 pageRefs.resetReleasePage(); 2827 for (int k = 1; k <= pageRefs.size(); ++k) { 2828 PdfDictionary page = pageRefs.getPageN(k); 2829 if (page.get(PdfName.ANNOTS) == null) 2830 pageRefs.releasePage(k); 2831 else 2832 page.remove(PdfName.ANNOTS); 2833 } 2834 catalog.remove(PdfName.ACROFORM); 2835 pageRefs.resetReleasePage(); 2836 } 2837 2838 /** 2839 * Retrieves links for a certain page. 
2840 * @param page the page to inspect 2841 * @return a list of links 2842 */ 2843 public ArrayList<PdfAnnotation.PdfImportedLink> getLinks(final int page) { 2844 pageRefs.resetReleasePage(); 2845 ArrayList<PdfAnnotation.PdfImportedLink> result = new ArrayList<PdfAnnotation.PdfImportedLink>(); 2846 PdfDictionary pageDic = pageRefs.getPageN(page); 2847 if (pageDic.get(PdfName.ANNOTS) != null) { 2848 PdfArray annots = pageDic.getAsArray(PdfName.ANNOTS); 2849 for (int j = 0; j < annots.size(); ++j) { 2850 PdfDictionary annot = (PdfDictionary)getPdfObjectRelease(annots.getPdfObject(j)); 2851 2852 if (PdfName.LINK.equals(annot.get(PdfName.SUBTYPE))) { 2853 result.add(new PdfAnnotation.PdfImportedLink(annot)); 2854 } 2855 } 2856 } 2857 pageRefs.releasePage(page); 2858 pageRefs.resetReleasePage(); 2859 return result; 2860 } 2861 2862 private void iterateBookmarks(PdfObject outlineRef, final HashMap<Object, PdfObject> names) { 2863 while (outlineRef != null) { 2864 replaceNamedDestination(outlineRef, names); 2865 PdfDictionary outline = (PdfDictionary)getPdfObjectRelease(outlineRef); 2866 PdfObject first = outline.get(PdfName.FIRST); 2867 if (first != null) { 2868 iterateBookmarks(first, names); 2869 } 2870 outlineRef = outline.get(PdfName.NEXT); 2871 } 2872 } 2873 2874 /** 2875 * Replaces remote named links with local destinations that have the same name. 
2876 * @since 5.0 2877 */ 2878 public void makeRemoteNamedDestinationsLocal() { 2879 if (remoteToLocalNamedDestinations) 2880 return; 2881 remoteToLocalNamedDestinations = true; 2882 HashMap<Object, PdfObject> names = getNamedDestination(true); 2883 if (names.isEmpty()) 2884 return; 2885 for (int k = 1; k <= pageRefs.size(); ++k) { 2886 PdfDictionary page = pageRefs.getPageN(k); 2887 PdfObject annotsRef; 2888 PdfArray annots = (PdfArray)getPdfObject(annotsRef = page.get(PdfName.ANNOTS)); 2889 int annotIdx = lastXrefPartial; 2890 releaseLastXrefPartial(); 2891 if (annots == null) { 2892 pageRefs.releasePage(k); 2893 continue; 2894 } 2895 boolean commitAnnots = false; 2896 for (int an = 0; an < annots.size(); ++an) { 2897 PdfObject objRef = annots.getPdfObject(an); 2898 if (convertNamedDestination(objRef, names) && !objRef.isIndirect()) 2899 commitAnnots = true; 2900 } 2901 if (commitAnnots) 2902 setXrefPartialObject(annotIdx, annots); 2903 if (!commitAnnots || annotsRef.isIndirect()) 2904 pageRefs.releasePage(k); 2905 } 2906 } 2907 2908 /** 2909 * Converts a remote named destination GoToR with a local named destination 2910 * if there's a corresponding name. 2911 * @param obj an annotation that needs to be screened for links to external named destinations. 
2912 * @param names a map with names of local named destinations 2913 * @since iText 5.0 2914 */ 2915 private boolean convertNamedDestination(PdfObject obj, final HashMap<Object, PdfObject> names) { 2916 obj = getPdfObject(obj); 2917 int objIdx = lastXrefPartial; 2918 releaseLastXrefPartial(); 2919 if (obj != null && obj.isDictionary()) { 2920 PdfObject ob2 = getPdfObject(((PdfDictionary)obj).get(PdfName.A)); 2921 if (ob2 != null) { 2922 int obj2Idx = lastXrefPartial; 2923 releaseLastXrefPartial(); 2924 PdfDictionary dic = (PdfDictionary)ob2; 2925 PdfName type = (PdfName)getPdfObjectRelease(dic.get(PdfName.S)); 2926 if (PdfName.GOTOR.equals(type)) { 2927 PdfObject ob3 = getPdfObjectRelease(dic.get(PdfName.D)); 2928 Object name = null; 2929 if (ob3 != null) { 2930 if (ob3.isName()) 2931 name = ob3; 2932 else if (ob3.isString()) 2933 name = ob3.toString(); 2934 PdfArray dest = (PdfArray)names.get(name); 2935 if (dest != null) { 2936 dic.remove(PdfName.F); 2937 dic.remove(PdfName.NEWWINDOW); 2938 dic.put(PdfName.S, PdfName.GOTO); 2939 setXrefPartialObject(obj2Idx, ob2); 2940 setXrefPartialObject(objIdx, obj); 2941 return true; 2942 } 2943 } 2944 } 2945 } 2946 } 2947 return false; 2948 } 2949 2950 /** Replaces all the local named links with the actual destinations. 
*/ 2951 public void consolidateNamedDestinations() { 2952 if (consolidateNamedDestinations) 2953 return; 2954 consolidateNamedDestinations = true; 2955 HashMap<Object, PdfObject> names = getNamedDestination(true); 2956 if (names.isEmpty()) 2957 return; 2958 for (int k = 1; k <= pageRefs.size(); ++k) { 2959 PdfDictionary page = pageRefs.getPageN(k); 2960 PdfObject annotsRef; 2961 PdfArray annots = (PdfArray)getPdfObject(annotsRef = page.get(PdfName.ANNOTS)); 2962 int annotIdx = lastXrefPartial; 2963 releaseLastXrefPartial(); 2964 if (annots == null) { 2965 pageRefs.releasePage(k); 2966 continue; 2967 } 2968 boolean commitAnnots = false; 2969 for (int an = 0; an < annots.size(); ++an) { 2970 PdfObject objRef = annots.getPdfObject(an); 2971 if (replaceNamedDestination(objRef, names) && !objRef.isIndirect()) 2972 commitAnnots = true; 2973 } 2974 if (commitAnnots) 2975 setXrefPartialObject(annotIdx, annots); 2976 if (!commitAnnots || annotsRef.isIndirect()) 2977 pageRefs.releasePage(k); 2978 } 2979 PdfDictionary outlines = (PdfDictionary)getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); 2980 if (outlines == null) 2981 return; 2982 iterateBookmarks(outlines.get(PdfName.FIRST), names); 2983 } 2984 2985 private boolean replaceNamedDestination(PdfObject obj, final HashMap<Object, PdfObject> names) { 2986 obj = getPdfObject(obj); 2987 int objIdx = lastXrefPartial; 2988 releaseLastXrefPartial(); 2989 if (obj != null && obj.isDictionary()) { 2990 PdfObject ob2 = getPdfObjectRelease(((PdfDictionary)obj).get(PdfName.DEST)); 2991 Object name = null; 2992 if (ob2 != null) { 2993 if (ob2.isName()) 2994 name = ob2; 2995 else if (ob2.isString()) 2996 name = ob2.toString(); 2997 PdfArray dest = (PdfArray)names.get(name); 2998 if (dest != null) { 2999 ((PdfDictionary)obj).put(PdfName.DEST, dest); 3000 setXrefPartialObject(objIdx, obj); 3001 return true; 3002 } 3003 } 3004 else if ((ob2 = getPdfObject(((PdfDictionary)obj).get(PdfName.A))) != null) { 3005 int obj2Idx = lastXrefPartial; 
3006 releaseLastXrefPartial(); 3007 PdfDictionary dic = (PdfDictionary)ob2; 3008 PdfName type = (PdfName)getPdfObjectRelease(dic.get(PdfName.S)); 3009 if (PdfName.GOTO.equals(type)) { 3010 PdfObject ob3 = getPdfObjectRelease(dic.get(PdfName.D)); 3011 if (ob3 != null) { 3012 if (ob3.isName()) 3013 name = ob3; 3014 else if (ob3.isString()) 3015 name = ob3.toString(); 3016 } 3017 PdfArray dest = (PdfArray)names.get(name); 3018 if (dest != null) { 3019 dic.put(PdfName.D, dest); 3020 setXrefPartialObject(obj2Idx, ob2); 3021 setXrefPartialObject(objIdx, obj); 3022 return true; 3023 } 3024 } 3025 } 3026 } 3027 return false; 3028 } 3029 3030 protected static PdfDictionary duplicatePdfDictionary(final PdfDictionary original, PdfDictionary copy, final PdfReader newReader) { 3031 if (copy == null) 3032 copy = new PdfDictionary(); 3033 for (Object element : original.getKeys()) { 3034 PdfName key = (PdfName)element; 3035 copy.put(key, duplicatePdfObject(original.get(key), newReader)); 3036 } 3037 return copy; 3038 } 3039 3040 protected static PdfObject duplicatePdfObject(final PdfObject original, final PdfReader newReader) { 3041 if (original == null) 3042 return null; 3043 switch (original.type()) { 3044 case PdfObject.DICTIONARY: { 3045 return duplicatePdfDictionary((PdfDictionary)original, null, newReader); 3046 } 3047 case PdfObject.STREAM: { 3048 PRStream org = (PRStream)original; 3049 PRStream stream = new PRStream(org, null, newReader); 3050 duplicatePdfDictionary(org, stream, newReader); 3051 return stream; 3052 } 3053 case PdfObject.ARRAY: { 3054 PdfArray arr = new PdfArray(); 3055 for (Iterator<PdfObject> it = ((PdfArray)original).listIterator(); it.hasNext();) { 3056 arr.add(duplicatePdfObject(it.next(), newReader)); 3057 } 3058 return arr; 3059 } 3060 case PdfObject.INDIRECT: { 3061 PRIndirectReference org = (PRIndirectReference)original; 3062 return new PRIndirectReference(newReader, org.getNumber(), org.getGeneration()); 3063 } 3064 default: 3065 return original; 
3066 } 3067 } 3068 3069 /** 3070 * Closes the reader 3071 */ 3072 public void close() { 3073 if (!partial) 3074 return; 3075 try { 3076 tokens.close(); 3077 } 3078 catch (IOException e) { 3079 throw new ExceptionConverter(e); 3080 } 3081 } 3082 3083 @SuppressWarnings("unchecked") 3084 protected void removeUnusedNode(PdfObject obj, final boolean hits[]) { 3085 Stack<Object> state = new Stack<Object>(); 3086 state.push(obj); 3087 while (!state.empty()) { 3088 Object current = state.pop(); 3089 if (current == null) 3090 continue; 3091 ArrayList<PdfObject> ar = null; 3092 PdfDictionary dic = null; 3093 PdfName[] keys = null; 3094 Object[] objs = null; 3095 int idx = 0; 3096 if (current instanceof PdfObject) { 3097 obj = (PdfObject)current; 3098 switch (obj.type()) { 3099 case PdfObject.DICTIONARY: 3100 case PdfObject.STREAM: 3101 dic = (PdfDictionary)obj; 3102 keys = new PdfName[dic.size()]; 3103 dic.getKeys().toArray(keys); 3104 break; 3105 case PdfObject.ARRAY: 3106 ar = ((PdfArray)obj).getArrayList(); 3107 break; 3108 case PdfObject.INDIRECT: 3109 PRIndirectReference ref = (PRIndirectReference)obj; 3110 int num = ref.getNumber(); 3111 if (!hits[num]) { 3112 hits[num] = true; 3113 state.push(getPdfObjectRelease(ref)); 3114 } 3115 continue; 3116 default: 3117 continue; 3118 } 3119 } 3120 else { 3121 objs = (Object[])current; 3122 if (objs[0] instanceof ArrayList) { 3123 ar = (ArrayList<PdfObject>)objs[0]; 3124 idx = ((Integer)objs[1]).intValue(); 3125 } 3126 else { 3127 keys = (PdfName[])objs[0]; 3128 dic = (PdfDictionary)objs[1]; 3129 idx = ((Integer)objs[2]).intValue(); 3130 } 3131 } 3132 if (ar != null) { 3133 for (int k = idx; k < ar.size(); ++k) { 3134 PdfObject v = ar.get(k); 3135 if (v.isIndirect()) { 3136 int num = ((PRIndirectReference)v).getNumber(); 3137 if (num >= xrefObj.size() || !partial && xrefObj.get(num) == null) { 3138 ar.set(k, PdfNull.PDFNULL); 3139 continue; 3140 } 3141 } 3142 if (objs == null) 3143 state.push(new Object[]{ar, Integer.valueOf(k + 
1)}); 3144 else { 3145 objs[1] = Integer.valueOf(k + 1); 3146 state.push(objs); 3147 } 3148 state.push(v); 3149 break; 3150 } 3151 } 3152 else { 3153 for (int k = idx; k < keys.length; ++k) { 3154 PdfName key = keys[k]; 3155 PdfObject v = dic.get(key); 3156 if (v.isIndirect()) { 3157 int num = ((PRIndirectReference)v).getNumber(); 3158 if (num >= xrefObj.size() || !partial && xrefObj.get(num) == null) { 3159 dic.put(key, PdfNull.PDFNULL); 3160 continue; 3161 } 3162 } 3163 if (objs == null) 3164 state.push(new Object[]{keys, dic, Integer.valueOf(k + 1)}); 3165 else { 3166 objs[2] = Integer.valueOf(k + 1); 3167 state.push(objs); 3168 } 3169 state.push(v); 3170 break; 3171 } 3172 } 3173 } 3174 } 3175 3176 /** Removes all the unreachable objects. 3177 * @return the number of indirect objects removed 3178 */ 3179 public int removeUnusedObjects() { 3180 boolean hits[] = new boolean[xrefObj.size()]; 3181 removeUnusedNode(trailer, hits); 3182 int total = 0; 3183 if (partial) { 3184 for (int k = 1; k < hits.length; ++k) { 3185 if (!hits[k]) { 3186 xref[k * 2] = -1; 3187 xref[k * 2 + 1] = 0; 3188 xrefObj.set(k, null); 3189 ++total; 3190 } 3191 } 3192 } 3193 else { 3194 for (int k = 1; k < hits.length; ++k) { 3195 if (!hits[k]) { 3196 xrefObj.set(k, null); 3197 ++total; 3198 } 3199 } 3200 } 3201 return total; 3202 } 3203 3204 /** Gets a read-only version of <CODE>AcroFields</CODE>. 3205 * @return a read-only version of <CODE>AcroFields</CODE> 3206 */ 3207 public AcroFields getAcroFields() { 3208 return new AcroFields(this, null); 3209 } 3210 3211 /** 3212 * Gets the global document JavaScript. 
3213 * @param file the document file 3214 * @throws IOException on error 3215 * @return the global document JavaScript 3216 */ 3217 public String getJavaScript(final RandomAccessFileOrArray file) throws IOException { 3218 PdfDictionary names = (PdfDictionary)getPdfObjectRelease(catalog.get(PdfName.NAMES)); 3219 if (names == null) 3220 return null; 3221 PdfDictionary js = (PdfDictionary)getPdfObjectRelease(names.get(PdfName.JAVASCRIPT)); 3222 if (js == null) 3223 return null; 3224 HashMap<String, PdfObject> jscript = PdfNameTree.readTree(js); 3225 String sortedNames[] = new String[jscript.size()]; 3226 sortedNames = jscript.keySet().toArray(sortedNames); 3227 Arrays.sort(sortedNames); 3228 StringBuffer buf = new StringBuffer(); 3229 for (int k = 0; k < sortedNames.length; ++k) { 3230 PdfDictionary j = (PdfDictionary)getPdfObjectRelease(jscript.get(sortedNames[k])); 3231 if (j == null) 3232 continue; 3233 PdfObject obj = getPdfObjectRelease(j.get(PdfName.JS)); 3234 if (obj != null) { 3235 if (obj.isString()) 3236 buf.append(((PdfString)obj).toUnicodeString()).append('\n'); 3237 else if (obj.isStream()) { 3238 byte bytes[] = getStreamBytes((PRStream)obj, file); 3239 if (bytes.length >= 2 && bytes[0] == (byte)254 && bytes[1] == (byte)255) 3240 buf.append(PdfEncodings.convertToString(bytes, PdfObject.TEXT_UNICODE)); 3241 else 3242 buf.append(PdfEncodings.convertToString(bytes, PdfObject.TEXT_PDFDOCENCODING)); 3243 buf.append('\n'); 3244 } 3245 } 3246 } 3247 return buf.toString(); 3248 } 3249 3250 /** 3251 * Gets the global document JavaScript. 3252 * @throws IOException on error 3253 * @return the global document JavaScript 3254 */ 3255 public String getJavaScript() throws IOException { 3256 RandomAccessFileOrArray rf = getSafeFile(); 3257 try { 3258 rf.reOpen(); 3259 return getJavaScript(rf); 3260 } 3261 finally { 3262 try{rf.close();}catch(Exception e){} 3263 } 3264 } 3265 3266 /** 3267 * Selects the pages to keep in the document. 
The pages are described as 3268 * ranges. The page ordering can be changed but 3269 * no page repetitions are allowed. Note that it may be very slow in partial mode. 3270 * @param ranges the comma separated ranges as described in {@link SequenceList} 3271 */ 3272 public void selectPages(final String ranges) { 3273 selectPages(SequenceList.expand(ranges, getNumberOfPages())); 3274 } 3275 3276 /** 3277 * Selects the pages to keep in the document. The pages are described as a 3278 * <CODE>List</CODE> of <CODE>Integer</CODE>. The page ordering can be changed but 3279 * no page repetitions are allowed. Note that it may be very slow in partial mode. 3280 * @param pagesToKeep the pages to keep in the document 3281 */ 3282 public void selectPages(final List<Integer> pagesToKeep) { 3283 pageRefs.selectPages(pagesToKeep); 3284 removeUnusedObjects(); 3285 } 3286 3287 /** Sets the viewer preferences as the sum of several constants. 3288 * @param preferences the viewer preferences 3289 * @see PdfViewerPreferences#setViewerPreferences 3290 */ 3291 public void setViewerPreferences(final int preferences) { 3292 this.viewerPreferences.setViewerPreferences(preferences); 3293 setViewerPreferences(this.viewerPreferences); 3294 } 3295 3296 /** Adds a viewer preference 3297 * @param key a key for a viewer preference 3298 * @param value a value for the viewer preference 3299 * @see PdfViewerPreferences#addViewerPreference 3300 */ 3301 public void addViewerPreference(final PdfName key, final PdfObject value) { 3302 this.viewerPreferences.addViewerPreference(key, value); 3303 setViewerPreferences(this.viewerPreferences); 3304 } 3305 3306 void setViewerPreferences(final PdfViewerPreferencesImp vp) { 3307 vp.addToCatalog(catalog); 3308 } 3309 3310 /** 3311 * Returns a bitset representing the PageMode and PageLayout viewer preferences. 3312 * Doesn't return any information about the ViewerPreferences dictionary. 3313 * @return an int that contains the Viewer Preferences. 
3314 */ 3315 public int getSimpleViewerPreferences() { 3316 return PdfViewerPreferencesImp.getViewerPreferences(catalog).getPageLayoutAndMode(); 3317 } 3318 3319 /** 3320 * Getter for property appendable. 3321 * @return Value of property appendable. 3322 */ 3323 public boolean isAppendable() { 3324 return this.appendable; 3325 } 3326 3327 /** 3328 * Setter for property appendable. 3329 * @param appendable New value of property appendable. 3330 */ 3331 public void setAppendable(final boolean appendable) { 3332 this.appendable = appendable; 3333 if (appendable) 3334 getPdfObject(trailer.get(PdfName.ROOT)); 3335 } 3336 3337 /** 3338 * Getter for property newXrefType. 3339 * @return Value of property newXrefType. 3340 */ 3341 public boolean isNewXrefType() { 3342 return newXrefType; 3343 } 3344 3345 /** 3346 * Getter for property fileLength. 3347 * @return Value of property fileLength. 3348 */ 3349 public int getFileLength() { 3350 return fileLength; 3351 } 3352 3353 /** 3354 * Getter for property hybridXref. 3355 * @return Value of property hybridXref. 3356 */ 3357 public boolean isHybridXref() { 3358 return hybridXref; 3359 } 3360 3361 static class PageRefs { 3362 private final PdfReader reader; 3363 /** ArrayList with the indirect references to every page. Element 0 = page 1; 1 = page 2;... Not used for partial reading. */ 3364 private ArrayList<PRIndirectReference> refsn; 3365 /** The number of pages, updated only in case of partial reading. */ 3366 private int sizep; 3367 /** intHashtable that does the same thing as refsn in case of partial reading: major difference: not all the pages are read. 
*/ 3368 private IntHashtable refsp; 3369 /** Page number of the last page that was read (partial reading only) */ 3370 private int lastPageRead = -1; 3371 /** stack to which pages dictionaries are pushed to keep track of the current page attributes */ 3372 private ArrayList<PdfDictionary> pageInh; 3373 private boolean keepPages; 3374 3375 private PageRefs(final PdfReader reader) throws IOException { 3376 this.reader = reader; 3377 if (reader.partial) { 3378 refsp = new IntHashtable(); 3379 PdfNumber npages = (PdfNumber)PdfReader.getPdfObjectRelease(reader.rootPages.get(PdfName.COUNT)); 3380 sizep = npages.intValue(); 3381 } 3382 else { 3383 readPages(); 3384 } 3385 } 3386 3387 PageRefs(final PageRefs other, final PdfReader reader) { 3388 this.reader = reader; 3389 this.sizep = other.sizep; 3390 if (other.refsn != null) { 3391 refsn = new ArrayList<PRIndirectReference>(other.refsn); 3392 for (int k = 0; k < refsn.size(); ++k) { 3393 refsn.set(k, (PRIndirectReference)duplicatePdfObject(refsn.get(k), reader)); 3394 } 3395 } 3396 else 3397 this.refsp = (IntHashtable)other.refsp.clone(); 3398 } 3399 3400 int size() { 3401 if (refsn != null) 3402 return refsn.size(); 3403 else 3404 return sizep; 3405 } 3406 3407 void readPages() throws IOException { 3408 if (refsn != null) 3409 return; 3410 refsp = null; 3411 refsn = new ArrayList<PRIndirectReference>(); 3412 pageInh = new ArrayList<PdfDictionary>(); 3413 iteratePages((PRIndirectReference)reader.catalog.get(PdfName.PAGES)); 3414 pageInh = null; 3415 reader.rootPages.put(PdfName.COUNT, new PdfNumber(refsn.size())); 3416 } 3417 3418 void reReadPages() throws IOException { 3419 refsn = null; 3420 readPages(); 3421 } 3422 3423 /** Gets the dictionary that represents a page. 3424 * @param pageNum the page number. 
1 is the first 3425 * @return the page dictionary 3426 */ 3427 public PdfDictionary getPageN(final int pageNum) { 3428 PRIndirectReference ref = getPageOrigRef(pageNum); 3429 return (PdfDictionary)PdfReader.getPdfObject(ref); 3430 } 3431 3432 /** 3433 * @param pageNum 3434 * @return a dictionary object 3435 */ 3436 public PdfDictionary getPageNRelease(final int pageNum) { 3437 PdfDictionary page = getPageN(pageNum); 3438 releasePage(pageNum); 3439 return page; 3440 } 3441 3442 /** 3443 * @param pageNum 3444 * @return an indirect reference 3445 */ 3446 public PRIndirectReference getPageOrigRefRelease(final int pageNum) { 3447 PRIndirectReference ref = getPageOrigRef(pageNum); 3448 releasePage(pageNum); 3449 return ref; 3450 } 3451 3452 /** 3453 * Gets the page reference to this page. 3454 * @param pageNum the page number. 1 is the first 3455 * @return the page reference 3456 */ 3457 public PRIndirectReference getPageOrigRef(int pageNum) { 3458 try { 3459 --pageNum; 3460 if (pageNum < 0 || pageNum >= size()) 3461 return null; 3462 if (refsn != null) 3463 return refsn.get(pageNum); 3464 else { 3465 int n = refsp.get(pageNum); 3466 if (n == 0) { 3467 PRIndirectReference ref = getSinglePage(pageNum); 3468 if (reader.lastXrefPartial == -1) 3469 lastPageRead = -1; 3470 else 3471 lastPageRead = pageNum; 3472 reader.lastXrefPartial = -1; 3473 refsp.put(pageNum, ref.getNumber()); 3474 if (keepPages) 3475 lastPageRead = -1; 3476 return ref; 3477 } 3478 else { 3479 if (lastPageRead != pageNum) 3480 lastPageRead = -1; 3481 if (keepPages) 3482 lastPageRead = -1; 3483 return new PRIndirectReference(reader, n); 3484 } 3485 } 3486 } 3487 catch (Exception e) { 3488 throw new ExceptionConverter(e); 3489 } 3490 } 3491 3492 void keepPages() { 3493 if (refsp == null || keepPages) 3494 return; 3495 keepPages = true; 3496 refsp.clear(); 3497 } 3498 3499 /** 3500 * @param pageNum 3501 */ 3502 public void releasePage(int pageNum) { 3503 if (refsp == null) 3504 return; 3505 --pageNum; 3506 
if (pageNum < 0 || pageNum >= size()) 3507 return; 3508 if (pageNum != lastPageRead) 3509 return; 3510 lastPageRead = -1; 3511 reader.lastXrefPartial = refsp.get(pageNum); 3512 reader.releaseLastXrefPartial(); 3513 refsp.remove(pageNum); 3514 } 3515 3516 /** 3517 * 3518 */ 3519 public void resetReleasePage() { 3520 if (refsp == null) 3521 return; 3522 lastPageRead = -1; 3523 } 3524 3525 void insertPage(int pageNum, final PRIndirectReference ref) { 3526 --pageNum; 3527 if (refsn != null) { 3528 if (pageNum >= refsn.size()) 3529 refsn.add(ref); 3530 else 3531 refsn.add(pageNum, ref); 3532 } 3533 else { 3534 ++sizep; 3535 lastPageRead = -1; 3536 if (pageNum >= size()) { 3537 refsp.put(size(), ref.getNumber()); 3538 } 3539 else { 3540 IntHashtable refs2 = new IntHashtable((refsp.size() + 1) * 2); 3541 for (Iterator<IntHashtable.Entry> it = refsp.getEntryIterator(); it.hasNext();) { 3542 IntHashtable.Entry entry = it.next(); 3543 int p = entry.getKey(); 3544 refs2.put(p >= pageNum ? p + 1 : p, entry.getValue()); 3545 } 3546 refs2.put(pageNum, ref.getNumber()); 3547 refsp = refs2; 3548 } 3549 } 3550 } 3551 3552 /** 3553 * Adds a PdfDictionary to the pageInh stack to keep track of the page attributes. 3554 * @param nodePages a Pages dictionary 3555 */ 3556 private void pushPageAttributes(final PdfDictionary nodePages) { 3557 PdfDictionary dic = new PdfDictionary(); 3558 if (!pageInh.isEmpty()) { 3559 dic.putAll(pageInh.get(pageInh.size() - 1)); 3560 } 3561 for (int k = 0; k < pageInhCandidates.length; ++k) { 3562 PdfObject obj = nodePages.get(pageInhCandidates[k]); 3563 if (obj != null) 3564 dic.put(pageInhCandidates[k], obj); 3565 } 3566 pageInh.add(dic); 3567 } 3568 3569 /** 3570 * Removes the last PdfDictionary that was pushed to the pageInh stack. 
3571 */ 3572 private void popPageAttributes() { 3573 pageInh.remove(pageInh.size() - 1); 3574 } 3575 3576 private void iteratePages(final PRIndirectReference rpage) throws IOException { 3577 PdfDictionary page = (PdfDictionary)getPdfObject(rpage); 3578 PdfArray kidsPR = page.getAsArray(PdfName.KIDS); 3579 // reference to a leaf 3580 if (kidsPR == null) { 3581 page.put(PdfName.TYPE, PdfName.PAGE); 3582 PdfDictionary dic = pageInh.get(pageInh.size() - 1); 3583 PdfName key; 3584 for (Object element : dic.getKeys()) { 3585 key = (PdfName)element; 3586 if (page.get(key) == null) 3587 page.put(key, dic.get(key)); 3588 } 3589 if (page.get(PdfName.MEDIABOX) == null) { 3590 PdfArray arr = new PdfArray(new float[]{0,0,PageSize.LETTER.getRight(),PageSize.LETTER.getTop()}); 3591 page.put(PdfName.MEDIABOX, arr); 3592 } 3593 refsn.add(rpage); 3594 } 3595 // reference to a branch 3596 else { 3597 page.put(PdfName.TYPE, PdfName.PAGES); 3598 pushPageAttributes(page); 3599 for (int k = 0; k < kidsPR.size(); ++k){ 3600 PdfObject obj = kidsPR.getPdfObject(k); 3601 if (!obj.isIndirect()) { 3602 while (k < kidsPR.size()) 3603 kidsPR.remove(k); 3604 break; 3605 } 3606 iteratePages((PRIndirectReference)obj); 3607 } 3608 popPageAttributes(); 3609 } 3610 } 3611 3612 protected PRIndirectReference getSinglePage(final int n) { 3613 PdfDictionary acc = new PdfDictionary(); 3614 PdfDictionary top = reader.rootPages; 3615 int base = 0; 3616 while (true) { 3617 for (int k = 0; k < pageInhCandidates.length; ++k) { 3618 PdfObject obj = top.get(pageInhCandidates[k]); 3619 if (obj != null) 3620 acc.put(pageInhCandidates[k], obj); 3621 } 3622 PdfArray kids = (PdfArray)PdfReader.getPdfObjectRelease(top.get(PdfName.KIDS)); 3623 for (Iterator<PdfObject> it = kids.listIterator(); it.hasNext();) { 3624 PRIndirectReference ref = (PRIndirectReference)it.next(); 3625 PdfDictionary dic = (PdfDictionary)getPdfObject(ref); 3626 int last = reader.lastXrefPartial; 3627 PdfObject count = 
getPdfObjectRelease(dic.get(PdfName.COUNT)); 3628 reader.lastXrefPartial = last; 3629 int acn = 1; 3630 if (count != null && count.type() == PdfObject.NUMBER) 3631 acn = ((PdfNumber)count).intValue(); 3632 if (n < base + acn) { 3633 if (count == null) { 3634 dic.mergeDifferent(acc); 3635 return ref; 3636 } 3637 reader.releaseLastXrefPartial(); 3638 top = dic; 3639 break; 3640 } 3641 reader.releaseLastXrefPartial(); 3642 base += acn; 3643 } 3644 } 3645 } 3646 3647 private void selectPages(final List<Integer> pagesToKeep) { 3648 IntHashtable pg = new IntHashtable(); 3649 ArrayList<Integer> finalPages = new ArrayList<Integer>(); 3650 int psize = size(); 3651 for (Integer pi : pagesToKeep) { 3652 int p = pi.intValue(); 3653 if (p >= 1 && p <= psize && pg.put(p, 1) == 0) 3654 finalPages.add(pi); 3655 } 3656 if (reader.partial) { 3657 for (int k = 1; k <= psize; ++k) { 3658 getPageOrigRef(k); 3659 resetReleasePage(); 3660 } 3661 } 3662 PRIndirectReference parent = (PRIndirectReference)reader.catalog.get(PdfName.PAGES); 3663 PdfDictionary topPages = (PdfDictionary)PdfReader.getPdfObject(parent); 3664 ArrayList<PRIndirectReference> newPageRefs = new ArrayList<PRIndirectReference>(finalPages.size()); 3665 PdfArray kids = new PdfArray(); 3666 for (int k = 0; k < finalPages.size(); ++k) { 3667 int p = finalPages.get(k).intValue(); 3668 PRIndirectReference pref = getPageOrigRef(p); 3669 resetReleasePage(); 3670 kids.add(pref); 3671 newPageRefs.add(pref); 3672 getPageN(p).put(PdfName.PARENT, parent); 3673 } 3674 AcroFields af = reader.getAcroFields(); 3675 boolean removeFields = af.getFields().size() > 0; 3676 for (int k = 1; k <= psize; ++k) { 3677 if (!pg.containsKey(k)) { 3678 if (removeFields) 3679 af.removeFieldsFromPage(k); 3680 PRIndirectReference pref = getPageOrigRef(k); 3681 int nref = pref.getNumber(); 3682 reader.xrefObj.set(nref, null); 3683 if (reader.partial) { 3684 reader.xref[nref * 2] = -1; 3685 reader.xref[nref * 2 + 1] = 0; 3686 } 3687 } 3688 } 3689 
topPages.put(PdfName.COUNT, new PdfNumber(finalPages.size())); 3690 topPages.put(PdfName.KIDS, kids); 3691 refsp = null; 3692 refsn = newPageRefs; 3693 } 3694 } 3695 3696 PdfIndirectReference getCryptoRef() { 3697 if (cryptoRef == null) 3698 return null; 3699 return new PdfIndirectReference(0, cryptoRef.getNumber(), cryptoRef.getGeneration()); 3700 } 3701 3702 /** 3703 * Removes any usage rights that this PDF may have. Only Adobe can grant usage rights 3704 * and any PDF modification with iText will invalidate them. Invalidated usage rights may 3705 * confuse Acrobat and it's advisable to remove them altogether. 3706 */ 3707 public void removeUsageRights() { 3708 PdfDictionary perms = catalog.getAsDict(PdfName.PERMS); 3709 if (perms == null) 3710 return; 3711 perms.remove(PdfName.UR); 3712 perms.remove(PdfName.UR3); 3713 if (perms.size() == 0) 3714 catalog.remove(PdfName.PERMS); 3715 } 3716 3717 /** 3718 * Gets the certification level for this document. The return values can be <code>PdfSignatureAppearance.NOT_CERTIFIED</code>, 3719 * <code>PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED</code>, 3720 * <code>PdfSignatureAppearance.CERTIFIED_FORM_FILLING</code> and 3721 * <code>PdfSignatureAppearance.CERTIFIED_FORM_FILLING_AND_ANNOTATIONS</code>. 3722 * <p> 3723 * No signature validation is made, use the methods available for that in <CODE>AcroFields</CODE>. 
3724 * </p> 3725 * @return gets the certification level for this document 3726 */ 3727 public int getCertificationLevel() { 3728 PdfDictionary dic = catalog.getAsDict(PdfName.PERMS); 3729 if (dic == null) 3730 return PdfSignatureAppearance.NOT_CERTIFIED; 3731 dic = dic.getAsDict(PdfName.DOCMDP); 3732 if (dic == null) 3733 return PdfSignatureAppearance.NOT_CERTIFIED; 3734 PdfArray arr = dic.getAsArray(PdfName.REFERENCE); 3735 if (arr == null || arr.size() == 0) 3736 return PdfSignatureAppearance.NOT_CERTIFIED; 3737 dic = arr.getAsDict(0); 3738 if (dic == null) 3739 return PdfSignatureAppearance.NOT_CERTIFIED; 3740 dic = dic.getAsDict(PdfName.TRANSFORMPARAMS); 3741 if (dic == null) 3742 return PdfSignatureAppearance.NOT_CERTIFIED; 3743 PdfNumber p = dic.getAsNumber(PdfName.P); 3744 if (p == null) 3745 return PdfSignatureAppearance.NOT_CERTIFIED; 3746 return p.intValue(); 3747 } 3748 3749 /** 3750 * Checks if the document was opened with the owner password so that the end application 3751 * can decide what level of access restrictions to apply. If the document is not encrypted 3752 * it will return <CODE>true</CODE>. 3753 * @return <CODE>true</CODE> if the document was opened with the owner password or if it's not encrypted, 3754 * <CODE>false</CODE> if the document was opened with the user password 3755 */ 3756 public final boolean isOpenedWithFullPermissions() { 3757 return !encrypted || ownerPasswordUsed || unethicalreading; 3758 } 3759 3760 /** 3761 * @return the crypto mode, or -1 of none 3762 */ 3763 public int getCryptoMode() { 3764 if (decrypt == null) 3765 return -1; 3766 else 3767 return decrypt.getCryptoMode(); 3768 } 3769 3770 /** 3771 * @return true if the metadata is encrypted. 3772 */ 3773 public boolean isMetadataEncrypted() { 3774 if (decrypt == null) 3775 return false; 3776 else 3777 return decrypt.isMetadataEncrypted(); 3778 } 3779 3780 /** 3781 * @return byte of computed user password, or null if not encrypted or no ownerPassword is used. 
*/
    public byte[] computeUserPassword() {
        // only computed for an encrypted document that was opened with the owner password
        final boolean openedByOwner = encrypted && ownerPasswordUsed;
        return openedByOwner ? decrypt.computeUserPassword(password) : null;
    }
}