/*
 * $Id: PdfContentParser.java 4784 2011-03-15 08:33:00Z blowagie $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2011 1T3XT BVBA
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.pdf;

import java.io.IOException;
import java.util.ArrayList;

import com.itextpdf.text.error_messages.MessageLocalization;
import com.itextpdf.text.pdf.PRTokeniser.TokenType;

/**
 * Parses the page or template content.
 * @author Paulo Soares
 */
public class PdfContentParser {

    /**
     * Commands have this type.
     */
    public static final int COMMAND_TYPE = 200;

    /**
     * Holds value of property tokeniser.
     */
    private PRTokeniser tokeniser;

    /**
     * Creates a new instance of PdfContentParser
     * @param tokeniser the tokeniser with the content
     */
    public PdfContentParser(PRTokeniser tokeniser) {
        this.tokeniser = tokeniser;
    }

    /**
     * Parses a single command from the content. Each command is output as an array of arguments
     * having the command itself as the last element. Reading stops after the first object whose
     * type is {@link #COMMAND_TYPE}, or when the end of content is reached; in the latter case
     * the list holds whatever objects were read before the end (none, if the content was
     * already exhausted).
     * @param ls an <CODE>ArrayList</CODE> to use. It will be cleared before using. If it is
     * <CODE>null</CODE> a new <CODE>ArrayList</CODE> is created
     * @return the same <CODE>ArrayList</CODE> given as argument or a new one
     * @throws IOException on error
     */
    public ArrayList<PdfObject> parse(ArrayList<PdfObject> ls) throws IOException {
        if (ls == null) {
            ls = new ArrayList<PdfObject>();
        } else {
            ls.clear();
        }
        PdfObject ob;
        while ((ob = readPRObject()) != null) {
            ls.add(ob);
            if (ob.type() == COMMAND_TYPE) {
                break;
            }
        }
        return ls;
    }

    /**
     * Gets the tokeniser.
     * @return the tokeniser.
     */
    public PRTokeniser getTokeniser() {
        return this.tokeniser;
    }

    /**
     * Sets the tokeniser.
     * @param tokeniser the tokeniser
     */
    public void setTokeniser(PRTokeniser tokeniser) {
        this.tokeniser = tokeniser;
    }

    /**
     * Reads a dictionary. The tokeniser must be positioned past the {@code <<} token.
     * @return the dictionary
     * @throws IOException on error, including when the content ends before the closing
     * {@code >>} is found or when a key is not a name
     */
    public PdfDictionary readDictionary() throws IOException {
        PdfDictionary dic = new PdfDictionary();
        while (true) {
            if (!nextValidToken()) {
                throw new IOException(MessageLocalization.getComposedMessage("unexpected.end.of.file"));
            }
            TokenType keyType = tokeniser.getTokenType();
            if (keyType == TokenType.END_DIC) {
                break;
            }
            if (keyType != TokenType.NAME) {
                throw new IOException(MessageLocalization.getComposedMessage("dictionary.key.is.not.a.name"));
            }
            PdfName name = new PdfName(tokeniser.getStringValue(), false);
            // A key with no value before the end of content used to surface as a
            // NullPointerException; it is now reported as an IOException.
            PdfObject obj = readRequiredObject();
            // Structural tokens come back from readPRObject() as literals typed with the
            // negated token ordinal, hence the sign flip before comparing.
            int type = obj.type();
            if (-type == TokenType.END_DIC.ordinal()) {
                throw new IOException(MessageLocalization.getComposedMessage("unexpected.gt.gt"));
            }
            if (-type == TokenType.END_ARRAY.ordinal()) {
                throw new IOException(MessageLocalization.getComposedMessage("unexpected.close.bracket"));
            }
            dic.put(name, obj);
        }
        return dic;
    }

    /**
     * Reads an array. The tokeniser must be positioned past the "[" token.
     * @return an array
     * @throws IOException on error, including when the content ends before the closing
     * "]" is found
     */
    public PdfArray readArray() throws IOException {
        PdfArray array = new PdfArray();
        while (true) {
            // An unterminated array used to surface as a NullPointerException;
            // it is now reported as an IOException.
            PdfObject obj = readRequiredObject();
            // See readPRObject(): structural tokens carry the negated token ordinal as type.
            int type = obj.type();
            if (-type == TokenType.END_ARRAY.ordinal()) {
                break;
            }
            if (-type == TokenType.END_DIC.ordinal()) {
                throw new IOException(MessageLocalization.getComposedMessage("unexpected.gt.gt"));
            }
            array.add(obj);
        }
        return array;
    }

    /**
     * Reads a pdf object.
     * <p>
     * Dictionaries and arrays are read recursively. Tokens of type <CODE>OTHER</CODE> are
     * returned as a <CODE>PdfLiteral</CODE> of type {@link #COMMAND_TYPE}. Any remaining token
     * type is returned as a <CODE>PdfLiteral</CODE> whose type is the negated ordinal of the
     * token type, which is how {@link #readArray()} and {@link #readDictionary()} recognise
     * closing delimiters.
     * @return the pdf object, or <CODE>null</CODE> if the end of content was reached
     * @throws IOException on error
     */
    public PdfObject readPRObject() throws IOException {
        if (!nextValidToken()) {
            return null;
        }
        TokenType type = tokeniser.getTokenType();
        switch (type) {
            case START_DIC:
                return readDictionary();
            case START_ARRAY:
                return readArray();
            case STRING:
                return new PdfString(tokeniser.getStringValue(), null).setHexWriting(tokeniser.isHexString());
            case NAME:
                return new PdfName(tokeniser.getStringValue(), false);
            case NUMBER:
                return new PdfNumber(tokeniser.getStringValue());
            case OTHER:
                return new PdfLiteral(COMMAND_TYPE, tokeniser.getStringValue());
            default:
                return new PdfLiteral(-type.ordinal(), tokeniser.getStringValue());
        }
    }

    /**
     * Reads the next token skipping over the comments.
     * @return <CODE>true</CODE> if a token was read, <CODE>false</CODE> if the end of content was reached
     * @throws IOException on error
     */
    public boolean nextValidToken() throws IOException {
        while (tokeniser.nextToken()) {
            if (tokeniser.getTokenType() != TokenType.COMMENT) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the next object and fails if the content ends first.
     * @return the pdf object, never <CODE>null</CODE>
     * @throws IOException if the end of content was reached, or on any other read error
     */
    private PdfObject readRequiredObject() throws IOException {
        PdfObject obj = readPRObject();
        if (obj == null) {
            throw new IOException(MessageLocalization.getComposedMessage("unexpected.end.of.file"));
        }
        return obj;
    }
}