001/*
002 * $Id: PdfContentParser.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf;
045
046import java.io.IOException;
047import java.util.ArrayList;
048
049import com.itextpdf.text.error_messages.MessageLocalization;
050import com.itextpdf.text.pdf.PRTokeniser.TokenType;
051/**
052 * Parses the page or template content.
053 * @author Paulo Soares
054 */
055public class PdfContentParser {
056
057    /**
058     * Commands have this type.
059     */
060    public static final int COMMAND_TYPE = 200;
061    /**
062     * Holds value of property tokeniser.
063     */
064    private PRTokeniser tokeniser;
065
066    /**
067     * Creates a new instance of PdfContentParser
068     * @param tokeniser the tokeniser with the content
069     */
070    public PdfContentParser(PRTokeniser tokeniser) {
071        this.tokeniser = tokeniser;
072    }
073
074    /**
075     * Parses a single command from the content. Each command is output as an array of arguments
076     * having the command itself as the last element. The returned array will be empty if the
077     * end of content was reached.
078     * @param ls an <CODE>ArrayList</CODE> to use. It will be cleared before using. If it's
079     * <CODE>null</CODE> will create a new <CODE>ArrayList</CODE>
080     * @return the same <CODE>ArrayList</CODE> given as argument or a new one
081     * @throws IOException on error
082     */
083    public ArrayList<PdfObject> parse(ArrayList<PdfObject> ls) throws IOException {
084        if (ls == null)
085            ls = new ArrayList<PdfObject>();
086        else
087            ls.clear();
088        PdfObject ob = null;
089        while ((ob = readPRObject()) != null) {
090            ls.add(ob);
091            if (ob.type() == COMMAND_TYPE)
092                break;
093        }
094        return ls;
095    }
096
097    /**
098     * Gets the tokeniser.
099     * @return the tokeniser.
100     */
101    public PRTokeniser getTokeniser() {
102        return this.tokeniser;
103    }
104
105    /**
106     * Sets the tokeniser.
107     * @param tokeniser the tokeniser
108     */
109    public void setTokeniser(PRTokeniser tokeniser) {
110        this.tokeniser = tokeniser;
111    }
112
113    /**
114     * Reads a dictionary. The tokeniser must be positioned past the "&lt;&lt;" token.
115     * @return the dictionary
116     * @throws IOException on error
117     */
118    public PdfDictionary readDictionary() throws IOException {
119        PdfDictionary dic = new PdfDictionary();
120        while (true) {
121            if (!nextValidToken())
122                throw new IOException(MessageLocalization.getComposedMessage("unexpected.end.of.file"));
123                if (tokeniser.getTokenType() == TokenType.END_DIC)
124                    break;
125                if (tokeniser.getTokenType() != TokenType.NAME)
126                    throw new IOException(MessageLocalization.getComposedMessage("dictionary.key.is.not.a.name"));
127                PdfName name = new PdfName(tokeniser.getStringValue(), false);
128                PdfObject obj = readPRObject();
129                int type = obj.type();
130                if (-type == TokenType.END_DIC.ordinal())
131                    throw new IOException(MessageLocalization.getComposedMessage("unexpected.gt.gt"));
132                if (-type == TokenType.END_ARRAY.ordinal())
133                    throw new IOException(MessageLocalization.getComposedMessage("unexpected.close.bracket"));
134                dic.put(name, obj);
135        }
136        return dic;
137    }
138
139    /**
140     * Reads an array. The tokeniser must be positioned past the "[" token.
141     * @return an array
142     * @throws IOException on error
143     */
144    public PdfArray readArray() throws IOException {
145        PdfArray array = new PdfArray();
146        while (true) {
147            PdfObject obj = readPRObject();
148            int type = obj.type();
149            if (-type == TokenType.END_ARRAY.ordinal())
150                break;
151            if (-type == TokenType.END_DIC.ordinal())
152                throw new IOException(MessageLocalization.getComposedMessage("unexpected.gt.gt"));
153            array.add(obj);
154        }
155        return array;
156    }
157
158    /**
159     * Reads a pdf object.
160     * @return the pdf object
161     * @throws IOException on error
162     */
163    public PdfObject readPRObject() throws IOException {
164        if (!nextValidToken())
165            return null;
166        TokenType type = tokeniser.getTokenType();
167        switch (type) {
168            case START_DIC: {
169                PdfDictionary dic = readDictionary();
170                return dic;
171            }
172            case START_ARRAY:
173                return readArray();
174            case STRING:
175                PdfString str = new PdfString(tokeniser.getStringValue(), null).setHexWriting(tokeniser.isHexString());
176                return str;
177            case NAME:
178                return new PdfName(tokeniser.getStringValue(), false);
179            case NUMBER:
180                return new PdfNumber(tokeniser.getStringValue());
181            case OTHER:
182                return new PdfLiteral(COMMAND_TYPE, tokeniser.getStringValue());
183            default:
184                return new PdfLiteral(-type.ordinal(), tokeniser.getStringValue());
185        }
186    }
187
188    /**
189     * Reads the next token skipping over the comments.
190     * @return <CODE>true</CODE> if a token was read, <CODE>false</CODE> if the end of content was reached
191     * @throws IOException on error
192     */
193    public boolean nextValidToken() throws IOException {
194        while (tokeniser.nextToken()) {
195            if (tokeniser.getTokenType() == TokenType.COMMENT)
196                continue;
197            return true;
198        }
199        return false;
200    }
201}