001/*
002 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved.
003 * 
004 * This software is open source. 
005 * See the bottom of this file for the licence.
006 * 
007 * $Id: XPPReader.java,v 1.1 2001/12/19 09:51:39 jstrachan Exp $
008 */
009
010package org.dom4j.io;
011
012import java.io.BufferedReader;
013import java.io.File;
014import java.io.FileReader;
015import java.io.FileNotFoundException;
016import java.io.InputStream;
017import java.io.InputStreamReader;
018import java.io.IOException;
019import java.io.Reader;
020import java.net.MalformedURLException;
021import java.net.URL;
022import java.util.ArrayList;
023
024import org.dom4j.Branch;
025import org.dom4j.Document;
026import org.dom4j.DocumentException;
027import org.dom4j.DocumentFactory;
028import org.dom4j.Element;
029import org.dom4j.ElementHandler;
030import org.dom4j.QName;
031import org.dom4j.xpp.ProxyXmlStartTag;
032
033import org.xml.sax.InputSource;
034
035import org.gjt.xpp.XmlEndTag;
036import org.gjt.xpp.XmlPullParser;
037import org.gjt.xpp.XmlPullParserFactory;
038import org.gjt.xpp.XmlPullParserException;
039import org.gjt.xpp.XmlStartTag;
040
041/** <p><code>XPPReader</code> is a Reader of DOM4J documents that 
042  * uses the fast 
043  * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x</a>.
044  * It does not currently support comments, CDATA or ProcessingInstructions or
045  * validation but it is very fast for use in SOAP style environments.</p>
046  *
047  * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
048  * @version $Revision: 1.1 $
049  */
050public class XPPReader {
051
052    /** <code>DocumentFactory</code> used to create new document objects */
053    private DocumentFactory factory;
054    
055    /** <code>XmlPullParser</code> used to parse XML */
056    private XmlPullParser xppParser;
057    
058    /** <code>XmlPullParser</code> used to parse XML */
059    private XmlPullParserFactory xppFactory;
060    
061    /** DispatchHandler to call when each <code>Element</code> is encountered */
062    private DispatchHandler dispatchHandler;
063 
064        
065    
066    public XPPReader() {
067    }
068
069    public XPPReader(DocumentFactory factory) {
070        this.factory = factory;
071    }
072
073    
074    
075        
076    /** <p>Reads a Document from the given <code>File</code></p>
077      *
078      * @param file is the <code>File</code> to read from.
079      * @return the newly created Document instance
080      * @throws DocumentException if an error occurs during parsing.
081      * @throws MalformedURLException if a URL could not be made for the given File
082      */
083    public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
084        String systemID = file.getAbsolutePath();
085        return read( new BufferedReader( new FileReader( file ) ), systemID );
086    }
087    
088    /** <p>Reads a Document from the given <code>URL</code></p>
089      *
090      * @param url <code>URL</code> to read from.
091      * @return the newly created Document instance
092      * @throws DocumentException if an error occurs during parsing.
093      */
094    public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
095        String systemID = url.toExternalForm();
096        return read( createReader( url.openStream() ), systemID);
097    }
098    
099    /** <p>Reads a Document from the given URL or filename.</p>
100      *
101      * <p>
102      * If the systemID contains a <code>':'</code> character then it is
103      * assumed to be a URL otherwise its assumed to be a file name.
104      * If you want finer grained control over this mechansim then please
105      * explicitly pass in either a {@link URL} or a {@link File} instance
106      * instead of a {@link String} to denote the source of the document.
107      * </p>
108      *
109      * @param systemID is a URL for a document or a file name.
110      * @return the newly created Document instance
111      * @throws DocumentException if an error occurs during parsing.
112      * @throws MalformedURLException if a URL could not be made for the given File
113      */
114    public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException {
115        if ( systemID.indexOf( ':' ) >= 0 ) {
116            // lets assume its a URL
117            return read(new URL(systemID));
118        }
119        else {
120            // lets assume that we are given a file name
121            return read( new File(systemID) );
122        }
123    }
124
125    /** <p>Reads a Document from the given stream</p>
126      *
127      * @param in <code>InputStream</code> to read from.
128      * @return the newly created Document instance
129      * @throws DocumentException if an error occurs during parsing.
130      */
131    public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
132        return read( createReader( in ) );
133    }
134
135    /** <p>Reads a Document from the given <code>Reader</code></p>
136      *
137      * @param reader is the reader for the input
138      * @return the newly created Document instance
139      * @throws DocumentException if an error occurs during parsing.
140      */
141    public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException {
142        getXPPParser().setInput(reader);
143        return parseDocument();
144    }
145
146    /** <p>Reads a Document from the given array of characters</p>
147      *
148      * @param text is the text to parse
149      * @return the newly created Document instance
150      * @throws DocumentException if an error occurs during parsing.
151      */
152    public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
153        getXPPParser().setInput(text);
154        return parseDocument();
155    }
156
157    /** <p>Reads a Document from the given stream</p>
158      *
159      * @param in <code>InputStream</code> to read from.
160      * @param systemID is the URI for the input
161      * @return the newly created Document instance
162      * @throws DocumentException if an error occurs during parsing.
163      */
164    public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
165        return read( createReader( in ), systemID );
166    }
167
168    /** <p>Reads a Document from the given <code>Reader</code></p>
169      *
170      * @param reader is the reader for the input
171      * @param systemID is the URI for the input
172      * @return the newly created Document instance
173      * @throws DocumentException if an error occurs during parsing.
174      */
175    public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException {
176        Document document = read( reader );
177        document.setName( systemID );
178        return document;
179    }
180
181    
182    // Properties
183    //-------------------------------------------------------------------------                
184
185    public XmlPullParser getXPPParser() throws XmlPullParserException {
186        if ( xppParser == null ) {
187            xppParser = getXPPFactory().newPullParser();
188        }
189        return xppParser;
190    }
191    
192    public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
193        if ( xppFactory == null ) {
194            xppFactory = XmlPullParserFactory.newInstance();
195        }
196        return xppFactory;
197    }
198
199    public void setXPPFactory(XmlPullParserFactory xppFactory) {
200        this.xppFactory = xppFactory;
201    }
202    
203    /** @return the <code>DocumentFactory</code> used to create document objects
204      */
205    public DocumentFactory getDocumentFactory() {
206        if (factory == null) {
207            factory = DocumentFactory.getInstance();
208        }
209        return factory;
210    }
211
212    /** <p>This sets the <code>DocumentFactory</code> used to create new documents.
213      * This method allows the building of custom DOM4J tree objects to be implemented
214      * easily using a custom derivation of {@link DocumentFactory}</p>
215      *
216      * @param factory <code>DocumentFactory</code> used to create DOM4J objects
217      */
218    public void setDocumentFactory(DocumentFactory factory) {
219        this.factory = factory;
220    }
221
222    
223    /** Adds the <code>ElementHandler</code> to be called when the 
224      * specified path is encounted.
225      *
226      * @param path is the path to be handled
227      * @param handler is the <code>ElementHandler</code> to be called
228      * by the event based processor.
229      */
230    public void addHandler(String path, ElementHandler handler) {
231        getDispatchHandler().addHandler(path, handler);   
232    }
233    
234    /** Removes the <code>ElementHandler</code> from the event based
235      * processor, for the specified path.
236      *
237      * @param path is the path to remove the <code>ElementHandler</code> for.
238      */
239    public void removeHandler(String path) {
240        getDispatchHandler().removeHandler(path);   
241    }
242    
243    /** When multiple <code>ElementHandler</code> instances have been 
244      * registered, this will set a default <code>ElementHandler</code>
245      * to be called for any path which does <b>NOT</b> have a handler
246      * registered.
247      * @param handler is the <code>ElementHandler</code> to be called
248      * by the event based processor.
249      */
250    public void setDefaultHandler(ElementHandler handler) {
251        getDispatchHandler().setDefaultHandler(handler);   
252    }
253    
254    // Implementation methods    
255    //-------------------------------------------------------------------------                    
256    protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
257        Document document = getDocumentFactory().createDocument();
258        Element parent = null;
259        XmlPullParser xppParser = getXPPParser();
260        xppParser.setNamespaceAware(true);
261        ProxyXmlStartTag startTag = new ProxyXmlStartTag();
262        XmlEndTag endTag = xppFactory.newEndTag();
263        while (true) {
264            int type = xppParser.next();
265            switch (type) {
266                case XmlPullParser.END_DOCUMENT: {
267                    return document;
268                }
269                case XmlPullParser.START_TAG: {
270                    xppParser.readStartTag( startTag );
271                    Element newElement = startTag.getElement();
272                    if ( parent != null ) {
273                        parent.add( newElement );
274                    }
275                    else {
276                        document.add( newElement );
277                    }
278                    parent = newElement;
279                    break;
280                }
281                case XmlPullParser.END_TAG: {
282                    xppParser.readEndTag( endTag );
283                    if (parent != null) {
284                        parent = parent.getParent();
285                    }
286                    break;
287                }
288                case XmlPullParser.CONTENT: {
289                    String text = xppParser.readContent();
290                    if ( parent != null ) {
291                        parent.addText( text );
292                    }
293                    else {
294                        throw new DocumentException( "Cannot have text content outside of the root document" );
295                    }
296                    break;
297                }
298                default: {
299                    throw new DocumentException( "Error: unknown PullParser type: " + type );
300                }
301            }
302        }
303    }
304
305    protected DispatchHandler getDispatchHandler() {
306        if (dispatchHandler == null) {
307            dispatchHandler = new DispatchHandler();
308        }
309        return dispatchHandler;   
310    }
311    
312    protected void setDispatchHandler(DispatchHandler dispatchHandler) {
313        this.dispatchHandler = dispatchHandler;
314    }
315    
316    /** Factory method to create a Reader from the given InputStream.
317     */
318    protected Reader createReader(InputStream in) throws IOException {
319        return new BufferedReader( new InputStreamReader( in ) );
320    }    
321}
322
323
324
325
326/*
327 * Redistribution and use of this software and associated documentation
328 * ("Software"), with or without modification, are permitted provided
329 * that the following conditions are met:
330 *
331 * 1. Redistributions of source code must retain copyright
332 *    statements and notices.  Redistributions must also contain a
333 *    copy of this document.
334 *
335 * 2. Redistributions in binary form must reproduce the
336 *    above copyright notice, this list of conditions and the
337 *    following disclaimer in the documentation and/or other
338 *    materials provided with the distribution.
339 *
340 * 3. The name "DOM4J" must not be used to endorse or promote
341 *    products derived from this Software without prior written
342 *    permission of MetaStuff, Ltd.  For written permission,
343 *    please contact dom4j-info@metastuff.com.
344 *
345 * 4. Products derived from this Software may not be called "DOM4J"
346 *    nor may "DOM4J" appear in their names without prior written
347 *    permission of MetaStuff, Ltd. DOM4J is a registered
348 *    trademark of MetaStuff, Ltd.
349 *
350 * 5. Due credit should be given to the DOM4J Project
351 *    (http://dom4j.org/).
352 *
353 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
354 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
355 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
356 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
357 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
358 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
359 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
360 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
361 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
362 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
363 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
364 * OF THE POSSIBILITY OF SUCH DAMAGE.
365 *
366 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved.
367 *
368 * $Id: XPPReader.java,v 1.1 2001/12/19 09:51:39 jstrachan Exp $
369 */