001/* 002 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved. 003 * 004 * This software is open source. 005 * See the bottom of this file for the licence. 006 * 007 * $Id: XPPReader.java,v 1.1 2001/12/19 09:51:39 jstrachan Exp $ 008 */ 009 010package org.dom4j.io; 011 012import java.io.BufferedReader; 013import java.io.File; 014import java.io.FileReader; 015import java.io.FileNotFoundException; 016import java.io.InputStream; 017import java.io.InputStreamReader; 018import java.io.IOException; 019import java.io.Reader; 020import java.net.MalformedURLException; 021import java.net.URL; 022import java.util.ArrayList; 023 024import org.dom4j.Branch; 025import org.dom4j.Document; 026import org.dom4j.DocumentException; 027import org.dom4j.DocumentFactory; 028import org.dom4j.Element; 029import org.dom4j.ElementHandler; 030import org.dom4j.QName; 031import org.dom4j.xpp.ProxyXmlStartTag; 032 033import org.xml.sax.InputSource; 034 035import org.gjt.xpp.XmlEndTag; 036import org.gjt.xpp.XmlPullParser; 037import org.gjt.xpp.XmlPullParserFactory; 038import org.gjt.xpp.XmlPullParserException; 039import org.gjt.xpp.XmlStartTag; 040 041/** <p><code>XPPReader</code> is a Reader of DOM4J documents that 042 * uses the fast 043 * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x</a>. 044 * It does not currently support comments, CDATA or ProcessingInstructions or 045 * validation but it is very fast for use in SOAP style environments.</p> 046 * 047 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a> 048 * @version $Revision: 1.1 $ 049 */ 050public class XPPReader { 051 052 /** <code>DocumentFactory</code> used to create new document objects */ 053 private DocumentFactory factory; 054 055 /** <code>XmlPullParser</code> used to parse XML */ 056 private XmlPullParser xppParser; 057 058 /** <code>XmlPullParser</code> used to parse XML */ 059 private XmlPullParserFactory xppFactory; 060 061 /** DispatchHandler to call when each <code>Element</code> is encountered */ 062 private DispatchHandler dispatchHandler; 063 064 065 066 public XPPReader() { 067 } 068 069 public XPPReader(DocumentFactory factory) { 070 this.factory = factory; 071 } 072 073 074 075 076 /** <p>Reads a Document from the given <code>File</code></p> 077 * 078 * @param file is the <code>File</code> to read from. 079 * @return the newly created Document instance 080 * @throws DocumentException if an error occurs during parsing. 081 * @throws MalformedURLException if a URL could not be made for the given File 082 */ 083 public Document read(File file) throws DocumentException, IOException, XmlPullParserException { 084 String systemID = file.getAbsolutePath(); 085 return read( new BufferedReader( new FileReader( file ) ), systemID ); 086 } 087 088 /** <p>Reads a Document from the given <code>URL</code></p> 089 * 090 * @param url <code>URL</code> to read from. 091 * @return the newly created Document instance 092 * @throws DocumentException if an error occurs during parsing. 093 */ 094 public Document read(URL url) throws DocumentException, IOException, XmlPullParserException { 095 String systemID = url.toExternalForm(); 096 return read( createReader( url.openStream() ), systemID); 097 } 098 099 /** <p>Reads a Document from the given URL or filename.</p> 100 * 101 * <p> 102 * If the systemID contains a <code>':'</code> character then it is 103 * assumed to be a URL otherwise its assumed to be a file name. 104 * If you want finer grained control over this mechansim then please 105 * explicitly pass in either a {@link URL} or a {@link File} instance 106 * instead of a {@link String} to denote the source of the document. 107 * </p> 108 * 109 * @param systemID is a URL for a document or a file name. 110 * @return the newly created Document instance 111 * @throws DocumentException if an error occurs during parsing. 112 * @throws MalformedURLException if a URL could not be made for the given File 113 */ 114 public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException { 115 if ( systemID.indexOf( ':' ) >= 0 ) { 116 // lets assume its a URL 117 return read(new URL(systemID)); 118 } 119 else { 120 // lets assume that we are given a file name 121 return read( new File(systemID) ); 122 } 123 } 124 125 /** <p>Reads a Document from the given stream</p> 126 * 127 * @param in <code>InputStream</code> to read from. 128 * @return the newly created Document instance 129 * @throws DocumentException if an error occurs during parsing. 130 */ 131 public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException { 132 return read( createReader( in ) ); 133 } 134 135 /** <p>Reads a Document from the given <code>Reader</code></p> 136 * 137 * @param reader is the reader for the input 138 * @return the newly created Document instance 139 * @throws DocumentException if an error occurs during parsing. 140 */ 141 public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException { 142 getXPPParser().setInput(reader); 143 return parseDocument(); 144 } 145 146 /** <p>Reads a Document from the given array of characters</p> 147 * 148 * @param text is the text to parse 149 * @return the newly created Document instance 150 * @throws DocumentException if an error occurs during parsing. 151 */ 152 public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException { 153 getXPPParser().setInput(text); 154 return parseDocument(); 155 } 156 157 /** <p>Reads a Document from the given stream</p> 158 * 159 * @param in <code>InputStream</code> to read from. 160 * @param systemID is the URI for the input 161 * @return the newly created Document instance 162 * @throws DocumentException if an error occurs during parsing. 163 */ 164 public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException { 165 return read( createReader( in ), systemID ); 166 } 167 168 /** <p>Reads a Document from the given <code>Reader</code></p> 169 * 170 * @param reader is the reader for the input 171 * @param systemID is the URI for the input 172 * @return the newly created Document instance 173 * @throws DocumentException if an error occurs during parsing. 174 */ 175 public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException { 176 Document document = read( reader ); 177 document.setName( systemID ); 178 return document; 179 } 180 181 182 // Properties 183 //------------------------------------------------------------------------- 184 185 public XmlPullParser getXPPParser() throws XmlPullParserException { 186 if ( xppParser == null ) { 187 xppParser = getXPPFactory().newPullParser(); 188 } 189 return xppParser; 190 } 191 192 public XmlPullParserFactory getXPPFactory() throws XmlPullParserException { 193 if ( xppFactory == null ) { 194 xppFactory = XmlPullParserFactory.newInstance(); 195 } 196 return xppFactory; 197 } 198 199 public void setXPPFactory(XmlPullParserFactory xppFactory) { 200 this.xppFactory = xppFactory; 201 } 202 203 /** @return the <code>DocumentFactory</code> used to create document objects 204 */ 205 public DocumentFactory getDocumentFactory() { 206 if (factory == null) { 207 factory = DocumentFactory.getInstance(); 208 } 209 return factory; 210 } 211 212 /** <p>This sets the <code>DocumentFactory</code> used to create new documents. 213 * This method allows the building of custom DOM4J tree objects to be implemented 214 * easily using a custom derivation of {@link DocumentFactory}</p> 215 * 216 * @param factory <code>DocumentFactory</code> used to create DOM4J objects 217 */ 218 public void setDocumentFactory(DocumentFactory factory) { 219 this.factory = factory; 220 } 221 222 223 /** Adds the <code>ElementHandler</code> to be called when the 224 * specified path is encounted. 225 * 226 * @param path is the path to be handled 227 * @param handler is the <code>ElementHandler</code> to be called 228 * by the event based processor. 229 */ 230 public void addHandler(String path, ElementHandler handler) { 231 getDispatchHandler().addHandler(path, handler); 232 } 233 234 /** Removes the <code>ElementHandler</code> from the event based 235 * processor, for the specified path. 236 * 237 * @param path is the path to remove the <code>ElementHandler</code> for. 238 */ 239 public void removeHandler(String path) { 240 getDispatchHandler().removeHandler(path); 241 } 242 243 /** When multiple <code>ElementHandler</code> instances have been 244 * registered, this will set a default <code>ElementHandler</code> 245 * to be called for any path which does <b>NOT</b> have a handler 246 * registered. 247 * @param handler is the <code>ElementHandler</code> to be called 248 * by the event based processor. 249 */ 250 public void setDefaultHandler(ElementHandler handler) { 251 getDispatchHandler().setDefaultHandler(handler); 252 } 253 254 // Implementation methods 255 //------------------------------------------------------------------------- 256 protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException { 257 Document document = getDocumentFactory().createDocument(); 258 Element parent = null; 259 XmlPullParser xppParser = getXPPParser(); 260 xppParser.setNamespaceAware(true); 261 ProxyXmlStartTag startTag = new ProxyXmlStartTag(); 262 XmlEndTag endTag = xppFactory.newEndTag(); 263 while (true) { 264 int type = xppParser.next(); 265 switch (type) { 266 case XmlPullParser.END_DOCUMENT: { 267 return document; 268 } 269 case XmlPullParser.START_TAG: { 270 xppParser.readStartTag( startTag ); 271 Element newElement = startTag.getElement(); 272 if ( parent != null ) { 273 parent.add( newElement ); 274 } 275 else { 276 document.add( newElement ); 277 } 278 parent = newElement; 279 break; 280 } 281 case XmlPullParser.END_TAG: { 282 xppParser.readEndTag( endTag ); 283 if (parent != null) { 284 parent = parent.getParent(); 285 } 286 break; 287 } 288 case XmlPullParser.CONTENT: { 289 String text = xppParser.readContent(); 290 if ( parent != null ) { 291 parent.addText( text ); 292 } 293 else { 294 throw new DocumentException( "Cannot have text content outside of the root document" ); 295 } 296 break; 297 } 298 default: { 299 throw new DocumentException( "Error: unknown PullParser type: " + type ); 300 } 301 } 302 } 303 } 304 305 protected DispatchHandler getDispatchHandler() { 306 if (dispatchHandler == null) { 307 dispatchHandler = new DispatchHandler(); 308 } 309 return dispatchHandler; 310 } 311 312 protected void setDispatchHandler(DispatchHandler dispatchHandler) { 313 this.dispatchHandler = dispatchHandler; 314 } 315 316 /** Factory method to create a Reader from the given InputStream. 317 */ 318 protected Reader createReader(InputStream in) throws IOException { 319 return new BufferedReader( new InputStreamReader( in ) ); 320 } 321} 322 323 324 325 326/* 327 * Redistribution and use of this software and associated documentation 328 * ("Software"), with or without modification, are permitted provided 329 * that the following conditions are met: 330 * 331 * 1. Redistributions of source code must retain copyright 332 * statements and notices. Redistributions must also contain a 333 * copy of this document. 334 * 335 * 2. Redistributions in binary form must reproduce the 336 * above copyright notice, this list of conditions and the 337 * following disclaimer in the documentation and/or other 338 * materials provided with the distribution. 339 * 340 * 3. The name "DOM4J" must not be used to endorse or promote 341 * products derived from this Software without prior written 342 * permission of MetaStuff, Ltd. For written permission, 343 * please contact dom4j-info@metastuff.com. 344 * 345 * 4. Products derived from this Software may not be called "DOM4J" 346 * nor may "DOM4J" appear in their names without prior written 347 * permission of MetaStuff, Ltd. DOM4J is a registered 348 * trademark of MetaStuff, Ltd. 349 * 350 * 5. Due credit should be given to the DOM4J Project 351 * (http://dom4j.org/). 352 * 353 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS 354 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT 355 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 356 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 357 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 358 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 359 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 360 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 361 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 362 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 363 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 364 * OF THE POSSIBILITY OF SUCH DAMAGE. 365 * 366 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved. 367 * 368 * $Id: XPPReader.java,v 1.1 2001/12/19 09:51:39 jstrachan Exp $ 369 */