001/* 002 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved. 003 * 004 * This software is open source. 005 * See the bottom of this file for the licence. 006 * 007 * $Id: DOMReader.java,v 1.9 2002/03/02 14:23:25 slehmann Exp $ 008 */ 009 010package org.dom4j.io; 011 012import java.util.ArrayList; 013import java.util.HashMap; 014import java.util.Iterator; 015import java.util.List; 016import java.util.Map; 017 018import org.dom4j.Attribute; 019import org.dom4j.Branch; 020import org.dom4j.CDATA; 021import org.dom4j.Comment; 022import org.dom4j.DocumentType; 023import org.dom4j.Document; 024import org.dom4j.DocumentFactory; 025import org.dom4j.DocumentException; 026import org.dom4j.Element; 027import org.dom4j.Entity; 028import org.dom4j.Namespace; 029import org.dom4j.Node; 030import org.dom4j.ProcessingInstruction; 031import org.dom4j.QName; 032import org.dom4j.Text; 033 034import org.dom4j.tree.NamespaceStack; 035 036/** <p><code>DOMReader</code> navigates a W3C DOM tree and creates 037 * a DOM4J tree from it.</p> 038 * 039 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a> 040 * @version $Revision: 1.9 $ 041 */ 042public class DOMReader { 043 044 /** <code>DocumentFactory</code> used to create new document objects */ 045 private DocumentFactory factory; 046 047 /** stack of <code>Namespace</code> and <code>QName</code> objects */ 048 private NamespaceStack namespaceStack; 049 050 051 public DOMReader() { 052 this.factory = DocumentFactory.getInstance(); 053 this.namespaceStack = new NamespaceStack(factory); 054 } 055 056 public DOMReader(DocumentFactory factory) { 057 this.factory = factory; 058 this.namespaceStack = new NamespaceStack(factory); 059 } 060 061 /** @return the <code>DocumentFactory</code> used to create document objects 062 */ 063 public DocumentFactory getDocumentFactory() { 064 return factory; 065 } 066 067 /** <p>This sets the <code>DocumentFactory</code> used to create new documents. 068 * This method allows the building of custom DOM4J tree objects to be implemented 069 * easily using a custom derivation of {@link DocumentFactory}</p> 070 * 071 * @param factory <code>DocumentFactory</code> used to create DOM4J objects 072 */ 073 public void setDocumentFactory(DocumentFactory factory) { 074 this.factory = factory; 075 this.namespaceStack.setDocumentFactory(factory); 076 } 077 078 public Document read(org.w3c.dom.Document domDocument) { 079 if ( domDocument instanceof Document ) { 080 return (Document) domDocument; 081 } 082 Document document = createDocument(); 083 084 clearNamespaceStack(); 085 086 org.w3c.dom.NodeList nodeList = domDocument.getChildNodes(); 087 for ( int i = 0, size = nodeList.getLength(); i < size; i++ ) { 088 readTree( nodeList.item(i), document ); 089 } 090 return document; 091 } 092 093 094 // Implementation methods 095 protected void readTree(org.w3c.dom.Node node, Branch current) { 096 Element element = null; 097 Document document = null; 098 if ( current instanceof Element ) { 099 element = (Element) current; 100 } 101 else { 102 document = (Document) current; 103 } 104 switch (node.getNodeType()) { 105 case org.w3c.dom.Node.ELEMENT_NODE: 106 readElement(node, current); 107 break; 108 109 case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: 110 if ( current instanceof Element ) { 111 ((Element) current).addProcessingInstruction( 112 node.getNodeName(), node.getNodeValue() 113 ); 114 } 115 else { 116 ((Document) current).addProcessingInstruction( 117 node.getNodeName(), node.getNodeValue() 118 ); 119 } 120 break; 121 122 case org.w3c.dom.Node.COMMENT_NODE: 123 if ( current instanceof Element ) { 124 ((Element) current).addComment( node.getNodeValue() ); 125 } 126 else { 127 ((Document) current).addComment( node.getNodeValue() ); 128 } 129 break; 130 131 case org.w3c.dom.Node.DOCUMENT_TYPE_NODE: 132 org.w3c.dom.DocumentType domDocType 133 = (org.w3c.dom.DocumentType) node; 134 135 document.addDocType( 136 domDocType.getName(), 137 domDocType.getPublicId(), 138 domDocType.getSystemId() 139 ); 140 break; 141 142 case org.w3c.dom.Node.TEXT_NODE: 143 element.addText( node.getNodeValue() ); 144 break; 145 146 case org.w3c.dom.Node.CDATA_SECTION_NODE: 147 element.addCDATA( node.getNodeValue() ); 148 break; 149 150 151 case org.w3c.dom.Node.ENTITY_REFERENCE_NODE: { 152 // is there a better way to get the value of an entity? 153 org.w3c.dom.Node firstChild = node.getFirstChild(); 154 if ( firstChild != null ) { 155 element.addEntity( 156 node.getNodeName(), 157 firstChild.getNodeValue() 158 ); 159 } 160 else { 161 element.addEntity( node.getNodeName(), "" ); 162 } 163 } 164 break; 165 166 case org.w3c.dom.Node.ENTITY_NODE: 167 element.addEntity( 168 node.getNodeName(), 169 node.getNodeValue() 170 ); 171 break; 172 173 default: 174 System.out.println( "WARNING: Unknown DOM node type: " + node.getNodeType() ); 175 } 176 } 177 178 protected void readElement(org.w3c.dom.Node node, Branch current) { 179 int previouslyDeclaredNamespaces = namespaceStack.size(); 180 181 String namespaceUri = node.getNamespaceURI(); 182 org.w3c.dom.NamedNodeMap attributeList = node.getAttributes(); 183 if ( namespaceUri == null ) { 184 // test if we have an "xmlns" attribute 185 org.w3c.dom.Node attribute = attributeList.getNamedItem( "xmlns" ); 186 if ( attribute != null ) { 187 namespaceUri = attribute.getNodeValue(); 188 } 189 } 190 191 QName qName = namespaceStack.getQName( namespaceUri, node.getLocalName(), node.getNodeName() ); 192 Element element = current.addElement(qName); 193 194 if ( attributeList != null ) { 195 int size = attributeList.getLength(); 196 List attributes = new ArrayList(size); 197 for ( int i = 0; i < size; i++ ) { 198 org.w3c.dom.Node attribute = attributeList.item(i); 199 200 // Define all namespaces first then process attributes later 201 String name = attribute.getNodeName(); 202 if (name.startsWith("xmlns")) { 203 int index = name.indexOf( ':', 5 ); 204 String uri = attribute.getNodeValue(); 205 if ( namespaceUri == null || ! namespaceUri.equals( uri ) ) { 206 Namespace namespace = null; 207 if ( index > 0 ) { 208 String prefix = name.substring(index + 1); 209 namespace = namespaceStack.addNamespace( prefix, uri ); 210 } 211 else { 212 namespace = namespaceStack.addNamespace( "", uri ); 213 } 214 element.add( namespace ); 215 } 216 } 217 else { 218 attributes.add( attribute ); 219 } 220 } 221 222 // now add the attributes, the namespaces should be available 223 size = attributes.size(); 224 for ( int i = 0; i < size; i++ ) { 225 org.w3c.dom.Node attribute = (org.w3c.dom.Node) attributes.get(i); 226 QName attributeQName = namespaceStack.getQName( 227 attribute.getNamespaceURI(), 228 attribute.getLocalName(), 229 attribute.getNodeName() 230 ); 231 element.addAttribute( attributeQName, attribute.getNodeValue() ); 232 } 233 } 234 235 // Recurse on child nodes 236 org.w3c.dom.NodeList children = node.getChildNodes(); 237 for ( int i = 0, size = children.getLength(); i < size; i++ ) { 238 org.w3c.dom.Node child = children.item(i); 239 readTree( child, element ); 240 } 241 242 // pop namespaces from the stack 243 while (namespaceStack.size() > previouslyDeclaredNamespaces) { 244 namespaceStack.pop(); 245 } 246 } 247 248 protected Namespace getNamespace(String prefix, String uri) { 249 return getDocumentFactory().createNamespace(prefix, uri); 250 } 251 252 protected Document createDocument() { 253 return getDocumentFactory().createDocument(); 254 } 255 256 protected void clearNamespaceStack() { 257 namespaceStack.clear(); 258 if ( ! namespaceStack.contains( Namespace.XML_NAMESPACE ) ) { 259 namespaceStack.push( Namespace.XML_NAMESPACE ); 260 } 261 } 262} 263 264 265 266 267/* 268 * Redistribution and use of this software and associated documentation 269 * ("Software"), with or without modification, are permitted provided 270 * that the following conditions are met: 271 * 272 * 1. Redistributions of source code must retain copyright 273 * statements and notices. Redistributions must also contain a 274 * copy of this document. 275 * 276 * 2. Redistributions in binary form must reproduce the 277 * above copyright notice, this list of conditions and the 278 * following disclaimer in the documentation and/or other 279 * materials provided with the distribution. 280 * 281 * 3. The name "DOM4J" must not be used to endorse or promote 282 * products derived from this Software without prior written 283 * permission of MetaStuff, Ltd. For written permission, 284 * please contact dom4j-info@metastuff.com. 285 * 286 * 4. Products derived from this Software may not be called "DOM4J" 287 * nor may "DOM4J" appear in their names without prior written 288 * permission of MetaStuff, Ltd. DOM4J is a registered 289 * trademark of MetaStuff, Ltd. 290 * 291 * 5. Due credit should be given to the DOM4J Project 292 * (http://dom4j.org/). 293 * 294 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS 295 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT 296 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 297 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 298 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 299 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 300 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 301 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 302 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 303 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 304 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 305 * OF THE POSSIBILITY OF SUCH DAMAGE. 306 * 307 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved. 308 * 309 * $Id: DOMReader.java,v 1.9 2002/03/02 14:23:25 slehmann Exp $ 310 */