001/*
002 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved.
003 *
004 * This software is open source.
005 * See the bottom of this file for the licence.
006 *
007 * $Id: DOMReader.java,v 1.9 2002/03/02 14:23:25 slehmann Exp $
008 */
009
010package org.dom4j.io;
011
012import java.util.ArrayList;
013import java.util.HashMap;
014import java.util.Iterator;
015import java.util.List;
016import java.util.Map;
017
018import org.dom4j.Attribute;
019import org.dom4j.Branch;
020import org.dom4j.CDATA;
021import org.dom4j.Comment;
022import org.dom4j.DocumentType;
023import org.dom4j.Document;
024import org.dom4j.DocumentFactory;
025import org.dom4j.DocumentException;
026import org.dom4j.Element;
027import org.dom4j.Entity;
028import org.dom4j.Namespace;
029import org.dom4j.Node;
030import org.dom4j.ProcessingInstruction;
031import org.dom4j.QName;
032import org.dom4j.Text;
033
034import org.dom4j.tree.NamespaceStack;
035
036/** <p><code>DOMReader</code> navigates a W3C DOM tree and creates
037  * a DOM4J tree from it.</p>
038  *
039  * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
040  * @version $Revision: 1.9 $
041  */
042public class DOMReader {
043
044    /** <code>DocumentFactory</code> used to create new document objects */
045    private DocumentFactory factory;
046
047    /** stack of <code>Namespace</code> and <code>QName</code> objects */
048    private NamespaceStack namespaceStack;
049
050
051    public DOMReader() {
052        this.factory = DocumentFactory.getInstance();
053        this.namespaceStack = new NamespaceStack(factory);
054    }
055
056    public DOMReader(DocumentFactory factory) {
057        this.factory = factory;
058        this.namespaceStack = new NamespaceStack(factory);
059    }
060
061    /** @return the <code>DocumentFactory</code> used to create document objects
062      */
063    public DocumentFactory getDocumentFactory() {
064        return factory;
065    }
066
067    /** <p>This sets the <code>DocumentFactory</code> used to create new documents.
068      * This method allows the building of custom DOM4J tree objects to be implemented
069      * easily using a custom derivation of {@link DocumentFactory}</p>
070      *
071      * @param factory <code>DocumentFactory</code> used to create DOM4J objects
072      */
073    public void setDocumentFactory(DocumentFactory factory) {
074        this.factory = factory;
075        this.namespaceStack.setDocumentFactory(factory);
076    }
077
078    public Document read(org.w3c.dom.Document domDocument) {
079        if ( domDocument instanceof Document ) {
080            return (Document) domDocument;
081        }
082        Document document = createDocument();
083
084        clearNamespaceStack();
085
086        org.w3c.dom.NodeList nodeList = domDocument.getChildNodes();
087        for ( int i = 0, size = nodeList.getLength(); i < size; i++ ) {
088            readTree( nodeList.item(i), document );
089        }
090        return document;
091    }
092
093
094    // Implementation methods
095    protected void readTree(org.w3c.dom.Node node, Branch current) {
096        Element element = null;
097        Document document = null;
098        if ( current instanceof Element ) {
099            element = (Element) current;
100        }
101        else {
102            document = (Document) current;
103        }
104        switch (node.getNodeType()) {
105            case org.w3c.dom.Node.ELEMENT_NODE:
106                readElement(node, current);
107                break;
108
109            case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE:
110                if ( current instanceof Element ) {
111                    ((Element) current).addProcessingInstruction(
112                        node.getNodeName(), node.getNodeValue()
113                    );
114                }
115                else {
116                    ((Document) current).addProcessingInstruction(
117                        node.getNodeName(), node.getNodeValue()
118                    );
119                }
120                break;
121
122            case org.w3c.dom.Node.COMMENT_NODE:
123                if ( current instanceof Element ) {
124                    ((Element) current).addComment( node.getNodeValue() );
125                }
126                else {
127                    ((Document) current).addComment( node.getNodeValue() );
128                }
129                break;
130
131            case org.w3c.dom.Node.DOCUMENT_TYPE_NODE:
132                org.w3c.dom.DocumentType domDocType
133                    = (org.w3c.dom.DocumentType) node;
134
135                document.addDocType(
136                    domDocType.getName(),
137                    domDocType.getPublicId(),
138                    domDocType.getSystemId()
139                );
140                break;
141
142            case org.w3c.dom.Node.TEXT_NODE:
143                element.addText( node.getNodeValue() );
144                break;
145
146            case org.w3c.dom.Node.CDATA_SECTION_NODE:
147                element.addCDATA( node.getNodeValue() );
148                break;
149
150
151            case org.w3c.dom.Node.ENTITY_REFERENCE_NODE: {
152                // is there a better way to get the value of an entity?
153                    org.w3c.dom.Node firstChild = node.getFirstChild();
154                    if ( firstChild != null ) {
155                        element.addEntity(
156                            node.getNodeName(),
157                            firstChild.getNodeValue()
158                        );
159                    }
160                    else {
161                        element.addEntity( node.getNodeName(), "" );
162                    }
163                }
164                break;
165
166            case org.w3c.dom.Node.ENTITY_NODE:
167                element.addEntity(
168                    node.getNodeName(),
169                    node.getNodeValue()
170                );
171                break;
172
173            default:
174                System.out.println( "WARNING: Unknown DOM node type: " + node.getNodeType() );
175        }
176    }
177
178    protected void readElement(org.w3c.dom.Node node, Branch current) {
179        int previouslyDeclaredNamespaces = namespaceStack.size();
180
181        String namespaceUri = node.getNamespaceURI();
182        org.w3c.dom.NamedNodeMap attributeList = node.getAttributes();
183        if ( namespaceUri == null ) {
184            // test if we have an "xmlns" attribute
185            org.w3c.dom.Node attribute = attributeList.getNamedItem( "xmlns" );
186            if ( attribute != null ) {
187                namespaceUri = attribute.getNodeValue();
188            }
189        }
190
191        QName qName = namespaceStack.getQName( namespaceUri, node.getLocalName(), node.getNodeName() );
192        Element element = current.addElement(qName);
193
194        if ( attributeList != null ) {
195            int size = attributeList.getLength();
196            List attributes = new ArrayList(size);
197            for ( int i = 0; i < size; i++ ) {
198                org.w3c.dom.Node attribute = attributeList.item(i);
199
200                // Define all namespaces first then process attributes later
201                String name = attribute.getNodeName();
202                if (name.startsWith("xmlns")) {
203                    int index = name.indexOf( ':', 5 );
204                    String uri = attribute.getNodeValue();
205                    if ( namespaceUri == null || ! namespaceUri.equals( uri ) ) {
206                        Namespace namespace = null;
207                        if ( index > 0 ) {
208                            String prefix = name.substring(index + 1);
209                            namespace = namespaceStack.addNamespace( prefix, uri );
210                        }
211                        else {
212                            namespace = namespaceStack.addNamespace( "", uri );
213                        }
214                        element.add( namespace );
215                    }
216                }
217                else {
218                    attributes.add( attribute );
219                }
220            }
221
222            // now add the attributes, the namespaces should be available
223            size = attributes.size();
224            for ( int i = 0; i < size; i++ ) {
225                org.w3c.dom.Node attribute = (org.w3c.dom.Node) attributes.get(i);
226                QName attributeQName = namespaceStack.getQName(
227                    attribute.getNamespaceURI(),
228                    attribute.getLocalName(),
229                    attribute.getNodeName()
230                );
231                element.addAttribute( attributeQName, attribute.getNodeValue() );
232            }
233        }
234
235        // Recurse on child nodes
236        org.w3c.dom.NodeList children = node.getChildNodes();
237        for ( int i = 0, size = children.getLength(); i < size; i++ ) {
238            org.w3c.dom.Node child = children.item(i);
239            readTree( child, element );
240        }
241
242        // pop namespaces from the stack
243        while (namespaceStack.size() > previouslyDeclaredNamespaces) {
244            namespaceStack.pop();
245        }
246    }
247
248    protected Namespace getNamespace(String prefix, String uri) {
249        return getDocumentFactory().createNamespace(prefix, uri);
250    }
251
252    protected Document createDocument() {
253        return getDocumentFactory().createDocument();
254    }
255
256    protected void clearNamespaceStack() {
257        namespaceStack.clear();
258        if ( ! namespaceStack.contains( Namespace.XML_NAMESPACE ) ) {
259            namespaceStack.push( Namespace.XML_NAMESPACE );
260        }
261    }
262}
263
264
265
266
267/*
268 * Redistribution and use of this software and associated documentation
269 * ("Software"), with or without modification, are permitted provided
270 * that the following conditions are met:
271 *
272 * 1. Redistributions of source code must retain copyright
273 *    statements and notices.  Redistributions must also contain a
274 *    copy of this document.
275 *
276 * 2. Redistributions in binary form must reproduce the
277 *    above copyright notice, this list of conditions and the
278 *    following disclaimer in the documentation and/or other
279 *    materials provided with the distribution.
280 *
281 * 3. The name "DOM4J" must not be used to endorse or promote
282 *    products derived from this Software without prior written
283 *    permission of MetaStuff, Ltd.  For written permission,
284 *    please contact dom4j-info@metastuff.com.
285 *
286 * 4. Products derived from this Software may not be called "DOM4J"
287 *    nor may "DOM4J" appear in their names without prior written
288 *    permission of MetaStuff, Ltd. DOM4J is a registered
289 *    trademark of MetaStuff, Ltd.
290 *
291 * 5. Due credit should be given to the DOM4J Project
292 *    (http://dom4j.org/).
293 *
294 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
295 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
296 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
297 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
298 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
299 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
300 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
301 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
302 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
303 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
304 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
305 * OF THE POSSIBILITY OF SUCH DAMAGE.
306 *
307 * Copyright 2001 (C) MetaStuff, Ltd. All Rights Reserved.
308 *
309 * $Id: DOMReader.java,v 1.9 2002/03/02 14:23:25 slehmann Exp $
310 */