001/*
002 * Copyright 1999-2005 The Apache Software Foundation.
003 * 
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * 
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 * 
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.itextpdf.text.xml;
018
019import java.io.OutputStream;
020import java.io.OutputStreamWriter;
021import java.io.PrintWriter;
022import java.io.UnsupportedEncodingException;
023
024import org.w3c.dom.Attr;
025import org.w3c.dom.Document;
026import org.w3c.dom.DocumentType;
027import org.w3c.dom.NamedNodeMap;
028import org.w3c.dom.Node;
029
030/**
031 *
032 * @author psoares
033 */
/**
 * Serializes a DOM tree as XML markup onto a {@link PrintWriter}.
 *
 * <p>Two output modes are supported. In the default mode an XML declaration,
 * the document type, comments, entity references and CDATA sections are written
 * as markup. In canonical mode the XML declaration, the document type and
 * comments are omitted, entity references are replaced by their children,
 * CDATA sections are written as escaped character data and line feeds are
 * written as character references. Attributes are written sorted by name in
 * both modes.</p>
 *
 * <p>An output target must be installed with one of the {@code setOutput}
 * methods before {@link #write(Node)} is called with a non-null node.</p>
 *
 * @author psoares
 */
public class XmlDomWriter {

    /** Print writer. */
    protected PrintWriter fOut;

    /** Canonical output. */
    protected boolean fCanonical;

    /** Processing XML 1.1 document. */
    protected boolean fXML11;

    /**
     * Encoding name announced in the XML declaration. It is kept in step with
     * the encoder installed by {@link #setOutput(OutputStream, String)} so the
     * declaration never contradicts the bytes that are actually produced.
     */
    private String fEncoding = "UTF-8";

    //
    // Constructors
    //

    /** Creates a writer that produces non-canonical output. */
    public XmlDomWriter() {
    }

    /**
     * Creates a writer with the given output mode.
     *
     * @param canonical {@code true} to produce canonical output
     */
    public XmlDomWriter(boolean canonical) {
        fCanonical = canonical;
    }

    //
    // Public methods
    //

    /**
     * Sets whether output is canonical.
     *
     * @param canonical {@code true} to produce canonical output
     */
    public void setCanonical(boolean canonical) {
        fCanonical = canonical;
    }

    /**
     * Sets the output stream for printing.
     *
     * @param stream   the stream that receives the encoded document
     * @param encoding the name of the character encoding to use, or
     *                 {@code null} for UTF-8
     * @throws UnsupportedEncodingException if the named encoding is not supported
     */
    public void setOutput(OutputStream stream, String encoding)
    throws UnsupportedEncodingException {

        String charsetName = (encoding == null) ? "UTF8" : encoding;

        // Constructing the encoder first keeps the checked
        // UnsupportedEncodingException contract for unknown names.
        java.io.Writer writer = new OutputStreamWriter(stream, charsetName);

        // The XML declaration must name the encoding really in use; the
        // canonical charset name turns Java aliases such as "UTF8" into
        // the registered form ("UTF-8").
        fEncoding = java.nio.charset.Charset.forName(charsetName).name();
        fOut = new PrintWriter(writer);

    }

    /**
     * Sets the output writer.
     *
     * <p>The byte encoding behind an arbitrary writer is not known here, so
     * the XML declaration announces UTF-8 for output set up this way.</p>
     *
     * @param writer the writer that receives the document; used directly when
     *               it already is a {@link PrintWriter}, wrapped otherwise
     */
    public void setOutput(java.io.Writer writer) {

        fEncoding = "UTF-8";
        fOut = writer instanceof PrintWriter
                ? (PrintWriter)writer : new PrintWriter(writer);

    }

    /**
     * Writes the specified node, recursively.
     *
     * <p>Documents, document types, elements, entity references, CDATA
     * sections, text, processing instructions and comments are handled; any
     * other node type is silently skipped, as is {@code null}.</p>
     *
     * @param node the node to write, may be {@code null}
     */
    public void write(Node node) {

        // is there anything to do?
        if (node == null) {
            return;
        }

        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                writeDocument((Document)node);
                break;
            case Node.DOCUMENT_TYPE_NODE:
                writeDocumentType((DocumentType)node);
                break;
            case Node.ELEMENT_NODE:
                writeElement(node);
                break;
            case Node.ENTITY_REFERENCE_NODE:
                writeEntityReference(node);
                break;
            case Node.CDATA_SECTION_NODE:
                writeCDataSection(node);
                break;
            case Node.TEXT_NODE:
                normalizeAndPrint(node.getNodeValue(), false);
                fOut.flush();
                break;
            case Node.PROCESSING_INSTRUCTION_NODE:
                writeProcessingInstruction(node);
                break;
            case Node.COMMENT_NODE:
                writeComment(node);
                break;
            default:
                // other node types produce no output
                break;
        }

    }

    /**
     * Returns a sorted list of attributes.
     *
     * @param attrs the attributes to sort, may be {@code null}
     * @return a new array holding the attributes ordered by
     *         {@link String#compareTo(String)} on their node names; empty when
     *         {@code attrs} is {@code null}
     */
    protected Attr[] sortAttributes(NamedNodeMap attrs) {

        int len = (attrs != null) ? attrs.getLength() : 0;
        Attr[] array = new Attr[len];
        for (int i = 0; i < len; i++) {
            array[i] = (Attr)attrs.item(i);
        }
        java.util.Arrays.sort(array, new java.util.Comparator<Attr>() {
            public int compare(Attr left, Attr right) {
                return left.getNodeName().compareTo(right.getNodeName());
            }
        });
        return array;

    }

    //
    // Protected methods
    //

    /**
     * Normalizes and prints the given string.
     *
     * @param s          the text to print; {@code null} prints nothing
     * @param isAttValue {@code true} when the text is an attribute value
     */
    protected void normalizeAndPrint(String s, boolean isAttValue) {

        if (s == null) {
            return;
        }
        for (int i = 0, len = s.length(); i < len; i++) {
            normalizeAndPrint(s.charAt(i), isAttValue);
        }

    }

    /**
     * Normalizes and prints the given character.
     *
     * @param c          the character to print
     * @param isAttValue {@code true} when the character belongs to an
     *                   attribute value
     */
    protected void normalizeAndPrint(char c, boolean isAttValue) {

        switch (c) {
            case '<':
                fOut.print("&lt;");
                break;
            case '>':
                fOut.print("&gt;");
                break;
            case '&':
                fOut.print("&amp;");
                break;
            case '"':
                // A '"' that appears in character data
                // does not need to be escaped.
                fOut.print(isAttValue ? "&quot;" : "\"");
                break;
            case '\r':
                // If CR is part of the document's content, it must not be
                // printed as a literal, otherwise it would be normalized to
                // LF when the document is reparsed.
                fOut.print("&#xD;");
                break;
            case '\n':
                if (fCanonical) {
                    fOut.print("&#xA;");
                } else {
                    printLiteralOrCharRef(c, isAttValue);
                }
                break;
            default:
                printLiteralOrCharRef(c, isAttValue);
                break;
        }

    }

    //
    // Private methods
    //

    /**
     * Prints a character that has no predefined entity, either literally or
     * as a hexadecimal character reference.
     */
    private void printLiteralOrCharRef(char c, boolean isAttValue) {

        // In XML 1.1, control chars in the ranges [#x1-#x1F, #x7F-#x9F] must
        // be escaped, and NEL (0x85) and LSEP (0x2028) would be normalized to
        // LF when the document is reparsed.
        boolean restrictedInXml11 = fXML11
                && ((c >= 0x01 && c <= 0x1F && c != 0x09 && c != 0x0A)
                || (c >= 0x7F && c <= 0x9F) || c == 0x2028);

        // Tab and LF would be normalized to #x20 inside attribute values
        // when the document is reparsed.
        boolean normalizedInAttribute = isAttValue && (c == 0x09 || c == 0x0A);

        if (restrictedInXml11 || normalizedInAttribute) {
            fOut.print("&#x");
            fOut.print(Integer.toHexString(c).toUpperCase());
            fOut.print(";");
        } else {
            fOut.print(c);
        }

    }

    /** Writes the XML declaration and document type (non-canonical only), then the root element. */
    private void writeDocument(Document document) {

        // XML 1.1 detection is disabled: every document is declared and
        // escaped as XML 1.0.
        fXML11 = false;
        if (!fCanonical) {
            fOut.print("<?xml version=\"");
            fOut.print(fXML11 ? "1.1" : "1.0");
            fOut.print("\" encoding=\"");
            fOut.print(fEncoding);
            fOut.println("\"?>");
            fOut.flush();
            write(document.getDoctype());
        }
        write(document.getDocumentElement());

    }

    /** Writes a {@code <!DOCTYPE ...>} declaration including any internal subset. */
    private void writeDocumentType(DocumentType doctype) {

        fOut.print("<!DOCTYPE ");
        fOut.print(doctype.getName());
        String publicId = doctype.getPublicId();
        String systemId = doctype.getSystemId();
        if (publicId != null) {
            fOut.print(" PUBLIC ");
            printQuoted(publicId);
            // A missing system identifier is left out instead of being
            // written as the text "null".
            if (systemId != null) {
                fOut.print(' ');
                printQuoted(systemId);
            }
        } else if (systemId != null) {
            fOut.print(" SYSTEM ");
            printQuoted(systemId);
        }
        String internalSubset = doctype.getInternalSubset();
        if (internalSubset != null) {
            fOut.println(" [");
            fOut.print(internalSubset);
            fOut.print(']');
        }
        fOut.println('>');

    }

    /**
     * Prints a DOCTYPE identifier between apostrophes, switching to double
     * quotes when the identifier itself contains an apostrophe.
     */
    private void printQuoted(String literal) {

        char quote = (literal.indexOf('\'') >= 0) ? '"' : '\'';
        fOut.print(quote);
        fOut.print(literal);
        fOut.print(quote);

    }

    /** Writes an element: start tag with sorted attributes, children, end tag. */
    private void writeElement(Node element) {

        String name = element.getNodeName();
        fOut.print('<');
        fOut.print(name);
        Attr[] attrs = sortAttributes(element.getAttributes());
        for (int i = 0; i < attrs.length; i++) {
            fOut.print(' ');
            fOut.print(attrs[i].getNodeName());
            fOut.print("=\"");
            normalizeAndPrint(attrs[i].getNodeValue(), true);
            fOut.print('"');
        }
        fOut.print('>');
        fOut.flush();

        writeChildren(element);

        fOut.print("</");
        fOut.print(name);
        fOut.print('>');
        fOut.flush();

    }

    /** Writes every child of the given node through {@link #write(Node)}. */
    private void writeChildren(Node parent) {

        for (Node child = parent.getFirstChild(); child != null;
                child = child.getNextSibling()) {
            write(child);
        }

    }

    /** Writes an entity reference, or its children when output is canonical. */
    private void writeEntityReference(Node reference) {

        if (fCanonical) {
            writeChildren(reference);
        } else {
            fOut.print('&');
            fOut.print(reference.getNodeName());
            fOut.print(';');
            fOut.flush();
        }

    }

    /** Writes a CDATA section, or escaped character data when output is canonical. */
    private void writeCDataSection(Node cdata) {

        String data = cdata.getNodeValue();
        if (fCanonical) {
            normalizeAndPrint(data, false);
        } else {
            fOut.print("<![CDATA[");
            if (data != null) {
                // "]]>" inside the text would close the section early, so the
                // text is split across two adjacent sections at that point.
                fOut.print(data.replace("]]>", "]]]]><![CDATA[>"));
            }
            fOut.print("]]>");
        }
        fOut.flush();

    }

    /** Writes a processing instruction; the data part is omitted when empty. */
    private void writeProcessingInstruction(Node pi) {

        fOut.print("<?");
        fOut.print(pi.getNodeName());
        String data = pi.getNodeValue();
        if (data != null && data.length() > 0) {
            fOut.print(' ');
            fOut.print(data);
        }
        fOut.print("?>");
        fOut.flush();

    }

    /** Writes a comment; comments are dropped when output is canonical. */
    private void writeComment(Node commentNode) {

        if (fCanonical) {
            return;
        }
        fOut.print("<!--");
        String comment = commentNode.getNodeValue();
        if (comment != null && comment.length() > 0) {
            fOut.print(comment);
        }
        fOut.print("-->");
        fOut.flush();

    }
}