001/* 002 * Copyright 1999-2005 The Apache Software Foundation. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.itextpdf.text.xml; 018 019import java.io.OutputStream; 020import java.io.OutputStreamWriter; 021import java.io.PrintWriter; 022import java.io.UnsupportedEncodingException; 023 024import org.w3c.dom.Attr; 025import org.w3c.dom.Document; 026import org.w3c.dom.DocumentType; 027import org.w3c.dom.NamedNodeMap; 028import org.w3c.dom.Node; 029 030/** 031 * 032 * @author psoares 033 */ 034public class XmlDomWriter { 035 036 /** Print writer. */ 037 protected PrintWriter fOut; 038 039 /** Canonical output. */ 040 protected boolean fCanonical; 041 042 /** Processing XML 1.1 document. */ 043 protected boolean fXML11; 044 045 // 046 // Constructors 047 // 048 049 /** Default constructor. */ 050 public XmlDomWriter() { 051 } // <init>() 052 053 public XmlDomWriter(boolean canonical) { 054 fCanonical = canonical; 055 } // <init>(boolean) 056 057 // 058 // Public methods 059 // 060 061 /** Sets whether output is canonical. */ 062 public void setCanonical(boolean canonical) { 063 fCanonical = canonical; 064 } // setCanonical(boolean) 065 066 /** Sets the output stream for printing. */ 067 public void setOutput(OutputStream stream, String encoding) 068 throws UnsupportedEncodingException { 069 070 if (encoding == null) { 071 encoding = "UTF8"; 072 } 073 074 java.io.Writer writer = new OutputStreamWriter(stream, encoding); 075 fOut = new PrintWriter(writer); 076 077 } // setOutput(OutputStream,String) 078 079 /** Sets the output writer. */ 080 public void setOutput(java.io.Writer writer) { 081 082 fOut = writer instanceof PrintWriter 083 ? (PrintWriter)writer : new PrintWriter(writer); 084 085 } // setOutput(java.io.Writer) 086 087 /** Writes the specified node, recursively. */ 088 public void write(Node node) { 089 090 // is there anything to do? 091 if (node == null) { 092 return; 093 } 094 095 short type = node.getNodeType(); 096 switch (type) { 097 case Node.DOCUMENT_NODE: { 098 Document document = (Document)node; 099 fXML11 = false; //"1.1".equals(getVersion(document)); 100 if (!fCanonical) { 101 if (fXML11) { 102 fOut.println("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"); 103 } else { 104 fOut.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 105 } 106 fOut.flush(); 107 write(document.getDoctype()); 108 } 109 write(document.getDocumentElement()); 110 break; 111 } 112 113 case Node.DOCUMENT_TYPE_NODE: { 114 DocumentType doctype = (DocumentType)node; 115 fOut.print("<!DOCTYPE "); 116 fOut.print(doctype.getName()); 117 String publicId = doctype.getPublicId(); 118 String systemId = doctype.getSystemId(); 119 if (publicId != null) { 120 fOut.print(" PUBLIC '"); 121 fOut.print(publicId); 122 fOut.print("' '"); 123 fOut.print(systemId); 124 fOut.print('\''); 125 } else if (systemId != null) { 126 fOut.print(" SYSTEM '"); 127 fOut.print(systemId); 128 fOut.print('\''); 129 } 130 String internalSubset = doctype.getInternalSubset(); 131 if (internalSubset != null) { 132 fOut.println(" ["); 133 fOut.print(internalSubset); 134 fOut.print(']'); 135 } 136 fOut.println('>'); 137 break; 138 } 139 140 case Node.ELEMENT_NODE: { 141 fOut.print('<'); 142 fOut.print(node.getNodeName()); 143 Attr attrs[] = sortAttributes(node.getAttributes()); 144 for (int i = 0; i < attrs.length; i++) { 145 Attr attr = attrs[i]; 146 fOut.print(' '); 147 fOut.print(attr.getNodeName()); 148 fOut.print("=\""); 149 normalizeAndPrint(attr.getNodeValue(), true); 150 fOut.print('"'); 151 } 152 fOut.print('>'); 153 fOut.flush(); 154 155 Node child = node.getFirstChild(); 156 while (child != null) { 157 write(child); 158 child = child.getNextSibling(); 159 } 160 break; 161 } 162 163 case Node.ENTITY_REFERENCE_NODE: { 164 if (fCanonical) { 165 Node child = node.getFirstChild(); 166 while (child != null) { 167 write(child); 168 child = child.getNextSibling(); 169 } 170 } else { 171 fOut.print('&'); 172 fOut.print(node.getNodeName()); 173 fOut.print(';'); 174 fOut.flush(); 175 } 176 break; 177 } 178 179 case Node.CDATA_SECTION_NODE: { 180 if (fCanonical) { 181 normalizeAndPrint(node.getNodeValue(), false); 182 } else { 183 fOut.print("<![CDATA["); 184 fOut.print(node.getNodeValue()); 185 fOut.print("]]>"); 186 } 187 fOut.flush(); 188 break; 189 } 190 191 case Node.TEXT_NODE: { 192 normalizeAndPrint(node.getNodeValue(), false); 193 fOut.flush(); 194 break; 195 } 196 197 case Node.PROCESSING_INSTRUCTION_NODE: { 198 fOut.print("<?"); 199 fOut.print(node.getNodeName()); 200 String data = node.getNodeValue(); 201 if (data != null && data.length() > 0) { 202 fOut.print(' '); 203 fOut.print(data); 204 } 205 fOut.print("?>"); 206 fOut.flush(); 207 break; 208 } 209 210 case Node.COMMENT_NODE: { 211 if (!fCanonical) { 212 fOut.print("<!--"); 213 String comment = node.getNodeValue(); 214 if (comment != null && comment.length() > 0) { 215 fOut.print(comment); 216 } 217 fOut.print("-->"); 218 fOut.flush(); 219 } 220 } 221 } 222 223 if (type == Node.ELEMENT_NODE) { 224 fOut.print("</"); 225 fOut.print(node.getNodeName()); 226 fOut.print('>'); 227 fOut.flush(); 228 } 229 230 } // write(Node) 231 232 /** Returns a sorted list of attributes. */ 233 protected Attr[] sortAttributes(NamedNodeMap attrs) { 234 235 int len = (attrs != null) ? attrs.getLength() : 0; 236 Attr array[] = new Attr[len]; 237 for (int i = 0; i < len; i++) { 238 array[i] = (Attr)attrs.item(i); 239 } 240 for (int i = 0; i < len - 1; i++) { 241 String name = array[i].getNodeName(); 242 int index = i; 243 for (int j = i + 1; j < len; j++) { 244 String curName = array[j].getNodeName(); 245 if (curName.compareTo(name) < 0) { 246 name = curName; 247 index = j; 248 } 249 } 250 if (index != i) { 251 Attr temp = array[i]; 252 array[i] = array[index]; 253 array[index] = temp; 254 } 255 } 256 257 return array; 258 259 } // sortAttributes(NamedNodeMap):Attr[] 260 261 // 262 // Protected methods 263 // 264 265 /** Normalizes and prints the given string. */ 266 protected void normalizeAndPrint(String s, boolean isAttValue) { 267 268 int len = (s != null) ? s.length() : 0; 269 for (int i = 0; i < len; i++) { 270 char c = s.charAt(i); 271 normalizeAndPrint(c, isAttValue); 272 } 273 274 } // normalizeAndPrint(String,boolean) 275 276 /** Normalizes and print the given character. */ 277 protected void normalizeAndPrint(char c, boolean isAttValue) { 278 279 switch (c) { 280 case '<': { 281 fOut.print("<"); 282 break; 283 } 284 case '>': { 285 fOut.print(">"); 286 break; 287 } 288 case '&': { 289 fOut.print("&"); 290 break; 291 } 292 case '"': { 293 // A '"' that appears in character data 294 // does not need to be escaped. 295 if (isAttValue) { 296 fOut.print("""); 297 } else { 298 fOut.print("\""); 299 } 300 break; 301 } 302 case '\r': { 303 // If CR is part of the document's content, it 304 // must not be printed as a literal otherwise 305 // it would be normalized to LF when the document 306 // is reparsed. 307 fOut.print("
"); 308 break; 309 } 310 case '\n': { 311 if (fCanonical) { 312 fOut.print("
"); 313 break; 314 } 315 // else, default print char 316 } 317 default: { 318 // In XML 1.1, control chars in the ranges [#x1-#x1F, #x7F-#x9F] must be escaped. 319 // 320 // Escape space characters that would be normalized to #x20 in attribute values 321 // when the document is reparsed. 322 // 323 // Escape NEL (0x85) and LSEP (0x2028) that appear in content 324 // if the document is XML 1.1, since they would be normalized to LF 325 // when the document is reparsed. 326 if (fXML11 && ((c >= 0x01 && c <= 0x1F && c != 0x09 && c != 0x0A) 327 || (c >= 0x7F && c <= 0x9F) || c == 0x2028) 328 || isAttValue && (c == 0x09 || c == 0x0A)) { 329 fOut.print("&#x"); 330 fOut.print(Integer.toHexString(c).toUpperCase()); 331 fOut.print(";"); 332 } else { 333 fOut.print(c); 334 } 335 } 336 } 337 } // normalizeAndPrint(char,boolean) 338 339 /** Extracts the XML version from the Document. */ 340// protected String getVersion(Document document) { 341// if (document == null) { 342// return null; 343// } 344// String version = null; 345// Method getXMLVersion = null; 346// try { 347// getXMLVersion = document.getClass().getMethod("getXmlVersion", new Class[]{}); 348// // If Document class implements DOM L3, this method will exist. 349// if (getXMLVersion != null) { 350// version = (String) getXMLVersion.invoke(document, (Object[]) null); 351// } 352// } catch (Exception e) { 353// // Either this locator object doesn't have 354// // this method, or we're on an old JDK. 355// } 356// return version; 357// } // getVersion(Document) 358}