001/* 002 * $Id: HTMLWorker.java 4666 2011-01-29 12:53:09Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.html.simpleparser; 045 046import java.io.IOException; 047import java.util.HashMap; 048import java.util.Map; 049 050import com.itextpdf.text.DocumentException; 051import com.itextpdf.text.html.HtmlTags; 052 053/** 054 * This class maps tags such as div and span to their corresponding 055 * TagProcessor classes. 056 * @since 5.0.6 057 */ 058public class HTMLTagProcessors extends HashMap<String, HTMLTagProcessor> { 059 060 /** 061 * Creates a Map containing supported tags. 062 */ 063 public HTMLTagProcessors() { 064 super(); 065 put(HtmlTags.A, A); 066 put(HtmlTags.B, EM_STRONG_STRIKE_SUP_SUP); 067 put(HtmlTags.BODY, DIV); 068 put(HtmlTags.BR, BR); 069 put(HtmlTags.DIV, DIV); 070 put(HtmlTags.EM, EM_STRONG_STRIKE_SUP_SUP); 071 put(HtmlTags.FONT, SPAN); 072 put(HtmlTags.H1, H); 073 put(HtmlTags.H2, H); 074 put(HtmlTags.H3, H); 075 put(HtmlTags.H4, H); 076 put(HtmlTags.H5, H); 077 put(HtmlTags.H6, H); 078 put(HtmlTags.HR, HR); 079 put(HtmlTags.I, EM_STRONG_STRIKE_SUP_SUP); 080 put(HtmlTags.IMG, IMG); 081 put(HtmlTags.LI, LI); 082 put(HtmlTags.OL, UL_OL); 083 put(HtmlTags.P, DIV); 084 put(HtmlTags.PRE, PRE); 085 put(HtmlTags.S, EM_STRONG_STRIKE_SUP_SUP); 086 put(HtmlTags.SPAN, SPAN); 087 put(HtmlTags.STRIKE, EM_STRONG_STRIKE_SUP_SUP); 088 put(HtmlTags.STRONG, EM_STRONG_STRIKE_SUP_SUP); 089 put(HtmlTags.SUB, EM_STRONG_STRIKE_SUP_SUP); 090 put(HtmlTags.SUP, EM_STRONG_STRIKE_SUP_SUP); 091 put(HtmlTags.TABLE, TABLE); 092 put(HtmlTags.TD, TD); 093 put(HtmlTags.TH, TD); 094 put(HtmlTags.TR, TR); 095 put(HtmlTags.U, EM_STRONG_STRIKE_SUP_SUP); 096 put(HtmlTags.UL, UL_OL); 097 } 098 099 /** 100 * Object that processes the following tags: 101 * i, em, b, strong, s, strike, u, sup, sub 102 */ 103 public static final HTMLTagProcessor EM_STRONG_STRIKE_SUP_SUP = new HTMLTagProcessor() { 104 /** 105 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 106 */ 107 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) { 108 tag = mapTag(tag); 109 attrs.put(tag, null); 110 worker.updateChain(tag, attrs); 111 } 112 /** 113 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 114 */ 115 public void endElement(HTMLWorker worker, String tag) { 116 tag = mapTag(tag); 117 worker.updateChain(tag); 118 } 119 /** 120 * Maps em to i, strong to b, and strike to s. 121 * This is a convention: the style parser expects i, b and s. 122 * @param tag the original tag 123 * @return the mapped tag 124 */ 125 private String mapTag(String tag) { 126 if (HtmlTags.EM.equalsIgnoreCase(tag)) 127 return HtmlTags.I; 128 if (HtmlTags.STRONG.equalsIgnoreCase(tag)) 129 return HtmlTags.B; 130 if (HtmlTags.STRIKE.equalsIgnoreCase(tag)) 131 return HtmlTags.S; 132 return tag; 133 } 134 135 }; 136 137 /** 138 * Object that processes the a tag. 139 */ 140 public static final HTMLTagProcessor A = new HTMLTagProcessor() { 141 /** 142 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 143 */ 144 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) { 145 worker.updateChain(tag, attrs); 146 worker.flushContent(); 147 } 148 /** 149 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 150 */ 151 public void endElement(HTMLWorker worker, String tag) { 152 worker.processLink(); 153 worker.updateChain(tag); 154 } 155 }; 156 157 /** 158 * Object that processes the br tag. 159 */ 160 public static final HTMLTagProcessor BR = new HTMLTagProcessor(){ 161 /** 162 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 163 */ 164 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) { 165 worker.newLine(); 166 } 167 /** 168 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 169 */ 170 public void endElement(HTMLWorker worker, String tag) { 171 } 172 173 }; 174 175 public static final HTMLTagProcessor UL_OL = new HTMLTagProcessor(){ 176 177 /** 178 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 179 */ 180 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 181 worker.carriageReturn(); 182 if (worker.isPendingLI()) 183 worker.endElement(HtmlTags.LI); 184 worker.setSkipText(true); 185 worker.updateChain(tag, attrs);; 186 worker.pushToStack(worker.createList(tag)); 187 } 188 189 /** 190 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 191 */ 192 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 193 worker.carriageReturn(); 194 if (worker.isPendingLI()) 195 worker.endElement(HtmlTags.LI); 196 worker.setSkipText(false); 197 worker.updateChain(tag); 198 worker.processList(); 199 } 200 201 }; 202 203 public static final HTMLTagProcessor HR = new HTMLTagProcessor(){ 204 205 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 206 worker.carriageReturn(); 207 worker.pushToStack(worker.createLineSeparator(attrs)); 208 } 209 210 public void endElement(HTMLWorker worker, String tag) { 211 } 212 213 }; 214 215 public static final HTMLTagProcessor SPAN = new HTMLTagProcessor(){ 216 217 /** 218 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 219 */ 220 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) { 221 worker.updateChain(tag, attrs); 222 } 223 224 /** 225 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 226 */ 227 public void endElement(HTMLWorker worker, String tag) { 228 worker.updateChain(tag); 229 } 230 231 }; 232 233 public static final HTMLTagProcessor H = new HTMLTagProcessor(){ 234 235 /** 236 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 237 */ 238 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 239 worker.carriageReturn(); 240 if (!attrs.containsKey(HtmlTags.SIZE)) { 241 int v = 7 - Integer.parseInt(tag.substring(1)); 242 attrs.put(HtmlTags.SIZE, Integer.toString(v)); 243 } 244 worker.updateChain(tag, attrs); 245 } 246 247 /** 248 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 249 */ 250 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 251 worker.carriageReturn(); 252 worker.updateChain(tag); 253 } 254 255 }; 256 257 public static final HTMLTagProcessor LI = new HTMLTagProcessor(){ 258 259 /** 260 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 261 */ 262 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 263 worker.carriageReturn(); 264 if (worker.isPendingLI()) 265 worker.endElement(tag); 266 worker.setSkipText(false); 267 worker.setPendingLI(true); 268 worker.updateChain(tag, attrs); 269 worker.pushToStack(worker.createListItem()); 270 } 271 272 /** 273 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 274 */ 275 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 276 worker.carriageReturn(); 277 worker.setPendingLI(false); 278 worker.setSkipText(true); 279 worker.updateChain(tag); 280 worker.processListItem(); 281 } 282 283 }; 284 285 public static final HTMLTagProcessor PRE = new HTMLTagProcessor(){ 286 287 /** 288 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 289 */ 290 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 291 worker.carriageReturn(); 292 if (!attrs.containsKey(HtmlTags.FACE)) { 293 attrs.put(HtmlTags.FACE, "Courier"); 294 } 295 worker.updateChain(tag, attrs); 296 worker.setInsidePRE(true); 297 } 298 299 /** 300 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 301 */ 302 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 303 worker.carriageReturn(); 304 worker.updateChain(tag); 305 worker.setInsidePRE(false); 306 } 307 308 }; 309 310 public static final HTMLTagProcessor DIV = new HTMLTagProcessor(){ 311 312 /** 313 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 314 */ 315 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 316 worker.carriageReturn(); 317 worker.updateChain(tag, attrs); 318 } 319 320 /** 321 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 322 */ 323 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 324 worker.carriageReturn(); 325 worker.updateChain(tag); 326 } 327 328 }; 329 330 331 public static final HTMLTagProcessor TABLE = new HTMLTagProcessor(){ 332 333 /** 334 * @throws DocumentException 335 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 336 */ 337 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 338 worker.carriageReturn(); 339 TableWrapper table = new TableWrapper(attrs); 340 worker.pushToStack(table); 341 worker.pushTableState(); 342 worker.setPendingTD(false); 343 worker.setPendingTR(false); 344 worker.setSkipText(true); 345 // Table alignment should not affect children elements, thus remove 346 attrs.remove(HtmlTags.ALIGN); 347 // In case this is a nested table reset colspan and rowspan 348 attrs.put(HtmlTags.COLSPAN, "1"); 349 attrs.put(HtmlTags.ROWSPAN, "1"); 350 worker.updateChain(tag, attrs); 351 } 352 353 /** 354 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 355 */ 356 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 357 worker.carriageReturn(); 358 if (worker.isPendingTR()) 359 worker.endElement(HtmlTags.TR); 360 worker.updateChain(tag); 361 worker.processTable(); 362 worker.popTableState(); 363 worker.setSkipText(false); 364 } 365 366 }; 367 public static final HTMLTagProcessor TR = new HTMLTagProcessor(){ 368 369 /** 370 * @throws DocumentException 371 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 372 */ 373 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 374 worker.carriageReturn(); 375 if (worker.isPendingTR()) 376 worker.endElement(tag); 377 worker.setSkipText(true); 378 worker.setPendingTR(true); 379 worker.updateChain(tag, attrs); 380 } 381 382 /** 383 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 384 */ 385 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 386 worker.carriageReturn(); 387 if (worker.isPendingTD()) 388 worker.endElement(HtmlTags.TD); 389 worker.setPendingTR(false); 390 worker.updateChain(tag); 391 worker.processRow(); 392 worker.setSkipText(true); 393 } 394 395 }; 396 public static final HTMLTagProcessor TD = new HTMLTagProcessor(){ 397 398 /** 399 * @throws DocumentException 400 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 401 */ 402 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException { 403 worker.carriageReturn(); 404 if (worker.isPendingTD()) 405 worker.endElement(tag); 406 worker.setSkipText(false); 407 worker.setPendingTD(true); 408 worker.updateChain(HtmlTags.TD, attrs); 409 worker.pushToStack(worker.createCell(tag)); 410 } 411 412 /** 413 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 414 */ 415 public void endElement(HTMLWorker worker, String tag) throws DocumentException { 416 worker.carriageReturn(); 417 worker.setPendingTD(false); 418 worker.updateChain(HtmlTags.TD); 419 worker.setSkipText(true); 420 } 421 422 }; 423 424 public static final HTMLTagProcessor IMG = new HTMLTagProcessor(){ 425 426 /** 427 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map) 428 */ 429 public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException, IOException { 430 worker.updateChain(tag, attrs); 431 worker.processImage(worker.createImage(attrs), attrs); 432 worker.updateChain(tag); 433 } 434 435 /** 436 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String) 437 */ 438 public void endElement(HTMLWorker worker, String tag) { 439 } 440 441 }; 442 443 /** Serial version UID. */ 444 private static final long serialVersionUID = -959260811961222824L; 445}