/*
 * $Id: HTMLWorker.java 4863 2011-05-12 07:01:55Z redlab_b $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2011 1T3XT BVBA
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.html.simpleparser;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import com.itextpdf.text.Chunk;
import com.itextpdf.text.DocListener;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.ExceptionConverter;
import com.itextpdf.text.FontProvider;
import com.itextpdf.text.Image;
import com.itextpdf.text.ListItem;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.TextElementArray;
import com.itextpdf.text.html.HtmlTags;
import com.itextpdf.text.html.HtmlUtilities;
import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.draw.LineSeparator;
import com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler;
import com.itextpdf.text.xml.simpleparser.SimpleXMLParser;

/**
 * Handler that receives the parsing events emitted by
 * {@link SimpleXMLParser} for an HTML source and turns them into iText
 * {@link Element} objects.
 * <p>
 * The produced elements are passed to a {@link DocListener}. When the
 * static <CODE>parseToList</CODE> methods are used, the worker acts as
 * its own listener and collects the elements in a List.
 */
public class HTMLWorker implements SimpleXMLDocHandler, DocListener {

    /** Logger of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(HTMLWorker.class);

    /**
     * DocListener that will listen to the Elements
     * produced by parsing the HTML.
     * This can be a com.itextpdf.text.Document adding
     * the elements to a Document directly, or an
     * HTMLWorker instance storing the objects in a List
     */
    protected DocListener document;

    /**
     * The map with all the supported tags.
     * @since 5.0.6
     */
    protected Map<String, HTMLTagProcessor> tags;

    /** The object defining all the styles. */
    private StyleSheet style = new StyleSheet();

    /**
     * Creates a new instance of HTMLWorker
     * @param document A class that implements <CODE>DocListener</CODE>
     */
    public HTMLWorker(final DocListener document) {
        this(document, null, null);
    }

    /**
     * Creates a new instance of HTMLWorker
     * @param document A class that implements <CODE>DocListener</CODE>
     * @param tags A map containing the supported tags
     *             (if null, a new HTMLTagProcessors instance is used)
     * @param style A StyleSheet (if null, an empty StyleSheet is used)
     * @since 5.0.6
     */
    public HTMLWorker(final DocListener document, final Map<String, HTMLTagProcessor> tags, final StyleSheet style) {
        this.document = document;
        setSupportedTags(tags);
        setStyleSheet(style);
    }

    /**
     * Sets the map with supported tags.
     * @param tags the supported tags and their processors;
     *             if null, a new HTMLTagProcessors instance is used
     * @since 5.0.6
     */
    public void setSupportedTags(Map<String, HTMLTagProcessor> tags) {
        if (tags == null) {
            tags = new HTMLTagProcessors();
        }
        this.tags = tags;
    }

    /**
     * Setter for the StyleSheet
     * @param style the StyleSheet; if null, an empty StyleSheet is used
     */
    public void setStyleSheet(StyleSheet style) {
        if (style == null) {
            style = new StyleSheet();
        }
        this.style = style;
    }

    /**
     * Parses content read from a java.io.Reader object.
     * @param reader the content
     * @throws IOException
     */
    public void parse(final Reader reader) throws IOException {
        LOGGER.info("Please note, there is a more extended version of the HTMLWorker available in the iText XMLWorker");
        SimpleXMLParser.parse(this, null, reader, true);
    }

    // state machine

    /**
     * Stack with the Elements that already have been processed.
     * @since iText 5.0.6 (private => protected)
     */
    protected Stack<Element> stack = new Stack<Element>();

    /**
     * Keeps the content of the current paragraph
     * @since iText 5.0.6 (private => protected)
     */
    protected Paragraph currentParagraph;

    /**
     * The current hierarchy chain of tags.
     * @since 5.0.6
     */
    private final ChainedProperties chain = new ChainedProperties();

    /**
     * Applies the style sheet to an empty attribute map for the body tag
     * and adds the result to the chain.
     * @see com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler#startDocument()
     */
    public void startDocument() {
        Map<String, String> attrs = new HashMap<String, String>();
        style.applyStyle(HtmlTags.BODY, attrs);
        chain.addToChain(HtmlTags.BODY, attrs);
    }

    /**
     * Looks up the processor for the tag; tags without a processor are
     * ignored. DocumentException and IOException thrown by the processor
     * are rethrown wrapped in an ExceptionConverter.
     * @see com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler#startElement(java.lang.String, java.util.Map)
     */
    public void startElement(final String tag, final Map<String, String> attrs) {
        HTMLTagProcessor htmlTag = tags.get(tag);
        if (htmlTag == null) {
            return;
        }
        // apply the styles to attrs
        style.applyStyle(tag, attrs);
        // deal with the style attribute
        StyleSheet.resolveStyleAttribute(attrs, chain);
        // process the tag
        try {
            htmlTag.startElement(this, tag, attrs);
        } catch (DocumentException e) {
            throw new ExceptionConverter(e);
        } catch (IOException e) {
            throw new ExceptionConverter(e);
        }
    }

    /**
     * Adds the content as a Chunk to the current paragraph, creating the
     * paragraph first if there is none. Nothing happens while skipText
     * is set.
     * @see com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler#text(java.lang.String)
     */
    public void text(String content) {
        if (skipText) {
            return;
        }
        if (currentParagraph == null) {
            currentParagraph = createParagraph();
        }
        if (!insidePRE) {
            // newlines and carriage returns are ignored
            if (content.trim().length() == 0 && content.indexOf(' ') < 0) {
                return;
            }
            content = HtmlUtilities.eliminateWhiteSpace(content);
        }
        Chunk chunk = createChunk(content);
        currentParagraph.add(chunk);
    }

    /**
     * Looks up the processor for the tag; tags without a processor are
     * ignored. A DocumentException thrown by the processor is rethrown
     * wrapped in an ExceptionConverter.
     * @see com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler#endElement(java.lang.String)
     */
    public void endElement(final String tag) {
        HTMLTagProcessor htmlTag = tags.get(tag);
        if (htmlTag == null) {
            return;
        }
        // process the tag
        try {
            htmlTag.endElement(this, tag);
        } catch (DocumentException e) {
            throw new ExceptionConverter(e);
        }
    }

    /**
     * Adds everything that is still on the stack, followed by the current
     * paragraph (if any), to the document. Any exception is rethrown
     * wrapped in an ExceptionConverter.
     * @see com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler#endDocument()
     */
    public void endDocument() {
        try {
            // flush the stack
            for (int k = 0; k < stack.size(); ++k) {
                document.add(stack.elementAt(k));
            }
            // add current paragraph
            if (currentParagraph != null) {
                document.add(currentParagraph);
            }
            currentParagraph = null;
        } catch (Exception e) {
            throw new ExceptionConverter(e);
        }
    }

    // stack and current paragraph operations

    /**
     * Adds a new line to the currentParagraph.
     * If there is no current paragraph, an empty Paragraph is created first.
     * @since 5.0.6
     */
    public void newLine() {
        if (currentParagraph == null) {
            currentParagraph = new Paragraph();
        }
        currentParagraph.add(createChunk("\n"));
    }

    /**
     * Flushes the current paragraph, indicating that we're starting
     * a new block.
     * If the stack is empty, the paragraph is added to the document.
     * Otherwise the Paragraph is added to the element on top of the stack,
     * provided that element is a TextElementArray; if it is not, the
     * paragraph is discarded.
     * @throws DocumentException
     * @since 5.0.6
     */
    public void carriageReturn() throws DocumentException {
        if (currentParagraph == null) {
            return;
        }
        if (stack.empty()) {
            document.add(currentParagraph);
        } else {
            // the element stays on the stack; only its content changes
            Element top = stack.peek();
            if (top instanceof TextElementArray) {
                ((TextElementArray) top).add(currentParagraph);
            }
        }
        currentParagraph = null;
    }

    /**
     * Stacks the current paragraph, indicating that we're starting
     * a new span. Afterwards the current paragraph is a new, empty
     * Paragraph.
     * @since 5.0.6
     */
    public void flushContent() {
        pushToStack(currentParagraph);
        currentParagraph = new Paragraph();
    }

    /**
     * Pushes an element to the Stack.
     * @param element the element to push; null is ignored
     * @since 5.0.6
     */
    public void pushToStack(final Element element) {
        if (element != null) {
            stack.push(element);
        }
    }

    /**
     * Updates the chain with a new tag and new attributes.
     * @param tag the new tag
     * @param attrs the corresponding attributes
     * @since 5.0.6
     */
    public void updateChain(final String tag, final Map<String, String> attrs) {
        chain.addToChain(tag, attrs);
    }

    /**
     * Updates the chain by removing a tag.
     * @param tag the tag to remove
     * @since 5.0.6
     */
    public void updateChain(final String tag) {
        chain.removeChain(tag);
    }

    // providers that help find resources such as images and fonts

    /**
     * Key used to store the image provider in the providers map.
     * @since 5.0.6
     */
    public static final String IMG_PROVIDER = "img_provider";

    /**
     * Key used to store the image processor in the providers map.
     * @since 5.0.6
     */
    public static final String IMG_PROCESSOR = "img_interface";

    /**
     * Key used to store the image store in the providers map.
     * @since 5.0.6
     */
    public static final String IMG_STORE = "img_static";

    /**
     * Key used to store the image baseurl provider in the providers map.
     * @since 5.0.6
     */
    public static final String IMG_BASEURL = "img_baseurl";

    /**
     * Key used to store the font provider in the providers map.
     * @since 5.0.6
     */
    public static final String FONT_PROVIDER = "font_factory";

    /**
     * Key used to store the link provider in the providers map.
     * @since 5.0.6
     */
    public static final String LINK_PROVIDER = "alink_interface";

    /**
     * Map containing providers such as a FontProvider or ImageProvider.
     * @since 5.0.6 (renamed from interfaceProps)
     */
    private Map<String, Object> providers = new HashMap<String, Object>();

    /**
     * Setter for the providers.
     * If a FontProvider is added, the ElementFactory is updated.
     * A null map is ignored: the providers that were set before are kept.
     * @param providers a Map with different providers
     * @since 5.0.6
     */
    public void setProviders(final Map<String, Object> providers) {
        if (providers == null) {
            return;
        }
        this.providers = providers;
        FontProvider ff = (FontProvider) providers.get(FONT_PROVIDER);
        if (ff != null) {
            factory.setFontProvider(ff);
        }
    }

    // factory that helps create objects

    /**
     * Factory that is able to create iText Element objects.
     * @since 5.0.6
     */
    private final ElementFactory factory = new ElementFactory();

    /**
     * Creates a Chunk using the factory.
     * @param content the content of the chunk
     * @return a Chunk with content
     * @since 5.0.6
     */
    public Chunk createChunk(final String content) {
        return factory.createChunk(content, chain);
    }

    /**
     * Creates a Paragraph using the factory.
     * @return a Paragraph without any content
     * @since 5.0.6
     */
    public Paragraph createParagraph() {
        return factory.createParagraph(chain);
    }

    /**
     * Creates a List object.
     * @param tag should be "ol" or "ul"
     * @return a List object
     * @since 5.0.6
     */
    public com.itextpdf.text.List createList(final String tag) {
        return factory.createList(tag, chain);
    }

    /**
     * Creates a ListItem object.
     * @return a ListItem object
     * @since 5.0.6
     */
    public ListItem createListItem() {
        return factory.createListItem(chain);
    }

    /**
     * Creates a LineSeparator object.
     * Half the leading of the current paragraph is passed to the factory.
     * If there is no current paragraph (for instance right after
     * {@link #carriageReturn()}), the leading of a paragraph created
     * from the current chain is used instead of failing with a
     * NullPointerException; the current paragraph itself is left untouched.
     * @param attrs properties of the LineSeparator
     * @return a LineSeparator object
     * @since 5.0.6
     */
    public LineSeparator createLineSeparator(final Map<String, String> attrs) {
        Paragraph reference = currentParagraph;
        if (reference == null) {
            reference = createParagraph();
        }
        return factory.createLineSeparator(attrs, reference.getLeading() / 2);
    }

    /**
     * Creates an Image object.
     * @param attrs properties of the Image
     * @return an Image object (or null if the Image couldn't be found)
     * @throws DocumentException
     * @throws IOException
     * @since 5.0.6
     */
    public Image createImage(final Map<String, String> attrs) throws DocumentException, IOException {
        String src = attrs.get(HtmlTags.SRC);
        if (src == null) {
            return null;
        }
        Image img = factory.createImage(
                src, attrs, chain, document,
                (ImageProvider) providers.get(IMG_PROVIDER),
                (ImageStore) providers.get(IMG_STORE),
                (String) providers.get(IMG_BASEURL));
        return img;
    }

    /**
     * Creates a Cell.
     * @param tag the tag
     * @return a CellWrapper object
     * @since 5.0.6
     */
    public CellWrapper createCell(final String tag) {
        return new CellWrapper(tag, chain);
    }

    // processing objects

    /**
     * Adds a link to the current paragraph.
     * <p>
     * If no LinkProcessor is registered, or the registered one returns
     * false, the href found in the chain (if any) is set as anchor on
     * every Chunk of the current paragraph. The current paragraph is then
     * wrapped in a Phrase and appended to the Paragraph on top of the
     * stack, which becomes the current paragraph again.
     * @since 5.0.6
     */
    public void processLink() {
        if (currentParagraph == null) {
            currentParagraph = new Paragraph();
        }
        // The link provider allows you to do additional processing
        LinkProcessor i = (LinkProcessor) providers.get(HTMLWorker.LINK_PROVIDER);
        if (i == null || !i.process(currentParagraph, chain)) {
            // sets an Anchor for all the Chunks in the current paragraph
            String href = chain.getProperty(HtmlTags.HREF);
            if (href != null) {
                for (Chunk ck : currentParagraph.getChunks()) {
                    ck.setAnchor(href);
                }
            }
        }
        // a link should be added to the current paragraph as a phrase
        if (!stack.isEmpty() && stack.peek() instanceof Paragraph) {
            Paragraph parent = (Paragraph) stack.pop();
            parent.add(new Phrase(currentParagraph));
            currentParagraph = parent;
        } else {
            // No paragraph to add to: the 'a' tag is the first element, or
            // the element on top of the stack is not a Paragraph. In the
            // latter case that element is left on the stack instead of
            // being popped and cast, which threw a ClassCastException.
            currentParagraph = new Paragraph(new Phrase(currentParagraph));
        }
    }

    /**
     * Fetches the List from the Stack and adds it to
     * the TextElementArray on top of the Stack,
     * or to the Document if the Stack is empty.
     * Nothing happens if the stack is empty or if the element
     * on top of the stack is not a List.
     * @throws DocumentException
     * @since 5.0.6
     */
    public void processList() throws DocumentException {
        if (stack.empty()) {
            return;
        }
        Element obj = stack.pop();
        if (!(obj instanceof com.itextpdf.text.List)) {
            stack.push(obj);
            return;
        }
        if (stack.empty()) {
            document.add(obj);
        } else {
            ((TextElementArray) stack.peek()).add(obj);
        }
    }

    /**
     * Looks for the List object on the Stack,
     * and adds the ListItem to the List.
     * If the ListItem is the only element on the stack, it is added to
     * the document. If the element below the ListItem is not a List,
     * that element is put back and the ListItem is dropped.
     * @throws DocumentException
     * @since 5.0.6
     */
    public void processListItem() throws DocumentException {
        if (stack.empty()) {
            return;
        }
        Element obj = stack.pop();
        if (!(obj instanceof ListItem)) {
            stack.push(obj);
            return;
        }
        if (stack.empty()) {
            document.add(obj);
            return;
        }
        ListItem item = (ListItem) obj;
        Element list = stack.pop();
        if (!(list instanceof com.itextpdf.text.List)) {
            stack.push(list);
            return;
        }
        ((com.itextpdf.text.List) list).add(item);
        item.adjustListSymbolFont();
        stack.push(list);
    }

    /**
     * Processes an Image.
     * If no ImageProcessor is registered, or the registered one returns
     * false, the image is added as a Chunk to the current paragraph.
     * When an align attribute is present, the image gets a paragraph
     * of its own.
     * @param img the image to process
     * @param attrs the attributes of the image tag
     * @throws DocumentException
     * @since 5.0.6
     */
    public void processImage(final Image img, final Map<String, String> attrs) throws DocumentException {
        ImageProcessor processor = (ImageProcessor) providers.get(HTMLWorker.IMG_PROCESSOR);
        if (processor == null || !processor.process(img, attrs, chain, document)) {
            String align = attrs.get(HtmlTags.ALIGN);
            if (align != null) {
                carriageReturn();
            }
            if (currentParagraph == null) {
                currentParagraph = createParagraph();
            }
            currentParagraph.add(new Chunk(img, 0, 0, true));
            currentParagraph.setAlignment(HtmlUtilities.alignmentValue(align));
            if (align != null) {
                carriageReturn();
            }
        }
    }

    /**
     * Processes the Table.
     * Pops the TableWrapper from the stack, creates the PdfPTable and adds
     * it to the TextElementArray on top of the stack, or to the document
     * if the stack is empty.
     * @throws DocumentException
     * @since 5.0.6
     */
    public void processTable() throws DocumentException {
        TableWrapper table = (TableWrapper) stack.pop();
        PdfPTable tb = table.createTable();
        tb.setSplitRows(true);
        if (stack.empty()) {
            document.add(tb);
        } else {
            ((TextElementArray) stack.peek()).add(tb);
        }
    }

    /**
     * Gets the TableWrapper from the Stack and adds a new row.
     * <p>
     * Elements are popped until a TableWrapper is found; every CellWrapper
     * met on the way contributes a cell to the row. If at least one cell
     * is a percentage, the cells without a width share what is left of
     * 100 after subtracting the known widths. The column widths are only
     * set on the table if no width is zero afterwards.
     * <p>
     * Note: the stack is expected to contain a TableWrapper; if it does
     * not, popping the exhausted stack throws an EmptyStackException.
     * @since 5.0.6
     */
    public void processRow() {
        ArrayList<PdfPCell> row = new ArrayList<PdfPCell>();
        List<Float> cellWidths = new ArrayList<Float>();
        boolean percentage = false;
        float width;
        float totalWidth = 0;
        int zeroWidth = 0;
        TableWrapper table = null;
        while (true) {
            Element obj = stack.pop();
            if (obj instanceof CellWrapper) {
                CellWrapper cell = (CellWrapper) obj;
                width = cell.getWidth();
                cellWidths.add(Float.valueOf(width));
                percentage |= cell.isPercentage();
                if (width == 0) {
                    zeroWidth++;
                } else {
                    totalWidth += width;
                }
                row.add(cell.getCell());
            }
            if (obj instanceof TableWrapper) {
                table = (TableWrapper) obj;
                break;
            }
        }
        table.addRow(row);
        if (cellWidths.size() > 0) {
            // cells come off the stack in reverse, naturally
            totalWidth = 100 - totalWidth;
            Collections.reverse(cellWidths);
            float[] widths = new float[cellWidths.size()];
            boolean hasZero = false;
            for (int i = 0; i < widths.length; i++) {
                widths[i] = cellWidths.get(i).floatValue();
                if (widths[i] == 0 && percentage && zeroWidth > 0) {
                    // distribute the remaining width over the cells without a width
                    widths[i] = totalWidth / zeroWidth;
                }
                if (widths[i] == 0) {
                    hasZero = true;
                    break;
                }
            }
            if (!hasZero) {
                table.setColWidths(widths);
            }
        }
        stack.push(table);
    }

    // state variables and methods

    /** Stack to keep track of table tags. */
    private final Stack<boolean[]> tableState = new Stack<boolean[]>();

    /** Boolean to keep track of TR tags. */
    private boolean pendingTR = false;

    /** Boolean to keep track of TD and TH tags */
    private boolean pendingTD = false;

    /** Boolean to keep track of LI tags */
    private boolean pendingLI = false;

    /**
     * Boolean to keep track of PRE tags
     * @since 5.0.6 renamed from isPRE
     */
    private boolean insidePRE = false;

    /**
     * Indicates if text needs to be skipped.
     * @since iText 5.0.6 (private => protected)
     */
    protected boolean skipText = false;

    /**
     * Pushes the values of pendingTR and pendingTD
     * to a state stack.
     * @since 5.0.6
     */
    public void pushTableState() {
        tableState.push(new boolean[] { pendingTR, pendingTD });
    }

    /**
     * Pops the values of pendingTR and pendingTD
     * from a state stack.
     * @since 5.0.6
     */
    public void popTableState() {
        boolean[] state = tableState.pop();
        pendingTR = state[0];
        pendingTD = state[1];
    }

    /**
     * @return the pendingTR
     * @since 5.0.6
     */
    public boolean isPendingTR() {
        return pendingTR;
    }

    /**
     * @param pendingTR the pendingTR to set
     * @since 5.0.6
     */
    public void setPendingTR(final boolean pendingTR) {
        this.pendingTR = pendingTR;
    }

    /**
     * @return the pendingTD
     * @since 5.0.6
     */
    public boolean isPendingTD() {
        return pendingTD;
    }

    /**
     * @param pendingTD the pendingTD to set
     * @since 5.0.6
     */
    public void setPendingTD(final boolean pendingTD) {
        this.pendingTD = pendingTD;
    }

    /**
     * @return the pendingLI
     * @since 5.0.6
     */
    public boolean isPendingLI() {
        return pendingLI;
    }

    /**
     * @param pendingLI the pendingLI to set
     * @since 5.0.6
     */
    public void setPendingLI(final boolean pendingLI) {
        this.pendingLI = pendingLI;
    }

    /**
     * @return the insidePRE
     * @since 5.0.6
     */
    public boolean isInsidePRE() {
        return insidePRE;
    }

    /**
     * @param insidePRE the insidePRE to set
     * @since 5.0.6
     */
    public void setInsidePRE(final boolean insidePRE) {
        this.insidePRE = insidePRE;
    }

    /**
     * @return the skipText
     * @since 5.0.6
     */
    public boolean isSkipText() {
        return skipText;
    }

    /**
     * @param skipText the skipText to set
     * @since 5.0.6
     */
    public void setSkipText(final boolean skipText) {
        this.skipText = skipText;
    }

    // static methods to parse HTML to a List of Element objects.

    /** The resulting list of elements. */
    protected List<Element> objectList;

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @return a List of Element objects
     * @throws IOException
     */
    public static List<Element> parseToList(final Reader reader, final StyleSheet style)
            throws IOException {
        return parseToList(reader, style, null);
    }

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @param providers map containing classes with extra info
     * @return a List of Element objects
     * @throws IOException
     */
    public static List<Element> parseToList(final Reader reader, final StyleSheet style,
            final HashMap<String, Object> providers) throws IOException {
        return parseToList(reader, style, null, providers);
    }

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @param tags a map containing supported tags and their processors
     * @param providers map containing classes with extra info
     * @return a List of Element objects
     * @throws IOException
     * @since 5.0.6
     */
    public static List<Element> parseToList(final Reader reader, final StyleSheet style,
            final Map<String, HTMLTagProcessor> tags, final HashMap<String, Object> providers) throws IOException {
        HTMLWorker worker = new HTMLWorker(null, tags, style);
        // the worker listens to itself and collects the elements in objectList
        worker.document = worker;
        worker.setProviders(providers);
        worker.objectList = new ArrayList<Element>();
        worker.parse(reader);
        return worker.objectList;
    }

    // DocListener interface

    /**
     * Adds the element to the list of resulting elements.
     * @see com.itextpdf.text.ElementListener#add(com.itextpdf.text.Element)
     */
    public boolean add(final Element element) throws DocumentException {
        objectList.add(element);
        return true;
    }

    /**
     * @see com.itextpdf.text.DocListener#close()
     */
    public void close() {
    }

    /**
     * @see com.itextpdf.text.DocListener#newPage()
     */
    public boolean newPage() {
        return true;
    }

    /**
     * @see com.itextpdf.text.DocListener#open()
     */
    public void open() {
    }

    /**
     * @see com.itextpdf.text.DocListener#resetPageCount()
     */
    public void resetPageCount() {
    }

    /**
     * @see com.itextpdf.text.DocListener#setMarginMirroring(boolean)
     */
    public boolean setMarginMirroring(final boolean marginMirroring) {
        return false;
    }

    /**
     * @see com.itextpdf.text.DocListener#setMarginMirroringTopBottom(boolean)
     * @since 2.1.6
     */
    public boolean setMarginMirroringTopBottom(final boolean marginMirroring) {
        return false;
    }

    /**
     * @see com.itextpdf.text.DocListener#setMargins(float, float, float, float)
     */
    public boolean setMargins(final float marginLeft, final float marginRight,
            final float marginTop, final float marginBottom) {
        return true;
    }

    /**
     * @see com.itextpdf.text.DocListener#setPageCount(int)
     */
    public void setPageCount(final int pageN) {
    }

    /**
     * @see com.itextpdf.text.DocListener#setPageSize(com.itextpdf.text.Rectangle)
     */
    public boolean setPageSize(final Rectangle pageSize) {
        return true;
    }

    // deprecated methods

    /**
     * Sets the providers.
     * @deprecated use setProviders() instead
     */
    @Deprecated
    public void setInterfaceProps(final HashMap<String, Object> providers) {
        setProviders(providers);
    }

    /**
     * Gets the providers
     * @deprecated kept for backward compatibility (the original note
     * pointed to getProviders(), a method this class does not declare)
     */
    @Deprecated
    public Map<String, Object> getInterfaceProps() {
        return providers;
    }

}