001/*
002 * $Id: HTMLWorker.java 4666 2011-01-29 12:53:09Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.html.simpleparser;
045
046import java.io.IOException;
047import java.util.HashMap;
048import java.util.Map;
049
050import com.itextpdf.text.DocumentException;
051import com.itextpdf.text.html.HtmlTags;
052
053/**
054 * This class maps tags such as div and span to their corresponding
055 * TagProcessor classes.
056 * @since 5.0.6
057 */
058public class HTMLTagProcessors extends HashMap<String, HTMLTagProcessor> {
059
060        /**
061         * Creates a Map containing supported tags.
062         */
063        public HTMLTagProcessors() {
064                super();
065                put(HtmlTags.A, A);
066                put(HtmlTags.B, EM_STRONG_STRIKE_SUP_SUP);
067                put(HtmlTags.BODY, DIV);
068                put(HtmlTags.BR, BR);
069                put(HtmlTags.DIV, DIV);
070                put(HtmlTags.EM, EM_STRONG_STRIKE_SUP_SUP);
071                put(HtmlTags.FONT, SPAN);
072                put(HtmlTags.H1, H);
073                put(HtmlTags.H2, H);
074                put(HtmlTags.H3, H);
075                put(HtmlTags.H4, H);
076                put(HtmlTags.H5, H);
077                put(HtmlTags.H6, H);
078                put(HtmlTags.HR, HR);
079                put(HtmlTags.I, EM_STRONG_STRIKE_SUP_SUP);
080                put(HtmlTags.IMG, IMG);
081                put(HtmlTags.LI, LI);
082                put(HtmlTags.OL, UL_OL);
083                put(HtmlTags.P, DIV);
084                put(HtmlTags.PRE, PRE);
085                put(HtmlTags.S, EM_STRONG_STRIKE_SUP_SUP);
086                put(HtmlTags.SPAN, SPAN);
087                put(HtmlTags.STRIKE, EM_STRONG_STRIKE_SUP_SUP);
088                put(HtmlTags.STRONG, EM_STRONG_STRIKE_SUP_SUP);
089                put(HtmlTags.SUB, EM_STRONG_STRIKE_SUP_SUP);
090                put(HtmlTags.SUP, EM_STRONG_STRIKE_SUP_SUP);
091                put(HtmlTags.TABLE, TABLE);
092                put(HtmlTags.TD, TD);
093                put(HtmlTags.TH, TD);
094                put(HtmlTags.TR, TR);
095                put(HtmlTags.U, EM_STRONG_STRIKE_SUP_SUP);
096                put(HtmlTags.UL, UL_OL);
097        }
098
099        /**
100         * Object that processes the following tags:
101         * i, em, b, strong, s, strike, u, sup, sub
102         */
103        public static final HTMLTagProcessor EM_STRONG_STRIKE_SUP_SUP = new HTMLTagProcessor() {
104                /**
105                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
106                 */
107                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) {
108                        tag = mapTag(tag);
109                        attrs.put(tag, null);
110                        worker.updateChain(tag, attrs);
111                }
112                /**
113                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
114                 */
115                public void endElement(HTMLWorker worker, String tag) {
116                        tag = mapTag(tag);
117                        worker.updateChain(tag);
118                }
119                /**
120                 * Maps em to i, strong to b, and strike to s.
121                 * This is a convention: the style parser expects i, b and s.
122                 * @param tag the original tag
123                 * @return the mapped tag
124                 */
125                private String mapTag(String tag) {
126                        if (HtmlTags.EM.equalsIgnoreCase(tag))
127                                return HtmlTags.I;
128                        if (HtmlTags.STRONG.equalsIgnoreCase(tag))
129                                return HtmlTags.B;
130                        if (HtmlTags.STRIKE.equalsIgnoreCase(tag))
131                                return HtmlTags.S;
132                        return tag;
133                }
134
135        };
136
137        /**
138         * Object that processes the a tag.
139         */
140        public static final HTMLTagProcessor A = new HTMLTagProcessor() {
141                /**
142                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
143                 */
144                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) {
145                        worker.updateChain(tag, attrs);
146                        worker.flushContent();
147                }
148                /**
149                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
150                 */
151                public void endElement(HTMLWorker worker, String tag) {
152                        worker.processLink();
153                        worker.updateChain(tag);
154                }
155        };
156
157        /**
158         * Object that processes the br tag.
159         */
160        public static final HTMLTagProcessor BR = new HTMLTagProcessor(){
161                /**
162                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
163                 */
164                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) {
165                        worker.newLine();
166                }
167                /**
168                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
169                 */
170                public void endElement(HTMLWorker worker, String tag) {
171                }
172
173        };
174
175        public static final HTMLTagProcessor UL_OL = new HTMLTagProcessor(){
176
177                /**
178                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
179                 */
180                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
181                        worker.carriageReturn();
182                        if (worker.isPendingLI())
183                                worker.endElement(HtmlTags.LI);
184                        worker.setSkipText(true);
185                        worker.updateChain(tag, attrs);;
186                        worker.pushToStack(worker.createList(tag));
187                }
188
189                /**
190                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
191                 */
192                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
193                        worker.carriageReturn();
194                        if (worker.isPendingLI())
195                                worker.endElement(HtmlTags.LI);
196                        worker.setSkipText(false);
197                        worker.updateChain(tag);
198                        worker.processList();
199                }
200
201        };
202
203        public static final HTMLTagProcessor HR = new HTMLTagProcessor(){
204
205                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
206                        worker.carriageReturn();
207                        worker.pushToStack(worker.createLineSeparator(attrs));
208                }
209
210                public void endElement(HTMLWorker worker, String tag) {
211                }
212
213        };
214
215        public static final HTMLTagProcessor SPAN = new HTMLTagProcessor(){
216
217                /**
218                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
219                 */
220                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) {
221                        worker.updateChain(tag, attrs);
222                }
223
224                /**
225                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
226                 */
227                public void endElement(HTMLWorker worker, String tag) {
228                        worker.updateChain(tag);
229                }
230
231        };
232
233        public static final HTMLTagProcessor H = new HTMLTagProcessor(){
234
235                /**
236                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
237                 */
238                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
239                        worker.carriageReturn();
240                        if (!attrs.containsKey(HtmlTags.SIZE)) {
241                                int v = 7 - Integer.parseInt(tag.substring(1));
242                                attrs.put(HtmlTags.SIZE, Integer.toString(v));
243                        }
244                        worker.updateChain(tag, attrs);
245                }
246
247                /**
248                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
249                 */
250                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
251                        worker.carriageReturn();
252                        worker.updateChain(tag);
253                }
254
255        };
256
257        public static final HTMLTagProcessor LI = new HTMLTagProcessor(){
258
259                /**
260                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
261                 */
262                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
263                        worker.carriageReturn();
264                        if (worker.isPendingLI())
265                                worker.endElement(tag);
266                        worker.setSkipText(false);
267                        worker.setPendingLI(true);
268                        worker.updateChain(tag, attrs);
269                        worker.pushToStack(worker.createListItem());
270                }
271
272                /**
273                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
274                 */
275                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
276                        worker.carriageReturn();
277                        worker.setPendingLI(false);
278                        worker.setSkipText(true);
279                        worker.updateChain(tag);
280                        worker.processListItem();
281                }
282
283        };
284
285        public static final HTMLTagProcessor PRE = new HTMLTagProcessor(){
286
287                /**
288                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
289                 */
290                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
291                        worker.carriageReturn();
292                        if (!attrs.containsKey(HtmlTags.FACE)) {
293                                attrs.put(HtmlTags.FACE, "Courier");
294                        }
295                        worker.updateChain(tag, attrs);
296                        worker.setInsidePRE(true);
297                }
298
299                /**
300                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
301                 */
302                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
303                        worker.carriageReturn();
304                        worker.updateChain(tag);
305                        worker.setInsidePRE(false);
306                }
307
308        };
309
310        public static final HTMLTagProcessor DIV = new HTMLTagProcessor(){
311
312                /**
313                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
314                 */
315                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
316                        worker.carriageReturn();
317                        worker.updateChain(tag, attrs);
318                }
319
320                /**
321                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
322                 */
323                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
324                        worker.carriageReturn();
325                        worker.updateChain(tag);
326                }
327
328        };
329
330
331        public static final HTMLTagProcessor TABLE = new HTMLTagProcessor(){
332
333                /**
334                 * @throws DocumentException
335                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
336                 */
337                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
338                        worker.carriageReturn();
339                        TableWrapper table = new TableWrapper(attrs);
340                        worker.pushToStack(table);
341                        worker.pushTableState();
342                        worker.setPendingTD(false);
343                        worker.setPendingTR(false);
344                        worker.setSkipText(true);
345                        // Table alignment should not affect children elements, thus remove
346                        attrs.remove(HtmlTags.ALIGN);
347            // In case this is a nested table reset colspan and rowspan
348                        attrs.put(HtmlTags.COLSPAN, "1");
349                        attrs.put(HtmlTags.ROWSPAN, "1");
350                        worker.updateChain(tag, attrs);
351                }
352
353                /**
354                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
355                 */
356                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
357                        worker.carriageReturn();
358                        if (worker.isPendingTR())
359                                worker.endElement(HtmlTags.TR);
360                        worker.updateChain(tag);
361                        worker.processTable();
362                        worker.popTableState();
363                        worker.setSkipText(false);
364                }
365
366        };
367        public static final HTMLTagProcessor TR = new HTMLTagProcessor(){
368
369                /**
370                 * @throws DocumentException
371                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
372                 */
373                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
374                        worker.carriageReturn();
375                        if (worker.isPendingTR())
376                                worker.endElement(tag);
377                        worker.setSkipText(true);
378                        worker.setPendingTR(true);
379                        worker.updateChain(tag, attrs);
380                }
381
382                /**
383                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
384                 */
385                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
386                        worker.carriageReturn();
387                        if (worker.isPendingTD())
388                                worker.endElement(HtmlTags.TD);
389                        worker.setPendingTR(false);
390                        worker.updateChain(tag);
391                        worker.processRow();
392                        worker.setSkipText(true);
393                }
394
395        };
396        public static final HTMLTagProcessor TD = new HTMLTagProcessor(){
397
398                /**
399                 * @throws DocumentException
400                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
401                 */
402                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException {
403                        worker.carriageReturn();
404                        if (worker.isPendingTD())
405                                worker.endElement(tag);
406                        worker.setSkipText(false);
407                        worker.setPendingTD(true);
408                        worker.updateChain(HtmlTags.TD, attrs);
409                        worker.pushToStack(worker.createCell(tag));
410                }
411
412                /**
413                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
414                 */
415                public void endElement(HTMLWorker worker, String tag) throws DocumentException {
416                        worker.carriageReturn();
417                        worker.setPendingTD(false);
418                        worker.updateChain(HtmlTags.TD);
419                        worker.setSkipText(true);
420                }
421
422        };
423
424        public static final HTMLTagProcessor IMG = new HTMLTagProcessor(){
425
426                /**
427                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#startElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String, java.util.Map)
428                 */
429                public void startElement(HTMLWorker worker, String tag, Map<String, String> attrs) throws DocumentException, IOException {
430                        worker.updateChain(tag, attrs);
431                        worker.processImage(worker.createImage(attrs), attrs);
432                        worker.updateChain(tag);
433                }
434
435                /**
436                 * @see com.itextpdf.text.html.simpleparser.HTMLTagProcessors#endElement(com.itextpdf.text.html.simpleparser.HTMLWorker, java.lang.String)
437                 */
438                public void endElement(HTMLWorker worker, String tag) {
439                }
440
441        };
442
443        /** Serial version UID. */
444        private static final long serialVersionUID = -959260811961222824L;
445}