001/*
002 * $Id: HtmlEncoder.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.html;
045
046import java.util.HashSet;
047import java.util.Set;
048
049import com.itextpdf.text.Element;
050import com.itextpdf.text.BaseColor;
051
052/**
053 * This class converts a <CODE>String</CODE> to the HTML-format of a String.
054 * <P>
055 * To convert the <CODE>String</CODE>, each character is examined:
056 * <UL>
057 * <LI>ASCII-characters from 000 till 031 are represented as &amp;#xxx;<BR>
058 *     (with xxx = the value of the character)
059 * <LI>ASCII-characters from 032 t/m 127 are represented by the character itself, except for:
060 *     <UL>
061 *     <LI>'\n' becomes &lt;BR&gt;\n
062 *     <LI>&quot; becomes &amp;quot;
063 *     <LI>&amp; becomes &amp;amp;
064 *     <LI>&lt; becomes &amp;lt;
065 *     <LI>&gt; becomes &amp;gt;
066 *     </UL>
067 * <LI>ASCII-characters from 128 till 255 are represented as &amp;#xxx;<BR>
068 *     (with xxx = the value of the character)
069 * </UL>
070 * <P>
071 * Example:
072 * <P><BLOCKQUOTE><PRE>
073 *    String htmlPresentation = HtmlEncoder.encode("Marie-Th&#233;r&#232;se S&#248;rensen");
074 * </PRE></BLOCKQUOTE><P>
075 * for more info: see O'Reilly; "HTML: The Definitive Guide" (page 164)
076 */
077
078public final class HtmlEncoder {
079    
080        /**
081         * This class will never be constructed.
082         */
083        private HtmlEncoder() {
084        }
085        
086    // membervariables
087    /**
088     * List with the HTML translation of all the characters.
089     * @since 5.0.6 (renamed from htmlCode)
090     */
091    private static final String[] HTML_CODE = new String[256];
092    
093    static {
094        for (int i = 0; i < 10; i++) {
095            HTML_CODE[i] = "&#00" + i + ";";
096        }
097        
098        for (int i = 10; i < 32; i++) {
099            HTML_CODE[i] = "&#0" + i + ";";
100        }
101        
102        for (int i = 32; i < 128; i++) {
103            HTML_CODE[i] = String.valueOf((char)i);
104        }
105        
106        // Special characters
107        HTML_CODE['\t'] = "\t";
108        HTML_CODE['\n'] = "<br />\n";
109        HTML_CODE['\"'] = "&quot;"; // double quote
110        HTML_CODE['&'] = "&amp;"; // ampersand
111        HTML_CODE['<'] = "&lt;"; // lower than
112        HTML_CODE['>'] = "&gt;"; // greater than
113        
114        for (int i = 128; i < 256; i++) {
115            HTML_CODE[i] = "&#" + i + ";";
116        }
117    }
118    
119    // methods
120    
121    /**
122     * Converts a <CODE>String</CODE> to the HTML-format of this <CODE>String</CODE>.
123     *
124     * @param   string  The <CODE>String</CODE> to convert
125     * @return  a <CODE>String</CODE>
126     */
127    public static String encode(String string) {
128        int n = string.length();
129        char character;
130        StringBuffer buffer = new StringBuffer();
131        // loop over all the characters of the String.
132        for (int i = 0; i < n; i++) {
133            character = string.charAt(i);
134            // the Htmlcode of these characters are added to a StringBuffer one by one
135            if (character < 256) {
136                buffer.append(HTML_CODE[character]);
137            }
138            else {
139                // Improvement posted by Joachim Eyrich
140                buffer.append("&#").append((int)character).append(';');
141            }
142        }
143        return buffer.toString();
144    }
145    
146    /**
147     * Converts a <CODE>BaseColor</CODE> into a HTML representation of this <CODE>BaseColor</CODE>.
148     *
149     * @param   color   the <CODE>BaseColor</CODE> that has to be converted.
150     * @return  the HTML representation of this <COLOR>BaseColor</COLOR>
151     */
152    public static String encode(BaseColor color) {
153        StringBuffer buffer = new StringBuffer("#");
154        if (color.getRed() < 16) {
155            buffer.append('0');
156        }
157        buffer.append(Integer.toString(color.getRed(), 16));
158        if (color.getGreen() < 16) {
159            buffer.append('0');
160        }
161        buffer.append(Integer.toString(color.getGreen(), 16));
162        if (color.getBlue() < 16) {
163            buffer.append('0');
164        }
165        buffer.append(Integer.toString(color.getBlue(), 16));
166        return buffer.toString();
167    }
168
169    /**
170     * Translates the alignment value.
171     *
172     * @param   alignment   the alignment value
173     * @return  the translated value
174     */
175    public static String getAlignment(int alignment) {
176        switch(alignment) {
177            case Element.ALIGN_LEFT:
178                return HtmlTags.ALIGN_LEFT;
179            case Element.ALIGN_CENTER:
180                return HtmlTags.ALIGN_CENTER;
181            case Element.ALIGN_RIGHT:
182                return HtmlTags.ALIGN_RIGHT;
183            case Element.ALIGN_JUSTIFIED:
184            case Element.ALIGN_JUSTIFIED_ALL:
185                return HtmlTags.ALIGN_JUSTIFY;
186            case Element.ALIGN_TOP:
187                return HtmlTags.ALIGN_TOP;
188            case Element.ALIGN_MIDDLE:
189                return HtmlTags.ALIGN_MIDDLE;
190            case Element.ALIGN_BOTTOM:
191                return HtmlTags.ALIGN_BOTTOM;
192            case Element.ALIGN_BASELINE:
193                return HtmlTags.ALIGN_BASELINE;
194                default:
195                    return "";
196        }
197    }
198    
199        /**
200         * Set containing tags that trigger a new line.
201         * @since iText 5.0.6
202         */
203        private static final Set<String> NEWLINETAGS = new HashSet<String>();
204        static {
205                // Following list are the basic html tags that force new lines
206                // List may be extended as we discover them
207                NEWLINETAGS.add(HtmlTags.P);
208                NEWLINETAGS.add(HtmlTags.BLOCKQUOTE);
209                NEWLINETAGS.add(HtmlTags.BR);
210        }       
211        
212        /**
213         * Returns true if the tag causes a new line like p, br etc.
214         * @since iText 5.0.6
215         */
216        public static boolean isNewLineTag(String tag) {
217                return NEWLINETAGS.contains(tag);
218        }
219}