001/* 002 * $Id: HtmlEncoder.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.html; 045 046import java.util.HashSet; 047import java.util.Set; 048 049import com.itextpdf.text.Element; 050import com.itextpdf.text.BaseColor; 051 052/** 053 * This class converts a <CODE>String</CODE> to the HTML-format of a String. 054 * <P> 055 * To convert the <CODE>String</CODE>, each character is examined: 056 * <UL> 057 * <LI>ASCII-characters from 000 till 031 are represented as &#xxx;<BR> 058 * (with xxx = the value of the character) 059 * <LI>ASCII-characters from 032 t/m 127 are represented by the character itself, except for: 060 * <UL> 061 * <LI>'\n' becomes <BR>\n 062 * <LI>" becomes &quot; 063 * <LI>& becomes &amp; 064 * <LI>< becomes &lt; 065 * <LI>> becomes &gt; 066 * </UL> 067 * <LI>ASCII-characters from 128 till 255 are represented as &#xxx;<BR> 068 * (with xxx = the value of the character) 069 * </UL> 070 * <P> 071 * Example: 072 * <P><BLOCKQUOTE><PRE> 073 * String htmlPresentation = HtmlEncoder.encode("Marie-Thérèse Sørensen"); 074 * </PRE></BLOCKQUOTE><P> 075 * for more info: see O'Reilly; "HTML: The Definitive Guide" (page 164) 076 */ 077 078public final class HtmlEncoder { 079 080 /** 081 * This class will never be constructed. 082 */ 083 private HtmlEncoder() { 084 } 085 086 // membervariables 087 /** 088 * List with the HTML translation of all the characters. 089 * @since 5.0.6 (renamed from htmlCode) 090 */ 091 private static final String[] HTML_CODE = new String[256]; 092 093 static { 094 for (int i = 0; i < 10; i++) { 095 HTML_CODE[i] = "�" + i + ";"; 096 } 097 098 for (int i = 10; i < 32; i++) { 099 HTML_CODE[i] = "�" + i + ";"; 100 } 101 102 for (int i = 32; i < 128; i++) { 103 HTML_CODE[i] = String.valueOf((char)i); 104 } 105 106 // Special characters 107 HTML_CODE['\t'] = "\t"; 108 HTML_CODE['\n'] = "<br />\n"; 109 HTML_CODE['\"'] = """; // double quote 110 HTML_CODE['&'] = "&"; // ampersand 111 HTML_CODE['<'] = "<"; // lower than 112 HTML_CODE['>'] = ">"; // greater than 113 114 for (int i = 128; i < 256; i++) { 115 HTML_CODE[i] = "&#" + i + ";"; 116 } 117 } 118 119 // methods 120 121 /** 122 * Converts a <CODE>String</CODE> to the HTML-format of this <CODE>String</CODE>. 123 * 124 * @param string The <CODE>String</CODE> to convert 125 * @return a <CODE>String</CODE> 126 */ 127 public static String encode(String string) { 128 int n = string.length(); 129 char character; 130 StringBuffer buffer = new StringBuffer(); 131 // loop over all the characters of the String. 132 for (int i = 0; i < n; i++) { 133 character = string.charAt(i); 134 // the Htmlcode of these characters are added to a StringBuffer one by one 135 if (character < 256) { 136 buffer.append(HTML_CODE[character]); 137 } 138 else { 139 // Improvement posted by Joachim Eyrich 140 buffer.append("&#").append((int)character).append(';'); 141 } 142 } 143 return buffer.toString(); 144 } 145 146 /** 147 * Converts a <CODE>BaseColor</CODE> into a HTML representation of this <CODE>BaseColor</CODE>. 148 * 149 * @param color the <CODE>BaseColor</CODE> that has to be converted. 150 * @return the HTML representation of this <COLOR>BaseColor</COLOR> 151 */ 152 public static String encode(BaseColor color) { 153 StringBuffer buffer = new StringBuffer("#"); 154 if (color.getRed() < 16) { 155 buffer.append('0'); 156 } 157 buffer.append(Integer.toString(color.getRed(), 16)); 158 if (color.getGreen() < 16) { 159 buffer.append('0'); 160 } 161 buffer.append(Integer.toString(color.getGreen(), 16)); 162 if (color.getBlue() < 16) { 163 buffer.append('0'); 164 } 165 buffer.append(Integer.toString(color.getBlue(), 16)); 166 return buffer.toString(); 167 } 168 169 /** 170 * Translates the alignment value. 171 * 172 * @param alignment the alignment value 173 * @return the translated value 174 */ 175 public static String getAlignment(int alignment) { 176 switch(alignment) { 177 case Element.ALIGN_LEFT: 178 return HtmlTags.ALIGN_LEFT; 179 case Element.ALIGN_CENTER: 180 return HtmlTags.ALIGN_CENTER; 181 case Element.ALIGN_RIGHT: 182 return HtmlTags.ALIGN_RIGHT; 183 case Element.ALIGN_JUSTIFIED: 184 case Element.ALIGN_JUSTIFIED_ALL: 185 return HtmlTags.ALIGN_JUSTIFY; 186 case Element.ALIGN_TOP: 187 return HtmlTags.ALIGN_TOP; 188 case Element.ALIGN_MIDDLE: 189 return HtmlTags.ALIGN_MIDDLE; 190 case Element.ALIGN_BOTTOM: 191 return HtmlTags.ALIGN_BOTTOM; 192 case Element.ALIGN_BASELINE: 193 return HtmlTags.ALIGN_BASELINE; 194 default: 195 return ""; 196 } 197 } 198 199 /** 200 * Set containing tags that trigger a new line. 201 * @since iText 5.0.6 202 */ 203 private static final Set<String> NEWLINETAGS = new HashSet<String>(); 204 static { 205 // Following list are the basic html tags that force new lines 206 // List may be extended as we discover them 207 NEWLINETAGS.add(HtmlTags.P); 208 NEWLINETAGS.add(HtmlTags.BLOCKQUOTE); 209 NEWLINETAGS.add(HtmlTags.BR); 210 } 211 212 /** 213 * Returns true if the tag causes a new line like p, br etc. 214 * @since iText 5.0.6 215 */ 216 public static boolean isNewLineTag(String tag) { 217 return NEWLINETAGS.contains(tag); 218 } 219}