001/* 002 * $Id: SimplePatternParser.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.pdf.hyphenation; 045 046import java.io.IOException; 047import java.io.InputStream; 048import java.util.ArrayList; 049import java.util.Map; 050import java.util.StringTokenizer; 051 052import com.itextpdf.text.ExceptionConverter; 053import com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler; 054import com.itextpdf.text.xml.simpleparser.SimpleXMLParser; 055 056/** Parses the xml hyphenation pattern. 057 * 058 * @author Paulo Soares 059 */ 060public class SimplePatternParser implements SimpleXMLDocHandler, 061 PatternConsumer { 062 int currElement; 063 064 PatternConsumer consumer; 065 066 StringBuffer token; 067 068 ArrayList<Object> exception; 069 070 char hyphenChar; 071 072 SimpleXMLParser parser; 073 074 static final int ELEM_CLASSES = 1; 075 076 static final int ELEM_EXCEPTIONS = 2; 077 078 static final int ELEM_PATTERNS = 3; 079 080 static final int ELEM_HYPHEN = 4; 081 082 /** Creates a new instance of PatternParser2 */ 083 public SimplePatternParser() { 084 token = new StringBuffer(); 085 hyphenChar = '-'; // default 086 } 087 088 public void parse(InputStream stream, PatternConsumer consumer) { 089 this.consumer = consumer; 090 try { 091 SimpleXMLParser.parse(this, stream); 092 } catch (IOException e) { 093 throw new ExceptionConverter(e); 094 } finally { 095 try { 096 stream.close(); 097 } catch (Exception e) { 098 } 099 } 100 } 101 102 protected static String getPattern(String word) { 103 StringBuffer pat = new StringBuffer(); 104 int len = word.length(); 105 for (int i = 0; i < len; i++) { 106 if (!Character.isDigit(word.charAt(i))) { 107 pat.append(word.charAt(i)); 108 } 109 } 110 return pat.toString(); 111 } 112 113 protected ArrayList<Object> normalizeException(ArrayList<Object> ex) { 114 ArrayList<Object> res = new ArrayList<Object>(); 115 for (int i = 0; i < ex.size(); i++) { 116 Object item = ex.get(i); 117 if (item instanceof String) { 118 String str = (String) item; 119 StringBuffer buf = new StringBuffer(); 120 for (int j = 0; j < str.length(); j++) { 121 char c = str.charAt(j); 122 if (c != hyphenChar) { 123 buf.append(c); 124 } else { 125 res.add(buf.toString()); 126 buf.setLength(0); 127 char[] h = new char[1]; 128 h[0] = hyphenChar; 129 // we use here hyphenChar which is not necessarily 130 // the one to be printed 131 res.add(new Hyphen(new String(h), null, null)); 132 } 133 } 134 if (buf.length() > 0) { 135 res.add(buf.toString()); 136 } 137 } else { 138 res.add(item); 139 } 140 } 141 return res; 142 } 143 144 protected String getExceptionWord(ArrayList<Object> ex) { 145 StringBuffer res = new StringBuffer(); 146 for (int i = 0; i < ex.size(); i++) { 147 Object item = ex.get(i); 148 if (item instanceof String) { 149 res.append((String) item); 150 } else { 151 if (((Hyphen) item).noBreak != null) { 152 res.append(((Hyphen) item).noBreak); 153 } 154 } 155 } 156 return res.toString(); 157 } 158 159 protected static String getInterletterValues(String pat) { 160 StringBuffer il = new StringBuffer(); 161 String word = pat + "a"; // add dummy letter to serve as sentinel 162 int len = word.length(); 163 for (int i = 0; i < len; i++) { 164 char c = word.charAt(i); 165 if (Character.isDigit(c)) { 166 il.append(c); 167 i++; 168 } else { 169 il.append('0'); 170 } 171 } 172 return il.toString(); 173 } 174 175 public void endDocument() { 176 } 177 178 @SuppressWarnings("unchecked") 179 public void endElement(String tag) { 180 if (token.length() > 0) { 181 String word = token.toString(); 182 switch (currElement) { 183 case ELEM_CLASSES: 184 consumer.addClass(word); 185 break; 186 case ELEM_EXCEPTIONS: 187 exception.add(word); 188 exception = normalizeException(exception); 189 consumer.addException(getExceptionWord(exception), 190 (ArrayList<Object>) exception.clone()); 191 break; 192 case ELEM_PATTERNS: 193 consumer.addPattern(getPattern(word), 194 getInterletterValues(word)); 195 break; 196 case ELEM_HYPHEN: 197 // nothing to do 198 break; 199 } 200 if (currElement != ELEM_HYPHEN) { 201 token.setLength(0); 202 } 203 } 204 if (currElement == ELEM_HYPHEN) { 205 currElement = ELEM_EXCEPTIONS; 206 } else { 207 currElement = 0; 208 } 209 } 210 211 public void startDocument() { 212 } 213 214 public void startElement(String tag, Map<String, String> h) { 215 if (tag.equals("hyphen-char")) { 216 String hh = h.get("value"); 217 if (hh != null && hh.length() == 1) { 218 hyphenChar = hh.charAt(0); 219 } 220 } else if (tag.equals("classes")) { 221 currElement = ELEM_CLASSES; 222 } else if (tag.equals("patterns")) { 223 currElement = ELEM_PATTERNS; 224 } else if (tag.equals("exceptions")) { 225 currElement = ELEM_EXCEPTIONS; 226 exception = new ArrayList<Object>(); 227 } else if (tag.equals("hyphen")) { 228 if (token.length() > 0) { 229 exception.add(token.toString()); 230 } 231 exception.add(new Hyphen(h.get("pre"), h 232 .get("no"), h.get("post"))); 233 currElement = ELEM_HYPHEN; 234 } 235 token.setLength(0); 236 } 237 238 @SuppressWarnings("unchecked") 239 public void text(String str) { 240 StringTokenizer tk = new StringTokenizer(str); 241 while (tk.hasMoreTokens()) { 242 String word = tk.nextToken(); 243 // System.out.println("\"" + word + "\""); 244 switch (currElement) { 245 case ELEM_CLASSES: 246 consumer.addClass(word); 247 break; 248 case ELEM_EXCEPTIONS: 249 exception.add(word); 250 exception = normalizeException(exception); 251 consumer.addException(getExceptionWord(exception), 252 (ArrayList<Object>) exception.clone()); 253 exception.clear(); 254 break; 255 case ELEM_PATTERNS: 256 consumer.addPattern(getPattern(word), 257 getInterletterValues(word)); 258 break; 259 } 260 } 261 } 262 263 // PatternConsumer implementation for testing purposes 264 public void addClass(String c) { 265 System.out.println("class: " + c); 266 } 267 268 public void addException(String w, ArrayList<Object> e) { 269 System.out.println("exception: " + w + " : " + e.toString()); 270 } 271 272 public void addPattern(String p, String v) { 273 System.out.println("pattern: " + p + " : " + v); 274 } 275 /* 276 public static void main(String[] args) throws Exception { 277 try { 278 if (args.length > 0) { 279 SimplePatternParser pp = new SimplePatternParser(); 280 pp.parse(new FileInputStream(args[0]), pp); 281 } 282 } catch (Exception e) { 283 e.printStackTrace(); 284 } 285 } 286 */ 287}