001/*
002 * $Id: SimplePatternParser.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf.hyphenation;
045
046import java.io.IOException;
047import java.io.InputStream;
048import java.util.ArrayList;
049import java.util.Map;
050import java.util.StringTokenizer;
051
052import com.itextpdf.text.ExceptionConverter;
053import com.itextpdf.text.xml.simpleparser.SimpleXMLDocHandler;
054import com.itextpdf.text.xml.simpleparser.SimpleXMLParser;
055
056/** Parses the xml hyphenation pattern.
057 *
058 * @author Paulo Soares
059 */
060public class SimplePatternParser implements SimpleXMLDocHandler,
061                PatternConsumer {
062        int currElement;
063
064        PatternConsumer consumer;
065
066        StringBuffer token;
067
068        ArrayList<Object> exception;
069
070        char hyphenChar;
071
072        SimpleXMLParser parser;
073
074        static final int ELEM_CLASSES = 1;
075
076        static final int ELEM_EXCEPTIONS = 2;
077
078        static final int ELEM_PATTERNS = 3;
079
080        static final int ELEM_HYPHEN = 4;
081
082        /** Creates a new instance of PatternParser2 */
083        public SimplePatternParser() {
084                token = new StringBuffer();
085                hyphenChar = '-'; // default
086        }
087
088        public void parse(InputStream stream, PatternConsumer consumer) {
089                this.consumer = consumer;
090                try {
091                        SimpleXMLParser.parse(this, stream);
092                } catch (IOException e) {
093                        throw new ExceptionConverter(e);
094                } finally {
095                        try {
096                                stream.close();
097                        } catch (Exception e) {
098                        }
099                }
100        }
101
102        protected static String getPattern(String word) {
103                StringBuffer pat = new StringBuffer();
104                int len = word.length();
105                for (int i = 0; i < len; i++) {
106                        if (!Character.isDigit(word.charAt(i))) {
107                                pat.append(word.charAt(i));
108                        }
109                }
110                return pat.toString();
111        }
112
113        protected ArrayList<Object> normalizeException(ArrayList<Object> ex) {
114                ArrayList<Object> res = new ArrayList<Object>();
115                for (int i = 0; i < ex.size(); i++) {
116                        Object item = ex.get(i);
117                        if (item instanceof String) {
118                                String str = (String) item;
119                                StringBuffer buf = new StringBuffer();
120                                for (int j = 0; j < str.length(); j++) {
121                                        char c = str.charAt(j);
122                                        if (c != hyphenChar) {
123                                                buf.append(c);
124                                        } else {
125                                                res.add(buf.toString());
126                                                buf.setLength(0);
127                                                char[] h = new char[1];
128                                                h[0] = hyphenChar;
129                                                // we use here hyphenChar which is not necessarily
130                                                // the one to be printed
131                                                res.add(new Hyphen(new String(h), null, null));
132                                        }
133                                }
134                                if (buf.length() > 0) {
135                                        res.add(buf.toString());
136                                }
137                        } else {
138                                res.add(item);
139                        }
140                }
141                return res;
142        }
143
144        protected String getExceptionWord(ArrayList<Object> ex) {
145                StringBuffer res = new StringBuffer();
146                for (int i = 0; i < ex.size(); i++) {
147                        Object item = ex.get(i);
148                        if (item instanceof String) {
149                                res.append((String) item);
150                        } else {
151                                if (((Hyphen) item).noBreak != null) {
152                                        res.append(((Hyphen) item).noBreak);
153                                }
154                        }
155                }
156                return res.toString();
157        }
158
159        protected static String getInterletterValues(String pat) {
160                StringBuffer il = new StringBuffer();
161                String word = pat + "a"; // add dummy letter to serve as sentinel
162                int len = word.length();
163                for (int i = 0; i < len; i++) {
164                        char c = word.charAt(i);
165                        if (Character.isDigit(c)) {
166                                il.append(c);
167                                i++;
168                        } else {
169                                il.append('0');
170                        }
171                }
172                return il.toString();
173        }
174
175        public void endDocument() {
176        }
177
178        @SuppressWarnings("unchecked")
179    public void endElement(String tag) {
180                if (token.length() > 0) {
181                        String word = token.toString();
182                        switch (currElement) {
183                        case ELEM_CLASSES:
184                                consumer.addClass(word);
185                                break;
186                        case ELEM_EXCEPTIONS:
187                                exception.add(word);
188                                exception = normalizeException(exception);
189                                consumer.addException(getExceptionWord(exception),
190                                                (ArrayList<Object>) exception.clone());
191                                break;
192                        case ELEM_PATTERNS:
193                                consumer.addPattern(getPattern(word),
194                                                getInterletterValues(word));
195                                break;
196                        case ELEM_HYPHEN:
197                                // nothing to do
198                                break;
199                        }
200                        if (currElement != ELEM_HYPHEN) {
201                                token.setLength(0);
202                        }
203                }
204                if (currElement == ELEM_HYPHEN) {
205                        currElement = ELEM_EXCEPTIONS;
206                } else {
207                        currElement = 0;
208                }
209        }
210
211        public void startDocument() {
212        }
213
214        public void startElement(String tag, Map<String, String> h) {
215                if (tag.equals("hyphen-char")) {
216                        String hh = h.get("value");
217                        if (hh != null && hh.length() == 1) {
218                                hyphenChar = hh.charAt(0);
219                        }
220                } else if (tag.equals("classes")) {
221                        currElement = ELEM_CLASSES;
222                } else if (tag.equals("patterns")) {
223                        currElement = ELEM_PATTERNS;
224                } else if (tag.equals("exceptions")) {
225                        currElement = ELEM_EXCEPTIONS;
226                        exception = new ArrayList<Object>();
227                } else if (tag.equals("hyphen")) {
228                        if (token.length() > 0) {
229                                exception.add(token.toString());
230                        }
231                        exception.add(new Hyphen(h.get("pre"), h
232                                        .get("no"), h.get("post")));
233                        currElement = ELEM_HYPHEN;
234                }
235                token.setLength(0);
236        }
237
238        @SuppressWarnings("unchecked")
239    public void text(String str) {
240                StringTokenizer tk = new StringTokenizer(str);
241                while (tk.hasMoreTokens()) {
242                        String word = tk.nextToken();
243                        // System.out.println("\"" + word + "\"");
244                        switch (currElement) {
245                        case ELEM_CLASSES:
246                                consumer.addClass(word);
247                                break;
248                        case ELEM_EXCEPTIONS:
249                                exception.add(word);
250                                exception = normalizeException(exception);
251                                consumer.addException(getExceptionWord(exception),
252                                                (ArrayList<Object>) exception.clone());
253                                exception.clear();
254                                break;
255                        case ELEM_PATTERNS:
256                                consumer.addPattern(getPattern(word),
257                                                getInterletterValues(word));
258                                break;
259                        }
260                }
261        }
262
263        // PatternConsumer implementation for testing purposes
264        public void addClass(String c) {
265                System.out.println("class: " + c);
266        }
267
268        public void addException(String w, ArrayList<Object> e) {
269                System.out.println("exception: " + w + " : " + e.toString());
270        }
271
272        public void addPattern(String p, String v) {
273                System.out.println("pattern: " + p + " : " + v);
274        }
275        /*
276        public static void main(String[] args) throws Exception {
277                try {
278                        if (args.length > 0) {
279                                SimplePatternParser pp = new SimplePatternParser();
280                                pp.parse(new FileInputStream(args[0]), pp);
281                        }
282                } catch (Exception e) {
283                        e.printStackTrace();
284                }
285        }
286        */
287}