001/**
002 * Portions Copyright 2001 Sun Microsystems, Inc.
003 * Portions Copyright 1999-2001 Language Technologies Institute, 
004 * Carnegie Mellon University.
005 * All Rights Reserved.  Use is subject to license terms.
006 * 
007 * See the file "license.terms" for information on usage and
008 * redistribution of this file, and for a DISCLAIMER OF ALL 
009 * WARRANTIES.
010 */
011package com.sun.speech.freetts.lexicon;
012
013import java.io.BufferedOutputStream;
014import java.io.BufferedReader;
015import java.io.DataInputStream;
016import java.io.DataOutputStream;
017import java.io.FileOutputStream;
018import java.io.IOException;
019import java.io.InputStream;
020import java.io.InputStreamReader;
021import java.net.URL;
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.HashSet;
025import java.util.Iterator;
026import java.util.List;
027import java.util.Set;
028import java.util.StringTokenizer;
029
030import com.sun.speech.freetts.util.BulkTimer;
031import com.sun.speech.freetts.util.Utilities;
032
033/**
034 * Provides the phone list for words using the CMU6 letter-to-sound
035 * (LTS) rules, which are based on the Black, Lenzo, and Pagel paper,
036 * "Issues in Building General Letter-to-Sound Rules."  Proceedings
037 * of ECSA Workshop on Speech Synthesis, pages 77-80, Australia, 1998.
038 *
039 * <p>The LTS rules are a simple state machine, with one entry point
040 * for each letter of the alphabet (lower case letters are always
041 * assumed, and the rules keep an array with one entry per letter that
042 * point into the state machine).
043 *
044 * <p>The state machine consists of a huge array, with most entries
045 * containing a decision and the indices of two other entries. The
046 * first of these two indices represents where to go if the decision
047 * is true, and the second represents where to go if the decision is
048 * false. All entries that do not contain a decision are final
049 * entries, and these contain a phone. 
050 *
051 * <p>The decision in this case is a simple character comparison,
052 * but it is done in the context of a window around the character in
 * the word. The decision consists of an index into the context window
054 * and a character value. If the character in the context window
055 * matches the character value, then the decision is true. 
056 *
057 * <p>The machine traversal for each letter starts at that letter's
058 * entry in the state machine and ends only when it reaches a final
059 * state. If there is no phone that can be mapped, the phone in the
060 * final state is set to 'epsilon.' 
061 *
062 * <p>The context window for a character is generated in the following
063 * way: 
064 *
065 * <ul>
066 * <li>Pad the original word on either side with '#' and '0'
067 * characters the size of the window for the LTS rules (in this case,
068 * the window size is 4). The "#" is used to indicate the beginning
069 * and end of the word. So, the word "monkey" would turn into
070 * "000#monkey#000".
 * <li>For each character in the word, the context window consists of
 * the characters in the padded form that precede and follow the character.
073 * The number of characters on each side is dependent upon the window
074 * size. So, for this implementation, the context window for the 'k'
075 * in monkey is "#money#0". 
076 * </ul>
077 *  
078 * <p>Here's how the phone for 'k' in 'monkey' might be determined: 
079 *
080 * <ul>
081 * <li>Create the context window "#money#0". 
082 * <li>Start at the state machine entry for 'k' in the state machine. 
083 * <li>Grab the 'index' from the current state. This represents an
084 * index into the context window.
085 * <li>Compare the value of the character at the index in the context
086 * window to the character from the current state. If there is a
087 * match, the next state is the qtrue value. If there isn't a match,
088 * the next state is the qfalse state. 
 * <li>Keep on working through the machine until you reach a final
 * state.
091 * <li>When you get to the final state, the phone is the character in
092 * that state. 
093 * </ul>
094 *
095 * <p>This implementation will either read from a straight ASCII file
096 * or a binary file.  When reading from an ASCII file, you can specify
097 * when the input line is tokenized:  load, lookup, or never.  If you
098 * specify 'load', the entire file will be parsed when it is loaded.
099 * If you specify 'lookup', the file will be loaded, but the parsing
100 * for each line will be delayed until it is referenced and the parsed
 * form will be saved away.  If you specify 'never', the lines will
 * be parsed each time they are referenced.  The default is 'load'.  To
103 * specify the load type, set the system property as follows:
104 *
105 * <pre>
106 *   -Dcom.sun.speech.freetts.lexicon.LTSTokenize=load
107 * </pre>
108 *
109 * <p>[[[TODO:  This implementation uses ASCII 'a'-'z', which is not
110 * internationalized.]]]
111 */
112public class LetterToSoundImpl implements LetterToSound {
113    /**
114     * Entry in file represents the total number of states in the
115     * file.  This should be at the top of the file.  The format
116     * should be "TOTAL n" where n is an integer value.
117     */
118    final static String TOTAL = "TOTAL";
119
120    /**
121     * Entry in file represents the beginning of a new letter index.
122     * This should appear before the list of a new set of states for
123     * a particular letter.  The format should be "INDEX n c" where
124     * n is the index into the state machine array and c is the
125     * character.
126     */
127    final static String INDEX = "INDEX";
128
129    /**
130     * Entry in file represents a state.  The format should be
131     * "STATE i c t f" where 'i' represents an index to look at in the
132     * decision string, c is the character that should match, t is the
133     * index of the state to go to if there is a match, and f is the
134     * of the state to go to if there isn't a match.
135     */
136    final static String STATE = "STATE";
137
138    /**
139     * Entry in file represents a final state.  The format should be
140     * "PHONE p" where p represents a phone string that comes from the
141     * phone table.
142     */
143    final static String PHONE = "PHONE";
144
145    /**
146     * If true, the state string is tokenized when it is first read.
147     * The side effects of this are quicker lookups, but more memory
148     * usage and a longer startup time. 
149     */
150    protected boolean tokenizeOnLoad = false;
151       
152    /**
153     * If true, the state string is tokenized the first time it is
154     * referenced.  The side effects of this are quicker lookups, but
155     * more memory usage.
156     */
157    protected boolean tokenizeOnLookup = false;
158
159    /**
160     * Magic number for binary LTS files.
161     */
162    private final static int MAGIC = 0xdeadbeef;
163
164    /**
165     * Current binary file version.
166     */
167    private final static int VERSION = 1;
168
169    /**
170     * The LTS state machine. Entries can be String or State.  An
171     * ArrayList could be used here -- I chose not to because I
172     * thought it might be quicker to avoid dealing with the dynamic
173     * resizing.
174     */
175    private Object[] stateMachine = null;
176
177    /**
178     * The number of states in the state machine.
179     */
180    private int numStates = 0;
181
182    /**
183     * The 'window size' of the LTS rules.
184     */
185    private final static int WINDOW_SIZE = 4;
186        
187    /**
188     * An array of characters to hold a string for checking against a
189     * rule.  This will be reused over and over again, so the goal
190     * was just to have a single area instead of new'ing up a new one
191     * for every word.  The name choice is to match that in Flite's
192     * <code>cst_lts.c</code>.
193     */
194    private char[] fval_buff = new char[WINDOW_SIZE * 2];
195    
196    /**
197     * The indexes of the starting points for letters in the state machine.
198     */
199    protected HashMap letterIndex;
200
201    /**
202     * The list of phones that can be returned by the LTS rules.
203     */
204    static private List phonemeTable;
205
206    /**
207     * Class constructor.
208     *
209     * @param ltsRules a URL pointing to the text
210     *   containing the letter to sound rules
211     * @param binary if true, the URL is a binary source
212     *
213     * @throws NullPointerException if the ltsRules are null
214     * @throws IOException if errors are encountered while reading the
215     *   compiled form or the addenda
216     */ 
217    public LetterToSoundImpl(URL ltsRules, boolean binary) throws IOException {
218        BulkTimer.LOAD.start("LTS");
219        InputStream is = ltsRules.openStream();
220        if (binary) {
221            loadBinary(is);
222        } else {
223            loadText(is);
224        }
225        is.close();
226        BulkTimer.LOAD.stop("LTS");
227    }
228
229    /**
230     * Loads the LTS rules from the given text input stream.  The
231     * stream is not closed after the rules are read.       
232     *
233     * @param is the input stream
234     *
235     * @throws IOException if an error occurs on input.
236     */
237    private void loadText(InputStream is) throws IOException {
238        BufferedReader reader;
239        String line;
240
241        // Find out when to convert the phone string into an array.
242        //
243        String tokenize =
244            Utilities.getProperty("com.sun.speech.freetts.lexicon.LTSTokenize",
245                               "load");
246        tokenizeOnLoad = tokenize.equals("load");
247        tokenizeOnLookup = tokenize.equals("lookup");
248
249        letterIndex = new HashMap();
250
251        reader = new BufferedReader(new InputStreamReader(is));
252        line = reader.readLine();
253        while (line != null) {
254            if (!line.startsWith("***")) {
255                parseAndAdd(line);
256            }
257            line = reader.readLine();
258        }
259    }
260
261    /**
262     * Loads the LTS rules from the given binary input stream.  The
263     * input stream is not closed after the rules are read.
264     *
265     * @param is the input stream
266     *
267     * @throws IOException if an error occurs on input.
268     */
269    private void loadBinary(InputStream is) throws IOException {
270        DataInputStream dis = new DataInputStream(is);
271
272        if (dis.readInt() != MAGIC) {
273            throw new Error("Bad LTS binary file format");
274        }
275
276        if (dis.readInt() != VERSION) {
277            throw new Error("Bad LTS binary file version");
278        }
279
280        // read the phoneme table
281        //
282        int phonemeTableSize = dis.readInt();
283        phonemeTable = new ArrayList(phonemeTableSize);
284
285        for (int i = 0; i < phonemeTableSize; i++) {
286            String phoneme = dis.readUTF();
287            phonemeTable.add(phoneme);
288        }
289
290        // letter index
291        //
292        int letterIndexSize = dis.readInt();
293        letterIndex = new HashMap();
294        for (int i = 0; i < letterIndexSize; i++) {
295            char c = dis.readChar();
296            int index = dis.readInt();
297            letterIndex.put(Character.toString(c), new Integer(index));
298        }
299
300        // statemachine states
301        //
302        int stateMachineSize = dis.readInt();
303        stateMachine = new Object[stateMachineSize];
304        for (int i = 0; i < stateMachineSize; i++) {
305            int type = dis.readInt();
306
307            if (type == FinalState.TYPE) {
308                stateMachine[i] = FinalState.loadBinary(dis);
309            } else if (type == DecisionState.TYPE) {
310                stateMachine[i] = DecisionState.loadBinary(dis);
311            } else {
312                throw new Error("Unknown state type in LTS load");
313            }
314        }
315    }
316
317
318    /**
319     * Creates a word from the given input line and add it to the state
320     * machine.  It expects the TOTAL line to come before any of the
321     * states.
322     *
323     * @param line the line of text from the input file
324     */
325    protected void parseAndAdd(String line) {
326        StringTokenizer tokenizer = new StringTokenizer(line," ");
327        String type = tokenizer.nextToken();
328
329        if (type.equals(STATE) || type.equals(PHONE)) {
330            if (tokenizeOnLoad) {
331                stateMachine[numStates] = getState(type, tokenizer);
332            } else {
333                stateMachine[numStates] = line;
334            }
335            numStates++;
336        } else if (type.equals(INDEX)) {
337            Integer index = new Integer(tokenizer.nextToken());
338            if (index.intValue() != numStates) {
339                throw new Error("Bad INDEX in file.");
340            } else {
341                String c = tokenizer.nextToken();
342                letterIndex.put(c,index);
343            }
344        } else if (type.equals(TOTAL)) {
345            stateMachine = new Object[Integer.parseInt(tokenizer.nextToken())];
346        }
347    }
348
349    /**
350     * Dumps a binary form of the letter to sound rules.
351     * This method is not thread-safe.
352     *
353     * <p>Binary format is:
354     * <pre>
355     *   MAGIC
356     *   VERSION
357     *   NUM STATES
358     *   for each state ...
359     * </pre> 
360     *
361     * @param path the path to dump the file to
362     *
363     * @throws IOException if a problem occurs during the dump
364     */
365    public void dumpBinary(String path) throws IOException {
366        FileOutputStream fos = new FileOutputStream(path);
367        DataOutputStream dos = new DataOutputStream(new
368                BufferedOutputStream(fos));
369
370        dos.writeInt(MAGIC);
371        dos.writeInt(VERSION);
372
373        // Phoneme table
374        //
375        phonemeTable = findPhonemes();
376        dos.writeInt(phonemeTable.size());
377        for (Iterator i = phonemeTable.iterator(); i.hasNext(); ) {
378            String phoneme = (String) i.next();
379            dos.writeUTF(phoneme);
380        }
381
382        // letter index
383        //
384        dos.writeInt(letterIndex.size());
385        for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) {
386            String letter = (String) i.next();
387            int index = ((Integer) letterIndex.get(letter)).intValue();
388            dos.writeChar(letter.charAt(0));
389            dos.writeInt(index);
390        }
391
392        // statemachine states
393        //
394        dos.writeInt(stateMachine.length);
395
396        for (int i = 0; i < stateMachine.length; i++) {
397            getState(i).writeBinary(dos);
398        }
399        dos.close();
400    }
401
402    /**
403     * Returns a list of all the phonemes used by the LTS rules.
404     *
405     * @return a list of all the phonemes
406     */
407    private List findPhonemes() {
408        Set set = new HashSet();
409        for (int i = 0; i < stateMachine.length; i++) {
410            if (stateMachine[i] instanceof FinalState) {
411                FinalState fstate = (FinalState) stateMachine[i];
412                if (fstate.phoneList != null) {
413                    for (int j = 0; j < fstate.phoneList.length; j++) {
414                        set.add(fstate.phoneList[j]);
415                    }
416                }
417            }
418        }
419        return new ArrayList(set);
420    }
421
422
423    /**
424     * Gets the <code>State</code> at the given index.  This may
425     * replace a <code>String</code> at
426     * the current spot with an actual <code>State</code> instance.
427     *
428     * @param i the index into the state machine
429     *
430     * @return the <code>State</code> at the given index.
431     */
432    protected State getState(int i) {
433        State state = null;
434        if (stateMachine[i] instanceof String) {
435            state = getState((String) stateMachine[i]);
436            if (tokenizeOnLookup) {
437                stateMachine[i] = state;
438            }
439        } else {
440            state = (State) stateMachine[i];
441        }
442        return state;
443    }
444
445    /**
446     * Gets the <code>State</code> based upon the <code>String</code>.
447     *
448     * @param s the string to parse
449     *
450     * @return the parsed <code>State</code>
451     */
452    protected State getState(String s) {
453        StringTokenizer tokenizer = new StringTokenizer(s, " ");
454        return getState(tokenizer.nextToken(), tokenizer);
455    }
456    
457    /**
458     * Gets the <code>State</code> based upon the <code>type</code>
459     * and <code>tokenizer<code>.
460     *
461     * @param type one of <code>STATE</code> or <code>PHONE</code>
462     * @param tokenizer a <code>StringTokenizer</code> containing the
463     *   <code>State</code>
464     *
465     * @return the parsed <code>State</code>     
466     */
467    protected State getState(String type, StringTokenizer tokenizer) {
468        if (type.equals(STATE)) {
469            int index = Integer.parseInt(tokenizer.nextToken());
470            String c = tokenizer.nextToken();
471            int qtrue = Integer.parseInt(tokenizer.nextToken());
472            int qfalse = Integer.parseInt(tokenizer.nextToken());
473            return new DecisionState(index, c.charAt(0), qtrue, qfalse);
474        } else if (type.equals(PHONE)) {
475            return new FinalState(tokenizer.nextToken());
476        }
477        return null;
478    }
479
480    /**
481     * Makes a character array that looks like "000#word#000".
482     *
483     * @param word the original word
484     *
485     * @return the padded word
486     */
487    protected char[] getFullBuff(String word) {
488        char[] full_buff = new char[word.length() + (2 * WINDOW_SIZE)];
489
490        // Make full_buff look like "000#word#000"
491        //
492        for (int i = 0; i < (WINDOW_SIZE - 1); i++) 
493        {
494            full_buff[i] = '0';
495        }
496        full_buff[WINDOW_SIZE - 1] = '#';
497        word.getChars(0,word.length(),full_buff,WINDOW_SIZE);
498        for (int i = 0; i < (WINDOW_SIZE - 1); i++) 
499        {
500            full_buff[full_buff.length - i - 1] = '0';
501        }
502        full_buff[full_buff.length - WINDOW_SIZE] = '#';
503        return full_buff;
504    }
505    
506    /**
507     * Calculates the phone list for a given word.  If a phone list cannot
508     * be determined, <code>null</code> is returned.  This particular
509     * implementation ignores the part of speech.
510     *
511     * @param word the word to find
512     * @param partOfSpeech the part of speech.
513     *
514     * @return the list of phones for word or <code>null</code>
515     */
516    public String[] getPhones(String word, String partOfSpeech) {
517        ArrayList<String> phoneList = new ArrayList<String>();
518        State currentState;
519        Integer startIndex;
520        int stateIndex;
521        char c;
522
523        // Create "000#word#000"
524        //
525        char[] full_buff = getFullBuff(word);
526        
527        // For each character in the word, create a WINDOW_SIZE
528        // context on each size of the character, and then ask the
529        // state machine what's next.  It's magic.  BTW, this goes
530        // through the word from beginning to end.  Flite goes through
531        // it from end to beginning.  There doesn't seem to be a
532        // difference in the result.
533        //
534        for (int pos = 0; pos < word.length(); pos++) {
535            for (int i = 0; i < WINDOW_SIZE; i++) {
536                fval_buff[i] = full_buff[pos + i];
537                fval_buff[i + WINDOW_SIZE] =
538                    full_buff[i + pos + 1 + WINDOW_SIZE];
539            }
540            c = word.charAt(pos);
541            startIndex = (Integer) letterIndex.get(Character.toString(c));
542            if (startIndex == null) {
543                continue;
544            }
545            stateIndex = startIndex.intValue();
546            currentState = getState(stateIndex);
547            while (!(currentState instanceof FinalState)) {
548                stateIndex =
549                    ((DecisionState)
550                     currentState).getNextState(fval_buff);
551                currentState = getState(stateIndex);
552            }
553            ((FinalState) currentState).append(phoneList);
554        }
555        return (String[]) phoneList.toArray(new String[0]);
556    }
557
558    /**
559     * Compares this LTS to another for debugging purposes.
560     *
561     * @param other the other LTS to compare to
562     *
563     * @return <code>true</code> if these are equivalent
564     */
565    public boolean compare(LetterToSoundImpl other) {
566
567        // compare letter index table
568        //
569        for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) {
570            String key = (String) i.next();
571            Integer thisIndex = (Integer) letterIndex.get(key);
572            Integer otherIndex = (Integer) other.letterIndex.get(key);
573            if (!thisIndex.equals(otherIndex)) {
574                System.out.println("Bad Index for " + key);
575                return false;
576            }
577        }
578
579        // compare states
580        //
581        for (int i = 0; i < stateMachine.length; i++) {
582            State state = getState(i);
583            State otherState = other.getState(i);
584            if (!state.compare(otherState)) {
585                System.out.println("Bad state " + i);
586                return false;
587            }
588        }
589
590        return true;
591    }
592        
593    /**
594     * A marker interface for the states in the LTS state machine.
595     *
596     * @see DecisionState
597     * @see FinalState
598     */
599    static interface State {
600        public void writeBinary(DataOutputStream dos) throws IOException;
601        public boolean compare(State other);
602    }
603
604
605    /**
606     * A <code>State</code> that represents a decision to be made.
607     *
608     * @see FinalState
609     */
610    static class DecisionState implements State {
611        final static int TYPE = 1;
612        int index;
613        char c;
614        int qtrue;
615        int qfalse;
616        
617        /**
618         * Class constructor.
619         *
620         * @param index the index into a string for comparison to c
621         * @param c the character to match in a string at index
622         * @param qtrue the state to go to in the state machine on a match
623         * @param qfalse the state to go to in the state machine on no match
624         */
625        public DecisionState(int index, char c, int qtrue, int qfalse) {
626            this.index = index;
627            this.c = c;
628            this.qtrue = qtrue;
629            this.qfalse = qfalse;
630        }
631        
632        /**
633         * Gets the next state to go to based upon the given character
634         * sequence.
635         *
636         * @param chars the characters for comparison
637         *
638         * @ret an index into the state machine.
639         */
640        public int getNextState(char[] chars) {
641            return (chars[index] == c) ? qtrue : qfalse;
642        }
643        
644        /**
645         * Outputs this <code>State</code> as though it came from the
646         * text input file.
647         *
648         * @return a <code>String</code> describing this <code>State</code>.
649         */
650        public String toString() {
651            return STATE + " " + Integer.toString(index)
652                + " " + Character.toString(c)
653                + " " + Integer.toString(qtrue)
654                + " " + Integer.toString(qfalse);
655        }
656
657        /**
658         * Writes this <code>State</code> to the given output stream.
659         *
660         * @param dos the data output stream
661         *
662         * @throws IOException if an error occurs
663         */
664        public void writeBinary(DataOutputStream dos) throws IOException {
665            dos.writeInt(TYPE);
666            dos.writeInt(index);
667            dos.writeChar(c);
668            dos.writeInt(qtrue);
669            dos.writeInt(qfalse);
670        }
671
672        /**
673         * Loads a <code>DecisionState</code> object from the given
674         * input stream.
675         *
676         * @param dis the data input stream
677         * @return a newly constructed decision state
678         *
679         * @throws IOException if an error occurs
680         */
681        public static State loadBinary(DataInputStream dis)
682                            throws IOException {
683            int index = dis.readInt();
684            char c = dis.readChar();
685            int qtrue = dis.readInt();
686            int qfalse = dis.readInt();
687            return new DecisionState(index, c, qtrue, qfalse);
688        }
689
690        /**
691         * Compares this state to another state for debugging purposes.
692         *
693         * @param other the other state to compare against
694         *
695         * @return true if the states are equivalent
696         */
697        public boolean compare(State other) {
698            if (other instanceof DecisionState) {
699                DecisionState otherState = (DecisionState) other;
700                return index == otherState.index &&
701                       c == otherState.c &&
702                       qtrue == otherState.qtrue &&
703                       qfalse == otherState.qfalse;
704            }
705            return false;
706        }
707    }
708
709
710    /**
711     * A <code>State</code> that represents a final state in the
712     * state machine.  It contains one or more phones from the
713     * phone table.
714     *
715     * @see DecisionState
716     */
717    static class FinalState implements State {
718        final static int TYPE = 2;
719        String[] phoneList;
720
721        /**
722         * Class constructor.  The string "epsilon" is used to indicate
723         * an empty list.
724         *
725         * @param phones the phones for this state
726         */
727        public FinalState(String phones) {
728            if (phones.equals("epsilon")) {
729                phoneList = null;
730            } else {
731                int i = phones.indexOf('-');
732                if (i != -1) {
733                    phoneList = new String[2];
734                    phoneList[0] = phones.substring(0, i);
735                    phoneList[1] = phones.substring(i + 1);                
736                } else {
737                    phoneList = new String[1];
738                    phoneList[0] = phones;
739                }
740            }
741        }
742
743        /**
744         * Class constructor.
745         *
746         * @param phones an array of phones for this state
747         */
748        public FinalState(String[] phones) {
749            phoneList = phones;
750        }
751
752        /**
753         * Appends the phone list for this state to the given
754         * <code>ArrayList</code>.
755         *
756         * @param array the array to append to
757         */
758        public void append(ArrayList array) {
759            if (phoneList == null) {
760                return;
761            } else {
762                for (int i = 0; i < phoneList.length; i++) {
763                    array.add(phoneList[i]);
764                }
765            }
766        }
767
768        /**
769         * Outputs this <code>State</code> as though it came from the
770         * text input file.  The string "epsilon" is used to indicate
771         * an empty list.
772         *
773         * @return a <code>String</code> describing this <code>State</code>
774         */
775        public String toString() {
776            if (phoneList == null) {
777                return PHONE + " epsilon";
778            } else if (phoneList.length == 1) {
779                return PHONE + " " + phoneList[0];
780            } else {
781                return PHONE + " " + phoneList[0] + "-" + phoneList[1];
782            }
783        }
784
785        /**
786         * Compares this state to another state for debugging
787         * purposes.
788         *
789         * @param other the other state to compare against
790         *
791         * @return <code>true</code> if the states are equivalent
792         */
793        public boolean compare(State other) {
794            if (other instanceof FinalState) {
795                FinalState otherState = (FinalState) other;
796                if (phoneList == null) {
797                    return otherState.phoneList == null;
798                } else {
799                    for (int i = 0; i < phoneList.length; i++) {
800                        if (!phoneList[i].equals(otherState.phoneList[i])) {
801                            return false;
802                        }
803                    }
804                    return true;
805                }
806            }
807            return false;
808        }
809
810
811        /**
812         * Writes this state to the given output stream.
813         *
814         * @param dos the data output stream
815         *
816         * @throws IOException if an error occurs
817         */
818        public void writeBinary(DataOutputStream dos) throws IOException {
819            dos.writeInt(TYPE);
820            if (phoneList == null) {
821                dos.writeInt(0);
822            } else {
823                dos.writeInt(phoneList.length);
824                for (int i = 0; i < phoneList.length; i++) {
825                    dos.writeInt(phonemeTable.indexOf(phoneList[i]));
826                }
827            }
828        }
829
830        /**
831         * Loads a FinalState object from the given input stream
832         *
833         * @param dis the data input stream
834         *
835         * @return a newly constructed final state
836         *
837         * @throws IOException if an error occurs
838         */
839        public static State loadBinary(DataInputStream dis)
840                            throws IOException {
841            String[] phoneList;
842            int phoneListLength = dis.readInt();
843
844            if (phoneListLength == 0) {
845                phoneList = null;
846            } else {
847                phoneList = new String[phoneListLength];
848            }
849            for (int i = 0; i < phoneListLength; i++) {
850                int index = dis.readInt();
851                phoneList[i] = (String) phonemeTable.get(index);
852            }
853            return new FinalState(phoneList);
854        }
855    }
856
857
858    /**
859     * Translates between text and binary forms of the CMU6 LTS rules.
860     */
861    public static void main(String[] args) {
862        LexiconImpl lex, lex2;
863        boolean showTimes = false;
864        String srcPath = ".";
865        String destPath = ".";
866        String name = "cmulex_lts";
867
868        try {
869            if (args.length > 0) {
870                BulkTimer timer = new BulkTimer();
871                timer.start();
872                for (int i = 0 ; i < args.length; i++) {
873                    if (args[i].equals("-src")) {
874                        srcPath = args[++i];
875                    } else if (args[i].equals("-dest")) {
876                        destPath = args[++i];
877                    } else if (args[i].equals("-name")
878                               && i < args.length -1) {
879                        name = args[++i];
880                    } else if (args[i].equals("-generate_binary")) {
881
882                         System.out.println("Loading " + name);
883                         timer.start("load_text");
884                         LetterToSoundImpl text = new LetterToSoundImpl(
885                                new URL("file:" + srcPath + "/"
886                                        + name + ".txt"),
887                                false);
888                         timer.stop("load_text");
889
890                         System.out.println("Dumping " + name);
891                         timer.start("dump_binary");
892                         text.dumpBinary(destPath + "/" + name + ".bin");
893                         timer.stop("dump_binary");
894
895                    } else if (args[i].equals("-compare")) {
896
897                        timer.start("load_text");
898                         LetterToSoundImpl text = new LetterToSoundImpl(
899                                new URL("file:./" + name + ".txt"),  false);
900                        timer.stop("load_text");
901
902                        timer.start("load_binary");
903                        LetterToSoundImpl binary = new LetterToSoundImpl(
904                                new URL("file:./" + name + ".bin"),  true);
905                        timer.stop("load_binary");
906
907                        timer.start("compare");
908                        if (!text.compare(binary)) {
909                            System.out.println("NOT EQUIVALENT");
910                        } else {
911                            System.out.println("ok");
912                        }
913                        timer.stop("compare");
914                    } else if (args[i].equals("-showtimes")) {
915                        showTimes = true;
916                    } else {
917                        System.out.println("Unknown option " + args[i]);
918                    }
919                }
920                timer.stop();
921                if (showTimes) {
922                    timer.show("LTS loading and dumping");
923                }
924            } else {
925                System.out.println("Options: ");
926                System.out.println("    -src path");
927                System.out.println("    -dest path");
928                System.out.println("    -compare");
929                System.out.println("    -generate_binary");
930                System.out.println("    -showTimes");
931            }
932        } catch (IOException ioe) {
933            System.err.println(ioe);
934        }
935    }
936}