001/** 002 * Portions Copyright 2001 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts.lexicon; 012 013import java.io.BufferedOutputStream; 014import java.io.BufferedReader; 015import java.io.DataInputStream; 016import java.io.DataOutputStream; 017import java.io.FileOutputStream; 018import java.io.IOException; 019import java.io.InputStream; 020import java.io.InputStreamReader; 021import java.net.URL; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.HashSet; 025import java.util.Iterator; 026import java.util.List; 027import java.util.Set; 028import java.util.StringTokenizer; 029 030import com.sun.speech.freetts.util.BulkTimer; 031import com.sun.speech.freetts.util.Utilities; 032 033/** 034 * Provides the phone list for words using the CMU6 letter-to-sound 035 * (LTS) rules, which are based on the Black, Lenzo, and Pagel paper, 036 * "Issues in Building General Letter-to-Sound Rules." Proceedings 037 * of ECSA Workshop on Speech Synthesis, pages 77-80, Australia, 1998. 038 * 039 * <p>The LTS rules are a simple state machine, with one entry point 040 * for each letter of the alphabet (lower case letters are always 041 * assumed, and the rules keep an array with one entry per letter that 042 * point into the state machine). 043 * 044 * <p>The state machine consists of a huge array, with most entries 045 * containing a decision and the indices of two other entries. The 046 * first of these two indices represents where to go if the decision 047 * is true, and the second represents where to go if the decision is 048 * false. All entries that do not contain a decision are final 049 * entries, and these contain a phone. 050 * 051 * <p>The decision in this case is a simple character comparison, 052 * but it is done in the context of a window around the character in 053 * the word. The decision consists of a index into the context window 054 * and a character value. If the character in the context window 055 * matches the character value, then the decision is true. 056 * 057 * <p>The machine traversal for each letter starts at that letter's 058 * entry in the state machine and ends only when it reaches a final 059 * state. If there is no phone that can be mapped, the phone in the 060 * final state is set to 'epsilon.' 061 * 062 * <p>The context window for a character is generated in the following 063 * way: 064 * 065 * <ul> 066 * <li>Pad the original word on either side with '#' and '0' 067 * characters the size of the window for the LTS rules (in this case, 068 * the window size is 4). The "#" is used to indicate the beginning 069 * and end of the word. So, the word "monkey" would turn into 070 * "000#monkey#000". 071 * <li>For each character in the word, the context window consists of 072 * the characters in the padded form the preceed and follow the word. 073 * The number of characters on each side is dependent upon the window 074 * size. So, for this implementation, the context window for the 'k' 075 * in monkey is "#money#0". 076 * </ul> 077 * 078 * <p>Here's how the phone for 'k' in 'monkey' might be determined: 079 * 080 * <ul> 081 * <li>Create the context window "#money#0". 082 * <li>Start at the state machine entry for 'k' in the state machine. 083 * <li>Grab the 'index' from the current state. This represents an 084 * index into the context window. 085 * <li>Compare the value of the character at the index in the context 086 * window to the character from the current state. If there is a 087 * match, the next state is the qtrue value. If there isn't a match, 088 * the next state is the qfalse state. 089 * <li>Keep on working through the machine until you read a final 090 * state. 091 * <li>When you get to the final state, the phone is the character in 092 * that state. 093 * </ul> 094 * 095 * <p>This implementation will either read from a straight ASCII file 096 * or a binary file. When reading from an ASCII file, you can specify 097 * when the input line is tokenized: load, lookup, or never. If you 098 * specify 'load', the entire file will be parsed when it is loaded. 099 * If you specify 'lookup', the file will be loaded, but the parsing 100 * for each line will be delayed until it is referenced and the parsed 101 * form will be saved away. If you specify 'never', the lines will 102 * parsed each time they are referenced. The default is 'load'. To 103 * specify the load type, set the system property as follows: 104 * 105 * <pre> 106 * -Dcom.sun.speech.freetts.lexicon.LTSTokenize=load 107 * </pre> 108 * 109 * <p>[[[TODO: This implementation uses ASCII 'a'-'z', which is not 110 * internationalized.]]] 111 */ 112public class LetterToSoundImpl implements LetterToSound { 113 /** 114 * Entry in file represents the total number of states in the 115 * file. This should be at the top of the file. The format 116 * should be "TOTAL n" where n is an integer value. 117 */ 118 final static String TOTAL = "TOTAL"; 119 120 /** 121 * Entry in file represents the beginning of a new letter index. 122 * This should appear before the list of a new set of states for 123 * a particular letter. The format should be "INDEX n c" where 124 * n is the index into the state machine array and c is the 125 * character. 126 */ 127 final static String INDEX = "INDEX"; 128 129 /** 130 * Entry in file represents a state. The format should be 131 * "STATE i c t f" where 'i' represents an index to look at in the 132 * decision string, c is the character that should match, t is the 133 * index of the state to go to if there is a match, and f is the 134 * of the state to go to if there isn't a match. 135 */ 136 final static String STATE = "STATE"; 137 138 /** 139 * Entry in file represents a final state. The format should be 140 * "PHONE p" where p represents a phone string that comes from the 141 * phone table. 142 */ 143 final static String PHONE = "PHONE"; 144 145 /** 146 * If true, the state string is tokenized when it is first read. 147 * The side effects of this are quicker lookups, but more memory 148 * usage and a longer startup time. 149 */ 150 protected boolean tokenizeOnLoad = false; 151 152 /** 153 * If true, the state string is tokenized the first time it is 154 * referenced. The side effects of this are quicker lookups, but 155 * more memory usage. 156 */ 157 protected boolean tokenizeOnLookup = false; 158 159 /** 160 * Magic number for binary LTS files. 161 */ 162 private final static int MAGIC = 0xdeadbeef; 163 164 /** 165 * Current binary file version. 166 */ 167 private final static int VERSION = 1; 168 169 /** 170 * The LTS state machine. Entries can be String or State. An 171 * ArrayList could be used here -- I chose not to because I 172 * thought it might be quicker to avoid dealing with the dynamic 173 * resizing. 174 */ 175 private Object[] stateMachine = null; 176 177 /** 178 * The number of states in the state machine. 179 */ 180 private int numStates = 0; 181 182 /** 183 * The 'window size' of the LTS rules. 184 */ 185 private final static int WINDOW_SIZE = 4; 186 187 /** 188 * An array of characters to hold a string for checking against a 189 * rule. This will be reused over and over again, so the goal 190 * was just to have a single area instead of new'ing up a new one 191 * for every word. The name choice is to match that in Flite's 192 * <code>cst_lts.c</code>. 193 */ 194 private char[] fval_buff = new char[WINDOW_SIZE * 2]; 195 196 /** 197 * The indexes of the starting points for letters in the state machine. 198 */ 199 protected HashMap letterIndex; 200 201 /** 202 * The list of phones that can be returned by the LTS rules. 203 */ 204 static private List phonemeTable; 205 206 /** 207 * Class constructor. 208 * 209 * @param ltsRules a URL pointing to the text 210 * containing the letter to sound rules 211 * @param binary if true, the URL is a binary source 212 * 213 * @throws NullPointerException if the ltsRules are null 214 * @throws IOException if errors are encountered while reading the 215 * compiled form or the addenda 216 */ 217 public LetterToSoundImpl(URL ltsRules, boolean binary) throws IOException { 218 BulkTimer.LOAD.start("LTS"); 219 InputStream is = ltsRules.openStream(); 220 if (binary) { 221 loadBinary(is); 222 } else { 223 loadText(is); 224 } 225 is.close(); 226 BulkTimer.LOAD.stop("LTS"); 227 } 228 229 /** 230 * Loads the LTS rules from the given text input stream. The 231 * stream is not closed after the rules are read. 232 * 233 * @param is the input stream 234 * 235 * @throws IOException if an error occurs on input. 236 */ 237 private void loadText(InputStream is) throws IOException { 238 BufferedReader reader; 239 String line; 240 241 // Find out when to convert the phone string into an array. 242 // 243 String tokenize = 244 Utilities.getProperty("com.sun.speech.freetts.lexicon.LTSTokenize", 245 "load"); 246 tokenizeOnLoad = tokenize.equals("load"); 247 tokenizeOnLookup = tokenize.equals("lookup"); 248 249 letterIndex = new HashMap(); 250 251 reader = new BufferedReader(new InputStreamReader(is)); 252 line = reader.readLine(); 253 while (line != null) { 254 if (!line.startsWith("***")) { 255 parseAndAdd(line); 256 } 257 line = reader.readLine(); 258 } 259 } 260 261 /** 262 * Loads the LTS rules from the given binary input stream. The 263 * input stream is not closed after the rules are read. 264 * 265 * @param is the input stream 266 * 267 * @throws IOException if an error occurs on input. 268 */ 269 private void loadBinary(InputStream is) throws IOException { 270 DataInputStream dis = new DataInputStream(is); 271 272 if (dis.readInt() != MAGIC) { 273 throw new Error("Bad LTS binary file format"); 274 } 275 276 if (dis.readInt() != VERSION) { 277 throw new Error("Bad LTS binary file version"); 278 } 279 280 // read the phoneme table 281 // 282 int phonemeTableSize = dis.readInt(); 283 phonemeTable = new ArrayList(phonemeTableSize); 284 285 for (int i = 0; i < phonemeTableSize; i++) { 286 String phoneme = dis.readUTF(); 287 phonemeTable.add(phoneme); 288 } 289 290 // letter index 291 // 292 int letterIndexSize = dis.readInt(); 293 letterIndex = new HashMap(); 294 for (int i = 0; i < letterIndexSize; i++) { 295 char c = dis.readChar(); 296 int index = dis.readInt(); 297 letterIndex.put(Character.toString(c), new Integer(index)); 298 } 299 300 // statemachine states 301 // 302 int stateMachineSize = dis.readInt(); 303 stateMachine = new Object[stateMachineSize]; 304 for (int i = 0; i < stateMachineSize; i++) { 305 int type = dis.readInt(); 306 307 if (type == FinalState.TYPE) { 308 stateMachine[i] = FinalState.loadBinary(dis); 309 } else if (type == DecisionState.TYPE) { 310 stateMachine[i] = DecisionState.loadBinary(dis); 311 } else { 312 throw new Error("Unknown state type in LTS load"); 313 } 314 } 315 } 316 317 318 /** 319 * Creates a word from the given input line and add it to the state 320 * machine. It expects the TOTAL line to come before any of the 321 * states. 322 * 323 * @param line the line of text from the input file 324 */ 325 protected void parseAndAdd(String line) { 326 StringTokenizer tokenizer = new StringTokenizer(line," "); 327 String type = tokenizer.nextToken(); 328 329 if (type.equals(STATE) || type.equals(PHONE)) { 330 if (tokenizeOnLoad) { 331 stateMachine[numStates] = getState(type, tokenizer); 332 } else { 333 stateMachine[numStates] = line; 334 } 335 numStates++; 336 } else if (type.equals(INDEX)) { 337 Integer index = new Integer(tokenizer.nextToken()); 338 if (index.intValue() != numStates) { 339 throw new Error("Bad INDEX in file."); 340 } else { 341 String c = tokenizer.nextToken(); 342 letterIndex.put(c,index); 343 } 344 } else if (type.equals(TOTAL)) { 345 stateMachine = new Object[Integer.parseInt(tokenizer.nextToken())]; 346 } 347 } 348 349 /** 350 * Dumps a binary form of the letter to sound rules. 351 * This method is not thread-safe. 352 * 353 * <p>Binary format is: 354 * <pre> 355 * MAGIC 356 * VERSION 357 * NUM STATES 358 * for each state ... 359 * </pre> 360 * 361 * @param path the path to dump the file to 362 * 363 * @throws IOException if a problem occurs during the dump 364 */ 365 public void dumpBinary(String path) throws IOException { 366 FileOutputStream fos = new FileOutputStream(path); 367 DataOutputStream dos = new DataOutputStream(new 368 BufferedOutputStream(fos)); 369 370 dos.writeInt(MAGIC); 371 dos.writeInt(VERSION); 372 373 // Phoneme table 374 // 375 phonemeTable = findPhonemes(); 376 dos.writeInt(phonemeTable.size()); 377 for (Iterator i = phonemeTable.iterator(); i.hasNext(); ) { 378 String phoneme = (String) i.next(); 379 dos.writeUTF(phoneme); 380 } 381 382 // letter index 383 // 384 dos.writeInt(letterIndex.size()); 385 for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) { 386 String letter = (String) i.next(); 387 int index = ((Integer) letterIndex.get(letter)).intValue(); 388 dos.writeChar(letter.charAt(0)); 389 dos.writeInt(index); 390 } 391 392 // statemachine states 393 // 394 dos.writeInt(stateMachine.length); 395 396 for (int i = 0; i < stateMachine.length; i++) { 397 getState(i).writeBinary(dos); 398 } 399 dos.close(); 400 } 401 402 /** 403 * Returns a list of all the phonemes used by the LTS rules. 404 * 405 * @return a list of all the phonemes 406 */ 407 private List findPhonemes() { 408 Set set = new HashSet(); 409 for (int i = 0; i < stateMachine.length; i++) { 410 if (stateMachine[i] instanceof FinalState) { 411 FinalState fstate = (FinalState) stateMachine[i]; 412 if (fstate.phoneList != null) { 413 for (int j = 0; j < fstate.phoneList.length; j++) { 414 set.add(fstate.phoneList[j]); 415 } 416 } 417 } 418 } 419 return new ArrayList(set); 420 } 421 422 423 /** 424 * Gets the <code>State</code> at the given index. This may 425 * replace a <code>String</code> at 426 * the current spot with an actual <code>State</code> instance. 427 * 428 * @param i the index into the state machine 429 * 430 * @return the <code>State</code> at the given index. 431 */ 432 protected State getState(int i) { 433 State state = null; 434 if (stateMachine[i] instanceof String) { 435 state = getState((String) stateMachine[i]); 436 if (tokenizeOnLookup) { 437 stateMachine[i] = state; 438 } 439 } else { 440 state = (State) stateMachine[i]; 441 } 442 return state; 443 } 444 445 /** 446 * Gets the <code>State</code> based upon the <code>String</code>. 447 * 448 * @param s the string to parse 449 * 450 * @return the parsed <code>State</code> 451 */ 452 protected State getState(String s) { 453 StringTokenizer tokenizer = new StringTokenizer(s, " "); 454 return getState(tokenizer.nextToken(), tokenizer); 455 } 456 457 /** 458 * Gets the <code>State</code> based upon the <code>type</code> 459 * and <code>tokenizer<code>. 460 * 461 * @param type one of <code>STATE</code> or <code>PHONE</code> 462 * @param tokenizer a <code>StringTokenizer</code> containing the 463 * <code>State</code> 464 * 465 * @return the parsed <code>State</code> 466 */ 467 protected State getState(String type, StringTokenizer tokenizer) { 468 if (type.equals(STATE)) { 469 int index = Integer.parseInt(tokenizer.nextToken()); 470 String c = tokenizer.nextToken(); 471 int qtrue = Integer.parseInt(tokenizer.nextToken()); 472 int qfalse = Integer.parseInt(tokenizer.nextToken()); 473 return new DecisionState(index, c.charAt(0), qtrue, qfalse); 474 } else if (type.equals(PHONE)) { 475 return new FinalState(tokenizer.nextToken()); 476 } 477 return null; 478 } 479 480 /** 481 * Makes a character array that looks like "000#word#000". 482 * 483 * @param word the original word 484 * 485 * @return the padded word 486 */ 487 protected char[] getFullBuff(String word) { 488 char[] full_buff = new char[word.length() + (2 * WINDOW_SIZE)]; 489 490 // Make full_buff look like "000#word#000" 491 // 492 for (int i = 0; i < (WINDOW_SIZE - 1); i++) 493 { 494 full_buff[i] = '0'; 495 } 496 full_buff[WINDOW_SIZE - 1] = '#'; 497 word.getChars(0,word.length(),full_buff,WINDOW_SIZE); 498 for (int i = 0; i < (WINDOW_SIZE - 1); i++) 499 { 500 full_buff[full_buff.length - i - 1] = '0'; 501 } 502 full_buff[full_buff.length - WINDOW_SIZE] = '#'; 503 return full_buff; 504 } 505 506 /** 507 * Calculates the phone list for a given word. If a phone list cannot 508 * be determined, <code>null</code> is returned. This particular 509 * implementation ignores the part of speech. 510 * 511 * @param word the word to find 512 * @param partOfSpeech the part of speech. 513 * 514 * @return the list of phones for word or <code>null</code> 515 */ 516 public String[] getPhones(String word, String partOfSpeech) { 517 ArrayList<String> phoneList = new ArrayList<String>(); 518 State currentState; 519 Integer startIndex; 520 int stateIndex; 521 char c; 522 523 // Create "000#word#000" 524 // 525 char[] full_buff = getFullBuff(word); 526 527 // For each character in the word, create a WINDOW_SIZE 528 // context on each size of the character, and then ask the 529 // state machine what's next. It's magic. BTW, this goes 530 // through the word from beginning to end. Flite goes through 531 // it from end to beginning. There doesn't seem to be a 532 // difference in the result. 533 // 534 for (int pos = 0; pos < word.length(); pos++) { 535 for (int i = 0; i < WINDOW_SIZE; i++) { 536 fval_buff[i] = full_buff[pos + i]; 537 fval_buff[i + WINDOW_SIZE] = 538 full_buff[i + pos + 1 + WINDOW_SIZE]; 539 } 540 c = word.charAt(pos); 541 startIndex = (Integer) letterIndex.get(Character.toString(c)); 542 if (startIndex == null) { 543 continue; 544 } 545 stateIndex = startIndex.intValue(); 546 currentState = getState(stateIndex); 547 while (!(currentState instanceof FinalState)) { 548 stateIndex = 549 ((DecisionState) 550 currentState).getNextState(fval_buff); 551 currentState = getState(stateIndex); 552 } 553 ((FinalState) currentState).append(phoneList); 554 } 555 return (String[]) phoneList.toArray(new String[0]); 556 } 557 558 /** 559 * Compares this LTS to another for debugging purposes. 560 * 561 * @param other the other LTS to compare to 562 * 563 * @return <code>true</code> if these are equivalent 564 */ 565 public boolean compare(LetterToSoundImpl other) { 566 567 // compare letter index table 568 // 569 for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) { 570 String key = (String) i.next(); 571 Integer thisIndex = (Integer) letterIndex.get(key); 572 Integer otherIndex = (Integer) other.letterIndex.get(key); 573 if (!thisIndex.equals(otherIndex)) { 574 System.out.println("Bad Index for " + key); 575 return false; 576 } 577 } 578 579 // compare states 580 // 581 for (int i = 0; i < stateMachine.length; i++) { 582 State state = getState(i); 583 State otherState = other.getState(i); 584 if (!state.compare(otherState)) { 585 System.out.println("Bad state " + i); 586 return false; 587 } 588 } 589 590 return true; 591 } 592 593 /** 594 * A marker interface for the states in the LTS state machine. 595 * 596 * @see DecisionState 597 * @see FinalState 598 */ 599 static interface State { 600 public void writeBinary(DataOutputStream dos) throws IOException; 601 public boolean compare(State other); 602 } 603 604 605 /** 606 * A <code>State</code> that represents a decision to be made. 607 * 608 * @see FinalState 609 */ 610 static class DecisionState implements State { 611 final static int TYPE = 1; 612 int index; 613 char c; 614 int qtrue; 615 int qfalse; 616 617 /** 618 * Class constructor. 619 * 620 * @param index the index into a string for comparison to c 621 * @param c the character to match in a string at index 622 * @param qtrue the state to go to in the state machine on a match 623 * @param qfalse the state to go to in the state machine on no match 624 */ 625 public DecisionState(int index, char c, int qtrue, int qfalse) { 626 this.index = index; 627 this.c = c; 628 this.qtrue = qtrue; 629 this.qfalse = qfalse; 630 } 631 632 /** 633 * Gets the next state to go to based upon the given character 634 * sequence. 635 * 636 * @param chars the characters for comparison 637 * 638 * @ret an index into the state machine. 639 */ 640 public int getNextState(char[] chars) { 641 return (chars[index] == c) ? qtrue : qfalse; 642 } 643 644 /** 645 * Outputs this <code>State</code> as though it came from the 646 * text input file. 647 * 648 * @return a <code>String</code> describing this <code>State</code>. 649 */ 650 public String toString() { 651 return STATE + " " + Integer.toString(index) 652 + " " + Character.toString(c) 653 + " " + Integer.toString(qtrue) 654 + " " + Integer.toString(qfalse); 655 } 656 657 /** 658 * Writes this <code>State</code> to the given output stream. 659 * 660 * @param dos the data output stream 661 * 662 * @throws IOException if an error occurs 663 */ 664 public void writeBinary(DataOutputStream dos) throws IOException { 665 dos.writeInt(TYPE); 666 dos.writeInt(index); 667 dos.writeChar(c); 668 dos.writeInt(qtrue); 669 dos.writeInt(qfalse); 670 } 671 672 /** 673 * Loads a <code>DecisionState</code> object from the given 674 * input stream. 675 * 676 * @param dis the data input stream 677 * @return a newly constructed decision state 678 * 679 * @throws IOException if an error occurs 680 */ 681 public static State loadBinary(DataInputStream dis) 682 throws IOException { 683 int index = dis.readInt(); 684 char c = dis.readChar(); 685 int qtrue = dis.readInt(); 686 int qfalse = dis.readInt(); 687 return new DecisionState(index, c, qtrue, qfalse); 688 } 689 690 /** 691 * Compares this state to another state for debugging purposes. 692 * 693 * @param other the other state to compare against 694 * 695 * @return true if the states are equivalent 696 */ 697 public boolean compare(State other) { 698 if (other instanceof DecisionState) { 699 DecisionState otherState = (DecisionState) other; 700 return index == otherState.index && 701 c == otherState.c && 702 qtrue == otherState.qtrue && 703 qfalse == otherState.qfalse; 704 } 705 return false; 706 } 707 } 708 709 710 /** 711 * A <code>State</code> that represents a final state in the 712 * state machine. It contains one or more phones from the 713 * phone table. 714 * 715 * @see DecisionState 716 */ 717 static class FinalState implements State { 718 final static int TYPE = 2; 719 String[] phoneList; 720 721 /** 722 * Class constructor. The string "epsilon" is used to indicate 723 * an empty list. 724 * 725 * @param phones the phones for this state 726 */ 727 public FinalState(String phones) { 728 if (phones.equals("epsilon")) { 729 phoneList = null; 730 } else { 731 int i = phones.indexOf('-'); 732 if (i != -1) { 733 phoneList = new String[2]; 734 phoneList[0] = phones.substring(0, i); 735 phoneList[1] = phones.substring(i + 1); 736 } else { 737 phoneList = new String[1]; 738 phoneList[0] = phones; 739 } 740 } 741 } 742 743 /** 744 * Class constructor. 745 * 746 * @param phones an array of phones for this state 747 */ 748 public FinalState(String[] phones) { 749 phoneList = phones; 750 } 751 752 /** 753 * Appends the phone list for this state to the given 754 * <code>ArrayList</code>. 755 * 756 * @param array the array to append to 757 */ 758 public void append(ArrayList array) { 759 if (phoneList == null) { 760 return; 761 } else { 762 for (int i = 0; i < phoneList.length; i++) { 763 array.add(phoneList[i]); 764 } 765 } 766 } 767 768 /** 769 * Outputs this <code>State</code> as though it came from the 770 * text input file. The string "epsilon" is used to indicate 771 * an empty list. 772 * 773 * @return a <code>String</code> describing this <code>State</code> 774 */ 775 public String toString() { 776 if (phoneList == null) { 777 return PHONE + " epsilon"; 778 } else if (phoneList.length == 1) { 779 return PHONE + " " + phoneList[0]; 780 } else { 781 return PHONE + " " + phoneList[0] + "-" + phoneList[1]; 782 } 783 } 784 785 /** 786 * Compares this state to another state for debugging 787 * purposes. 788 * 789 * @param other the other state to compare against 790 * 791 * @return <code>true</code> if the states are equivalent 792 */ 793 public boolean compare(State other) { 794 if (other instanceof FinalState) { 795 FinalState otherState = (FinalState) other; 796 if (phoneList == null) { 797 return otherState.phoneList == null; 798 } else { 799 for (int i = 0; i < phoneList.length; i++) { 800 if (!phoneList[i].equals(otherState.phoneList[i])) { 801 return false; 802 } 803 } 804 return true; 805 } 806 } 807 return false; 808 } 809 810 811 /** 812 * Writes this state to the given output stream. 813 * 814 * @param dos the data output stream 815 * 816 * @throws IOException if an error occurs 817 */ 818 public void writeBinary(DataOutputStream dos) throws IOException { 819 dos.writeInt(TYPE); 820 if (phoneList == null) { 821 dos.writeInt(0); 822 } else { 823 dos.writeInt(phoneList.length); 824 for (int i = 0; i < phoneList.length; i++) { 825 dos.writeInt(phonemeTable.indexOf(phoneList[i])); 826 } 827 } 828 } 829 830 /** 831 * Loads a FinalState object from the given input stream 832 * 833 * @param dis the data input stream 834 * 835 * @return a newly constructed final state 836 * 837 * @throws IOException if an error occurs 838 */ 839 public static State loadBinary(DataInputStream dis) 840 throws IOException { 841 String[] phoneList; 842 int phoneListLength = dis.readInt(); 843 844 if (phoneListLength == 0) { 845 phoneList = null; 846 } else { 847 phoneList = new String[phoneListLength]; 848 } 849 for (int i = 0; i < phoneListLength; i++) { 850 int index = dis.readInt(); 851 phoneList[i] = (String) phonemeTable.get(index); 852 } 853 return new FinalState(phoneList); 854 } 855 } 856 857 858 /** 859 * Translates between text and binary forms of the CMU6 LTS rules. 860 */ 861 public static void main(String[] args) { 862 LexiconImpl lex, lex2; 863 boolean showTimes = false; 864 String srcPath = "."; 865 String destPath = "."; 866 String name = "cmulex_lts"; 867 868 try { 869 if (args.length > 0) { 870 BulkTimer timer = new BulkTimer(); 871 timer.start(); 872 for (int i = 0 ; i < args.length; i++) { 873 if (args[i].equals("-src")) { 874 srcPath = args[++i]; 875 } else if (args[i].equals("-dest")) { 876 destPath = args[++i]; 877 } else if (args[i].equals("-name") 878 && i < args.length -1) { 879 name = args[++i]; 880 } else if (args[i].equals("-generate_binary")) { 881 882 System.out.println("Loading " + name); 883 timer.start("load_text"); 884 LetterToSoundImpl text = new LetterToSoundImpl( 885 new URL("file:" + srcPath + "/" 886 + name + ".txt"), 887 false); 888 timer.stop("load_text"); 889 890 System.out.println("Dumping " + name); 891 timer.start("dump_binary"); 892 text.dumpBinary(destPath + "/" + name + ".bin"); 893 timer.stop("dump_binary"); 894 895 } else if (args[i].equals("-compare")) { 896 897 timer.start("load_text"); 898 LetterToSoundImpl text = new LetterToSoundImpl( 899 new URL("file:./" + name + ".txt"), false); 900 timer.stop("load_text"); 901 902 timer.start("load_binary"); 903 LetterToSoundImpl binary = new LetterToSoundImpl( 904 new URL("file:./" + name + ".bin"), true); 905 timer.stop("load_binary"); 906 907 timer.start("compare"); 908 if (!text.compare(binary)) { 909 System.out.println("NOT EQUIVALENT"); 910 } else { 911 System.out.println("ok"); 912 } 913 timer.stop("compare"); 914 } else if (args[i].equals("-showtimes")) { 915 showTimes = true; 916 } else { 917 System.out.println("Unknown option " + args[i]); 918 } 919 } 920 timer.stop(); 921 if (showTimes) { 922 timer.show("LTS loading and dumping"); 923 } 924 } else { 925 System.out.println("Options: "); 926 System.out.println(" -src path"); 927 System.out.println(" -dest path"); 928 System.out.println(" -compare"); 929 System.out.println(" -generate_binary"); 930 System.out.println(" -showTimes"); 931 } 932 } catch (IOException ioe) { 933 System.err.println(ioe); 934 } 935 } 936}