001/** 002 * Portions Copyright 2001 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts.en.us; 012 013import java.io.BufferedReader; 014import java.io.IOException; 015import java.io.InputStream; 016import java.io.InputStreamReader; 017import java.net.URL; 018import java.util.StringTokenizer; 019 020/** 021 * Implements a finite state machine that checks if a given string 022 * is pronounceable. If it is pronounceable, the method 023 * <code>accept()</code> will return true. 024 */ 025public class PronounceableFSM { 026 027 private static final String VOCAB_SIZE = "VOCAB_SIZE"; 028 private static final String NUM_OF_TRANSITIONS = "NUM_OF_TRANSITIONS"; 029 private static final String TRANSITIONS = "TRANSITIONS"; 030 031 032 /** 033 * The vocabulary size. 034 */ 035 protected int vocabularySize; 036 037 038 /** 039 * The transitions of this FSM 040 */ 041 protected int[] transitions; 042 043 044 /** 045 * Whether we should scan the input string from the front. 046 */ 047 protected boolean scanFromFront; 048 049 050 /** 051 * Constructs a PronounceableFSM with information in the given URL. 052 * 053 * @param url the URL that contains the FSM specification 054 * @param scanFromFront indicates whether this FSM should scan the input 055 * string from the front, or from the back 056 */ 057 public PronounceableFSM(URL url, boolean scanFromFront) throws IOException { 058 this.scanFromFront = scanFromFront; 059 InputStream is = url.openStream(); 060 loadText(is); 061 is.close(); 062 } 063 064 065 /** 066 * Constructs a PronounceableFSM with the given attributes. 067 * 068 * @param vocabularySize the vocabulary size of the FSM 069 * @param transitions the transitions of the FSM 070 * @param scanFromFront indicates whether this FSM should scan the input 071 * string from the front, or from the back 072 */ 073 public PronounceableFSM(int vocabularySize, int[] transitions, 074 boolean scanFromFront) { 075 this.vocabularySize = vocabularySize; 076 this.transitions = transitions; 077 this.scanFromFront = scanFromFront; 078 } 079 080 081 /** 082 * Loads the ASCII specification of this FSM from the given InputStream. 083 * 084 * @param is the input stream to load from 085 * 086 * @throws IOException if an error occurs on input. 087 */ 088 private void loadText(InputStream is) throws IOException { 089 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 090 String line = null; 091 while ((line = reader.readLine()) != null) { 092 if (!line.startsWith("***")) { 093 if (line.startsWith(VOCAB_SIZE)) { 094 vocabularySize = parseLastInt(line); 095 } else if (line.startsWith(NUM_OF_TRANSITIONS)) { 096 int transitionsSize = parseLastInt(line); 097 transitions = new int[transitionsSize]; 098 } else if (line.startsWith(TRANSITIONS)) { 099 StringTokenizer st = new StringTokenizer(line); 100 String transition = st.nextToken(); 101 int i = 0; 102 while (st.hasMoreTokens() && i < transitions.length) { 103 transition = st.nextToken().trim(); 104 transitions[i++] = Integer.parseInt(transition); 105 } 106 } 107 } 108 } 109 reader.close(); 110 } 111 112 113 /** 114 * Returns the integer value of the last integer in the given string. 115 * 116 * @param line the line to parse the integer from 117 * 118 * @return an integer 119 */ 120 private int parseLastInt(String line) { 121 String lastInt = line.trim().substring(line.lastIndexOf(" ")); 122 return Integer.parseInt(lastInt.trim()); 123 } 124 125 126 /** 127 * Causes this FSM to transition to the next state given 128 * the current state and input symbol. 129 * 130 * @param state the current state 131 * @param symbol the input symbol 132 */ 133 private int transition(int state, int symbol) { 134 for (int i = state; i < transitions.length; i++) { 135 if ((transitions[i] % vocabularySize) == symbol) { 136 return (transitions[i] / vocabularySize); 137 } 138 } 139 return -1; 140 } 141 142 143 /** 144 * Checks to see if this finite state machine accepts the given 145 * input string. 146 * 147 * @param inputString the input string to be tested 148 * 149 * @return true if this FSM accepts, false if it rejects 150 */ 151 public boolean accept(String inputString) { 152 int symbol; 153 int state = transition(0, '#'); 154 int leftEnd = inputString.length() - 1; 155 int start = (scanFromFront) ? 0 : leftEnd; 156 157 for (int i = start; 0 <= i && i <= leftEnd; ) { 158 char c = inputString.charAt(i); 159 if (c == 'n' || c == 'm') { 160 symbol = 'N'; 161 } else if ("aeiouy".indexOf(c) != -1) { 162 symbol = 'V'; 163 } else { 164 symbol = c; 165 } 166 state = transition(state, symbol); 167 if (state == -1) { 168 return false; 169 } else if (symbol == 'V') { 170 return true; 171 } 172 if (scanFromFront) { 173 i++; 174 } else { 175 i--; 176 } 177 } 178 return false; 179 } 180} 181 182 183 184 185 186