001/**
002 * Portions Copyright 2001 Sun Microsystems, Inc.
003 * Portions Copyright 1999-2001 Language Technologies Institute, 
004 * Carnegie Mellon University.
005 * All Rights Reserved.  Use is subject to license terms.
006 * 
007 * See the file "license.terms" for information on usage and
008 * redistribution of this file, and for a DISCLAIMER OF ALL 
009 * WARRANTIES.
010 */
011package com.sun.speech.freetts.en.us;
012
013import java.io.BufferedReader;
014import java.io.IOException;
015import java.io.InputStream;
016import java.io.InputStreamReader;
017import java.net.URL;
018import java.util.StringTokenizer;
019
/**
 * Implements a finite state machine that checks if a given string
 * is pronounceable. If it is pronounceable, the method
 * <code>accept()</code> will return true.
 *
 * <p>Each entry of the transition table packs a target state and an
 * input symbol into a single integer:
 * <code>entry = nextState * vocabularySize + symbol</code>.
 */
public class PronounceableFSM {

    private static final String VOCAB_SIZE = "VOCAB_SIZE";
    private static final String NUM_OF_TRANSITIONS = "NUM_OF_TRANSITIONS";
    private static final String TRANSITIONS = "TRANSITIONS";


    /**
     * The vocabulary size.
     */
    protected int vocabularySize;


    /**
     * The transitions of this FSM
     */
    protected int[] transitions;


    /**
     * Whether we should scan the input string from the front.
     */
    protected boolean scanFromFront;


    /**
     * Constructs a PronounceableFSM with information in the given URL.
     *
     * @param url the URL that contains the FSM specification
     * @param scanFromFront indicates whether this FSM should scan the input
     * string from the front, or from the back
     *
     * @throws IOException if the specification cannot be read, or if a
     * TRANSITIONS line appears before NUM_OF_TRANSITIONS
     */
    public PronounceableFSM(URL url, boolean scanFromFront) throws IOException {
        this.scanFromFront = scanFromFront;
        InputStream is = url.openStream();
        try {
            loadText(is);
        } finally {
            // Release the stream even when the specification is malformed.
            is.close();
        }
    }


    /**
     * Constructs a PronounceableFSM with the given attributes.
     *
     * @param vocabularySize the vocabulary size of the FSM
     * @param transitions the transitions of the FSM
     * @param scanFromFront indicates whether this FSM should scan the input
     * string from the front, or from the back
     */
    public PronounceableFSM(int vocabularySize, int[] transitions,
                            boolean scanFromFront) {
        this.vocabularySize = vocabularySize;
        this.transitions = transitions;
        this.scanFromFront = scanFromFront;
    }


    /**
     * Loads the ASCII specification of this FSM from the given InputStream.
     * Lines starting with "***" are ignored. A line starting with
     * VOCAB_SIZE or NUM_OF_TRANSITIONS supplies its value as the last
     * integer on the line; a line starting with TRANSITIONS lists the
     * transition values separated by whitespace. The stream is closed
     * when this method returns, normally or not.
     *
     * @param is the input stream to load from
     *
     * @throws IOException if an error occurs on input, or if a
     * TRANSITIONS line appears before NUM_OF_TRANSITIONS.
     */
    private void loadText(InputStream is) throws IOException {
        // The format is documented as ASCII, so decode it explicitly
        // instead of relying on the platform default charset.
        BufferedReader reader =
            new BufferedReader(new InputStreamReader(is, "US-ASCII"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("***")) {
                    continue;
                }
                if (line.startsWith(VOCAB_SIZE)) {
                    vocabularySize = parseLastInt(line);
                } else if (line.startsWith(NUM_OF_TRANSITIONS)) {
                    transitions = new int[parseLastInt(line)];
                } else if (line.startsWith(TRANSITIONS)) {
                    if (transitions == null) {
                        throw new IOException(TRANSITIONS
                            + " line found before " + NUM_OF_TRANSITIONS);
                    }
                    StringTokenizer st = new StringTokenizer(line);
                    st.nextToken(); // skip the leading TRANSITIONS keyword
                    int i = 0;
                    // Values beyond the declared table size are ignored.
                    while (st.hasMoreTokens() && i < transitions.length) {
                        transitions[i++] = Integer.parseInt(st.nextToken());
                    }
                }
            }
        } finally {
            reader.close();
        }
    }


    /**
     * Returns the integer value of the last integer in the given string.
     * Leading and trailing whitespace on the line is ignored.
     *
     * @param line the line to parse the integer from
     *
     * @return an integer
     *
     * @throws NumberFormatException if the last whitespace-separated
     * token of the line is not an integer
     */
    private int parseLastInt(String line) {
        String trimmed = line.trim();
        // Find the start of the last token in the TRIMMED string, so that
        // trailing whitespace on the raw line cannot skew the index.
        int start = trimmed.length();
        while (start > 0 && !Character.isWhitespace(trimmed.charAt(start - 1))) {
            start--;
        }
        return Integer.parseInt(trimmed.substring(start));
    }


    /**
     * Causes this FSM to transition to the next state given
     * the current state and input symbol. The table is searched from
     * index <code>state</code> to its end for the first entry whose
     * symbol part matches.
     *
     * @param state the current state
     * @param symbol the input symbol
     *
     * @return the next state, or -1 if there is no matching transition
     */
    private int transition(int state, int symbol) {
        if (state < 0) {
            // A rejected state has no outgoing transitions.
            return -1;
        }
        for (int i = state; i < transitions.length; i++) {
            if ((transitions[i] % vocabularySize) == symbol) {
                return (transitions[i] / vocabularySize);
            }
        }
        return -1;
    }


    /**
     * Checks to see if this finite state machine accepts the given
     * input string. The string is scanned one character at a time from
     * the front or the back, depending on <code>scanFromFront</code>.
     * 'n' and 'm' are mapped to the symbol 'N', the characters in
     * "aeiouy" to the symbol 'V', and every other character is used as
     * is. The string is accepted as soon as a 'V' symbol is consumed by
     * a valid transition, and rejected if any symbol has no transition
     * or the string ends before a 'V' symbol is reached.
     *
     * @param inputString the input string to be tested
     *
     * @return true if this FSM accepts, false if it rejects
     */
    public boolean accept(String inputString) {
        int state = transition(0, '#');
        if (state == -1) {
            // No start transition: nothing can be accepted.
            return false;
        }

        int lastIndex = inputString.length() - 1;
        int step = scanFromFront ? 1 : -1;

        for (int i = scanFromFront ? 0 : lastIndex;
             0 <= i && i <= lastIndex; i += step) {
            char c = inputString.charAt(i);
            int symbol;
            if (c == 'n' || c == 'm') {
                symbol = 'N';
            } else if ("aeiouy".indexOf(c) != -1) {
                symbol = 'V';
            } else {
                symbol = c;
            }
            state = transition(state, symbol);
            if (state == -1) {
                return false;
            } else if (symbol == 'V') {
                return true;
            }
        }
        return false;
    }
}
181
182
183
184
185
186