001/**
002 * Portions Copyright 2001 Sun Microsystems, Inc.
003 * Portions Copyright 1999-2001 Language Technologies Institute, 
004 * Carnegie Mellon University.
005 * All Rights Reserved.  Use is subject to license terms.
006 * 
007 * See the file "license.terms" for information on usage and
008 * redistribution of this file, and for a DISCLAIMER OF ALL 
009 * WARRANTIES.
010 */
011package com.sun.speech.freetts.en.us;
012
013import java.io.IOException;
014import java.net.URL;
015import java.util.List;
016
017import com.sun.speech.freetts.VoiceManager;
018import com.sun.speech.freetts.lexicon.LexiconImpl;
019import com.sun.speech.freetts.util.BulkTimer;
020
021/**
022 * Provides a CMU lexicon-specific implementation of a Lexicon that is
023 * stored in a text file.
024 */
025public class CMULexicon extends LexiconImpl {
026    
027    /**
028     * Vowels
029     */
030    static final private String VOWELS = "aeiou";
031
032    /**
033     * Glides/Liquids
034     */
035    static final private String GLIDES_LIQUIDS = "wylr";
036
037    /**
038     * Nasals
039     */
040    static final private String NASALS = "nm";
041
042    /**
043     * Voiced Obstruents
044     */
045    static final private String VOICED_OBSTRUENTS = "bdgjlmnnnrvwyz";
046
047    /**
048     * Creates a CMULexicon based upon the given compiled and addenda
049     * DBs and the given letter to sound rules
050     *
051     * @param compiledURL the compiled database is loaded from here
052     * @param addendaURL the database addenda is loaded from here
053     * @param letterToSoundURL the letter to sound rules are loaded
054     *          from here
055     * @param binary if <code>true</code> the input data are loaded as
056     *          binary ; otherwise if <code>false</code> the input
057     *          data are loaded as text.
058     *
059     */
060    public CMULexicon(URL compiledURL,
061                       URL addendaURL,
062                       URL  letterToSoundURL,
063                       boolean binary) {
064        setLexiconParameters(compiledURL, addendaURL, letterToSoundURL, binary);
065    }
066
067    /**
068     * Creates the default CMU Lexicon which is a binary lexicon
069     */
070    public CMULexicon() {
071        this("cmulex");
072    }
073
074    /**
075     * Creates the CMU Lexicon which is a binary lexicon
076     *
077     * @param basename the basename for the lexicon.
078     */
079    public CMULexicon(String basename) {
080        this(basename, true);
081    }
082
083    public CMULexicon(String basename, boolean useBinaryIO) {
084        java.net.URLClassLoader classLoader =
085                VoiceManager.getVoiceClassLoader();
086        String type = (useBinaryIO ? "bin" : "txt");
087
088        URL letterToSoundURL = classLoader.getResource(
089                "com/sun/speech/freetts/en/us/" + basename + "_lts." + type);
090        URL compiledURL = classLoader.getResource(
091                "com/sun/speech/freetts/en/us/" + basename
092                + "_compiled." + type);
093        URL addendaURL = classLoader.getResource(
094                "com/sun/speech/freetts/en/us/" + basename
095                + "_addenda." + type);
096
097        /* Just another try with possibly a different class loader
098         * if the above didn't work.
099         */
100        if (letterToSoundURL == null) {
101            Class cls = CMULexicon.class;
102            letterToSoundURL = cls.getResource(basename + "_lts." + type);
103            compiledURL = cls.getResource(basename + "_compiled." + type);
104            addendaURL = cls.getResource(basename + "_addenda." + type);
105            if (letterToSoundURL == null) {
106                System.err.println(
107                    "CMULexicon: Oh no!  Couldn't find lexicon data!");
108            }
109        }
110        
111        setLexiconParameters(compiledURL, addendaURL,
112                letterToSoundURL, useBinaryIO);
113    }
114    
115    /**
116     * Get the CMULexicon.
117     *
118     * @param useBinaryIO if true use binary IO to load DB
119     *
120     * @throws IOException if problems occurred while reading the data
121     */ 
122    static public CMULexicon getInstance( boolean useBinaryIO) 
123                                                throws IOException {
124        return getInstance("cmulex", useBinaryIO);
125    }
126
127    /**
128     * Get the CMULexicon.
129     *
130     * @param useBinaryIO if true use binary IO to load DB
131     *
132     * @throws IOException if problems occurred while reading the data
133     */ 
134    static public CMULexicon getInstance(String basename, boolean useBinaryIO) 
135                                                throws IOException {
136        CMULexicon lexicon = new CMULexicon(basename, useBinaryIO);
137        lexicon.load();
138        return lexicon;
139    }
140
141        
142    /**
143     * Determines if the currentPhone represents a new syllable
144     * boundary.
145     *
146     * @param syllablePhones the phones in the current syllable so far
147     * @param wordPhones the phones for the whole word
148     * @param currentWordPhone the word phone in question
149     *
150     * @return <code>true</code> if the word phone in question is on a
151     *     syllable boundary; otherwise <code>false</code>.
152     */
153    public boolean isSyllableBoundary(List syllablePhones,
154                                      String[] wordPhones,
155                                      int currentWordPhone) {
156        if (currentWordPhone >= wordPhones.length) {
157            return true;
158        } else if (isSilence(wordPhones[currentWordPhone])) {
159            return true;
160        } else if (!hasVowel(wordPhones, currentWordPhone)) { // rest of word 
161            return false;
162        } else if (!hasVowel(syllablePhones)) { // current syllable
163            return false;
164        } else if (isVowel(wordPhones[currentWordPhone])) {
165            return true;
166        } else if (currentWordPhone == (wordPhones.length - 1)) {
167            return false;
168        } else {
169            int p, n, nn;
170            p = getSonority(
171                (String) syllablePhones.get(syllablePhones.size() - 1));
172            n = getSonority(wordPhones[currentWordPhone]);
173            nn = getSonority(wordPhones[currentWordPhone + 1]);
174            if ((p <= n) && (n <= nn)) {
175                return true;
176            } else {
177                return false;
178            }
179        }
180    }
181    
182    /**
183     * Determines if the given phone represents a silent phone.
184     *
185     * @param phone the phone to test
186     *
187     * @return <code>true</code> if the phone represents a silent
188     *          phone; otherwise <code>false</code>. 
189     */
190    static protected boolean isSilence(String phone) {
191        return phone.equals("pau");
192    }
193
194    /**
195     * Determines if there is a vowel in the remainder of the array, 
196     * starting at the given index.
197     *
198     * @param phones the set of phones to check
199     * @param index start checking at this index
200     *
201     * @return <code>true</code> if a vowel is found; 
202     *          otherwise <code>false</code>. 
203     */
204    static protected boolean hasVowel(String[] phones, int index) {
205        for (int i = index; i < phones.length; i++) {
206            if (isVowel(phones[i])) {
207                return true;
208            }
209        }
210        return false;
211    }
212    
213    /**
214     * Determines if there is a vowel in given list of phones.
215     *
216     * @param phones the list of phones
217     *
218     * @return <code>true</code> if a vowel is found; 
219     *          otherwise <code>false</code>. 
220     */
221    static protected boolean hasVowel(List phones) {
222        for (int i = 0; i < phones.size(); i++) {
223            if (isVowel((String) phones.get(i))) {
224                return true;
225            }
226        }
227        return false;
228    }
229    
230    /**
231     * Determines if the given phone is a vowel
232     *
233     * @param phone the phone to test
234     *
235     * @return <code>true</code> if phone is a vowel
236     *          otherwise <code>false</code>. 
237     */
238    static protected boolean isVowel(String phone) {
239        return VOWELS.indexOf(phone.substring(0,1)) != -1;
240    }
241
242    /**
243     * Determines the sonority for the given phone.
244     * 
245     * @param phone the phone of interest
246     * 
247     * @return an integer that classifies phone transitions
248     */
249    static protected int getSonority(String phone) {
250        if (isVowel(phone) || isSilence(phone)) {
251            return 5;
252        } else if (GLIDES_LIQUIDS.indexOf(phone.substring(0,1)) != -1) {
253            return 4; 
254        } else if (NASALS.indexOf(phone.substring(0,1)) != -1) {
255            return 3;
256        } else if (VOICED_OBSTRUENTS.indexOf(phone.substring(0,1)) != -1) {
257            return 2;
258        } else {
259            return 1;
260        }
261    }    
262
263    /**
264     * Provides test code for the CMULexicon.
265     * <br><b>Usage:</b><br>
266     * <pre>
267     *  com.sun.speech.freetts.en.us.CMULexicon [options]
268     *
269     * Where options is any combination of:
270     *
271     * -src path
272     * -dest path
273     * -generate_binary [base_name]
274     * -compare
275     * -showtimes
276     *
277     * </pre>
278     */
279    public static void main(String[] args) {
280        LexiconImpl lex, lex2;
281        boolean showTimes = false;
282        String srcPath = ".";
283        String destPath = ".";
284        String baseName = "cmulex";
285
286        try {
287            if (args.length > 0) {
288                BulkTimer.LOAD.start();
289                for (int i = 0 ; i < args.length; i++) {
290                    if (args[i].equals("-src")) {
291                        srcPath = args[++i];
292                    } else if (args[i].equals("-dest")) {
293                        destPath = args[++i];
294                    } else if (args[i].equals("-name")
295                               && i < args.length - 1) {
296                        baseName = args[++i];
297                    } else if (args[i].equals("-generate_binary")) {
298
299                         System.out.println("Loading " + baseName);
300                         String path = "file:" + srcPath + "/" + baseName;
301                         lex = new CMULexicon(
302                             new URL(path + "_compiled.txt"),
303                             new URL(path + "_addenda.txt"),
304                             new URL(path + "_lts.txt"),
305                             false);
306                         BulkTimer.LOAD.start("load_text");
307                         lex.load();
308                         BulkTimer.LOAD.stop("load_text");
309
310                         System.out.println("Dumping " + baseName);
311                         BulkTimer.LOAD.start("dump_text");
312                         lex.dumpBinary(destPath + "/" + baseName);
313                         BulkTimer.LOAD.stop("dump_text");
314
315                    } else if (args[i].equals("-compare")) {
316
317                        BulkTimer.LOAD.start("load_text");
318                        lex = CMULexicon.getInstance(baseName, false);
319                        BulkTimer.LOAD.stop("load_text");
320
321                        BulkTimer.LOAD.start("load_binary");
322                        lex2 = CMULexicon.getInstance(baseName, true);
323                        BulkTimer.LOAD.stop("load_binary");
324
325                        BulkTimer.LOAD.start("compare");
326                        lex.compare(lex2);
327                        BulkTimer.LOAD.stop("compare");
328                    } else if (args[i].equals("-showtimes")) {
329                        showTimes = true;
330                    } else {
331                        System.out.println("Unknown option " + args[i]);
332                    }
333                }
334                BulkTimer.LOAD.stop();
335                if (showTimes) {
336                    BulkTimer.LOAD.show("CMULexicon loading and dumping");
337                }
338            } else {
339                System.out.println("Options: ");
340                System.out.println("    -src path");
341                System.out.println("    -dest path");
342                System.out.println("    -compare");
343                System.out.println("    -generate_binary");
344                System.out.println("    -showtimes");
345            }
346        } catch (IOException ioe) {
347            System.err.println(ioe);
348        }
349    }
350}