/**
 * Portions Copyright 2001-2005 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
011package com.sun.speech.freetts;
012
013import java.io.BufferedReader;
014import java.io.FileInputStream;
015import java.io.FileReader;
016import java.io.IOException;
017import java.io.InputStream;
018import java.io.InputStreamReader;
019import java.net.URL;
020import java.util.logging.ConsoleHandler;
021import java.util.logging.Handler;
022import java.util.logging.Level;
023import java.util.logging.Logger;
024
025import javax.sound.sampled.AudioFileFormat;
026import javax.sound.sampled.AudioSystem;
027
028import com.sun.speech.freetts.audio.AudioPlayer;
029import com.sun.speech.freetts.audio.JavaClipAudioPlayer;
030import com.sun.speech.freetts.audio.MultiFileAudioPlayer;
031import com.sun.speech.freetts.audio.NullAudioPlayer;
032import com.sun.speech.freetts.audio.RawFileAudioPlayer;
033import com.sun.speech.freetts.audio.SingleFileAudioPlayer;
034
035/**
036 * Standalone utility that directly interacts with a CMUDiphoneVoice.
037 */
038public class FreeTTS {
039    /** Logger instance. */
040    private static final Logger LOGGER =
041        Logger.getLogger(FreeTTS.class.getName());
042
043    /** Version number. */
044    public final static String VERSION = "FreeTTS 1.2.2";
045    private Voice voice;
046    private static AudioPlayer audioPlayer = null;
047    private boolean silent = false;
048    private String audioFile = null;
049    private boolean multiAudio = false;
050    private boolean streamingAudio = false;
051    private InputMode inputMode = InputMode.INTERACTIVE;
052
053    /**
054     * Constructs a default FreeTTS with the kevin16 voice.
055     */
056    public FreeTTS() {
057        VoiceManager voiceManager = VoiceManager.getInstance();
058        voiceManager.getVoice("kevin16");
059    }
060
061    /**
062     * Creates a FreeTTS object with the given Voice.
063     * 
064     * @param voice
065     *            the voice to use
066     */
067    public FreeTTS(Voice voice) {
068        this.voice = voice;
069    }
070
071    /**
072     * Starts this FreeTTS Synthesizer by loading the void and creating a new
073     * AudioPlayer.
074     */
075    public void startup() {
076        voice.allocate();
077        if (!getSilentMode()) {
078            if (audioFile != null) {
079                AudioFileFormat.Type type = getAudioType(audioFile);
080                if (type != null) {
081                    if (multiAudio) {
082                        audioPlayer = new MultiFileAudioPlayer(
083                                getBasename(audioFile), type);
084                    } else
085                        audioPlayer = new SingleFileAudioPlayer(
086                                getBasename(audioFile), type);
087                } else {
088                    try {
089                        audioPlayer = new RawFileAudioPlayer(audioFile);
090                    } catch (IOException ioe) {
091                        System.out.println("Can't open " + audioFile + " "
092                                + ioe);
093                    }
094                }
095            } else if (!streamingAudio) {
096                audioPlayer = new JavaClipAudioPlayer();
097            } else {
098                try {
099                    audioPlayer = voice.getDefaultAudioPlayer();
100                } catch (InstantiationException e) {
101                    e.printStackTrace();
102                }
103            }
104        }
105
106        if (audioPlayer == null) {
107            audioPlayer = new NullAudioPlayer();
108        }
109
110        voice.setAudioPlayer(audioPlayer);
111    }
112
113    /**
114     * Returns the audio type based upon the extension of the given file
115     * 
116     * @param file
117     *            the file of interest
118     * 
119     * @return the audio type of the file or null if it is a non-supported type
120     */
121    private AudioFileFormat.Type getAudioType(String file) {
122        AudioFileFormat.Type[] types = AudioSystem.getAudioFileTypes();
123        String extension = getExtension(file);
124
125        for (int i = 0; i < types.length; i++) {
126            if (types[i].getExtension().equals(extension)) {
127                return types[i];
128            }
129        }
130        return null;
131    }
132
133    /**
134     * Given a filename returns the extension for the file
135     * 
136     * @param path
137     *            the path to extract the extension from
138     * 
139     * @return the extension or <code>null</code> if none
140     */
141    private static String getExtension(String path) {
142        int index = path.lastIndexOf(".");
143        if (index == -1) {
144            return null;
145        } else {
146            return path.substring(index + 1);
147        }
148    }
149
150    /**
151     * Given a filename returns the basename for the file
152     * 
153     * @param path
154     *            the path to extract the basename from
155     * 
156     * @return the basename of the file
157     */
158    private static String getBasename(String path) {
159        int index = path.lastIndexOf(".");
160        if (index == -1) {
161            return path;
162        } else {
163            return path.substring(0, index);
164        }
165    }
166
167    /**
168     * Shuts down this FreeTTS synthesizer by closing the AudioPlayer and voice.
169     */
170    public void shutdown() {
171        try {
172            audioPlayer.close();
173        } catch (IOException e) {
174            LOGGER.warning("error closing the audio player: " + e.getMessage());
175        }
176        voice.deallocate();
177    }
178
179    /**
180     * Converts the given text to speech based using processing options
181     * currently set in FreeTTS.
182     * 
183     * @param text
184     *            the text to speak
185     * 
186     * @return true if the utterance was played properly
187     */
188    public boolean textToSpeech(String text) {
189        return voice.speak(text);
190    }
191
192    /**
193     * Converts the given text to speech based using processing options
194     * currently set in FreeTTS.
195     * 
196     * @param text
197     *            the text to speak
198     * 
199     * @return true if the utterance was played properly
200     */
201    private boolean batchTextToSpeech(String text) {
202        boolean ok;
203        voice.startBatch();
204        ok = textToSpeech(text);
205        voice.endBatch();
206        return ok;
207    }
208
209    /**
210     * Reads the file pointed to by the given path and renders each line as
211     * speech individually.
212     */
213    private boolean lineToSpeech(String path) {
214        boolean ok = true;
215        voice.startBatch();
216        try {
217            BufferedReader reader = new BufferedReader(new FileReader(path));
218            String line;
219
220            while ((line = reader.readLine()) != null && ok) {
221                ok = textToSpeech(line);
222            }
223            reader.close();
224        } catch (IOException ioe) {
225            LOGGER.severe("can't read " + path);
226            throw new Error(ioe);
227        }
228        voice.endBatch();
229
230        return ok;
231
232    }
233
234    /**
235     * Returns the voice used by FreeTTS.
236     * 
237     * @return the voice used by freetts
238     */
239    protected Voice getVoice() {
240        return voice;
241    }
242
243    /**
244     * Converts the text contained in the given stream to speech.
245     * 
246     * @param is
247     *            the stream containing the text to speak
248     */
249    public boolean streamToSpeech(InputStream is) {
250        boolean ok;
251        voice.startBatch();
252        ok = voice.speak(is);
253        voice.endBatch();
254        return ok;
255    }
256
257    /**
258     * Converts the text contained in the given path to speech.
259     * 
260     * @param urlPath
261     *            the file containing the text to speak
262     * 
263     * @return true if the utterance was played properly
264     */
265    public boolean urlToSpeech(String urlPath) {
266        boolean ok = false;
267        try {
268            URL url = new URL(urlPath);
269            InputStream is = url.openStream();
270            ok = streamToSpeech(is);
271        } catch (IOException ioe) {
272            System.err.println("Can't read data from " + urlPath);
273        }
274        return ok;
275    }
276
277    /**
278     * Converts the text contained in the given path to speech.
279     * 
280     * @param filePath
281     *            the file containing the text to speak
282     * 
283     * @return true if the utterance was played properly
284     */
285    public boolean fileToSpeech(String filePath) {
286        boolean ok = false;
287        try {
288            InputStream is = new FileInputStream(filePath);
289            ok = streamToSpeech(is);
290        } catch (IOException ioe) {
291            System.err.println("Can't read data from " + filePath);
292        }
293        return ok;
294    }
295
296    /**
297     * Turns audio playing on and off.
298     * 
299     * @param silent
300     *            if true, don't play audio
301     */
302    public void setSilentMode(boolean silent) {
303        this.silent = silent;
304    }
305
306    /**
307     * Gets silent mode.
308     * 
309     * @return true if in silent mode
310     * 
311     * @see #setSilentMode
312     */
313    public boolean getSilentMode() {
314        return this.silent;
315    }
316
317    /**
318     * Sets the input mode.
319     * 
320     * @param inputMode
321     *            the input mode
322     */
323    public void setInputMode(InputMode inputMode) {
324        this.inputMode = inputMode;
325    }
326
327    /**
328     * Returns the InputMode.
329     * 
330     * @return the input mode
331     * 
332     * @see #setInputMode
333     */
334    public InputMode getInputMode() {
335        return this.inputMode;
336    }
337
338    /**
339     * Sets the audio file .
340     * 
341     * @param audioFile
342     *            the audioFile
343     */
344    public void setAudioFile(String audioFile) {
345        this.audioFile = audioFile;
346    }
347
348    /**
349     * Sets multi audio. If true, and an audio file has been set output will be
350     * sent to multiple files
351     * 
352     * @param multiAudio
353     *            if <code>true</code> send output to multiple files.
354     */
355    public void setMultiAudio(boolean multiAudio) {
356        this.multiAudio = multiAudio;
357    }
358
359    /**
360     * Sets streaming audio. If true, output will be sent to
361     * 
362     * @param streamingAudio
363     *            if <code>true</code> stream audio
364     */
365    public void setStreamingAudio(boolean streamingAudio) {
366        this.streamingAudio = streamingAudio;
367    }
368
369    /**
370     * Prints the usage message for FreeTTS.
371     */
372    static void usage(String voices) {
373        System.out.println(VERSION);
374        System.out.println("Usage:");
375        System.out.println("    -detailedMetrics: turn on detailed metrics");
376        System.out.println("    -dumpAudio file : dump audio to file ");
377        System.out.println("    -dumpAudioTypes : dump the possible"
378                + " output types");
379        System.out.println("    -dumpMultiAudio file : dump audio to file ");
380        System.out.println("    -dumpRelations  : dump the relations ");
381        System.out.println("    -dumpUtterance  : dump the final utterance");
382        System.out
383                .println("    -dumpASCII file : dump the final wave to file as ASCII");
384        System.out.println("    -file file      : speak text from given file");
385        System.out.println("    -lines file     : render lines from a file");
386        System.out.println("    -help           : shows usage information");
387        System.out.println("    -voiceInfo      : print detailed voice info");
388        System.out.println("    -metrics        : turn on metrics");
389        System.out.println("    -run  name      : sets the name of the run");
390        System.out.println("    -silent         : don't say anything");
391        System.out.println("    -streaming      : use streaming audio player");
392        System.out.println("    -text say me    : speak given text");
393        System.out.println("    -url path       : speak text from given URL");
394        System.out.println("    -verbose        : verbose output");
395        System.out.println("    -version        : shows version number");
396        System.out.println("    -voice VOICE    : " + voices);
397    }
398
399    /**
400     * Starts interactive mode on the given FreeTTS. Reads text from the console
401     * and gives it to FreeTTS to speak. terminates on end of file.
402     * 
403     * @param freetts
404     *            the engine
405     */
406    private static void interactiveMode(FreeTTS freetts) {
407        try {
408            while (true) {
409                String text;
410                BufferedReader reader;
411                reader = new BufferedReader(new InputStreamReader(System.in));
412                System.out.print("Enter text: ");
413                System.out.flush();
414                text = reader.readLine();
415                if ((text == null) || (text.length() == 0)) {
416                    freetts.shutdown();
417                    System.exit(0);
418                } else {
419                    freetts.batchTextToSpeech(text);
420                }
421            }
422        } catch (IOException e) {
423        }
424    }
425
426    /**
427     * Dumps the possible audio output file types
428     */
429    private static void dumpAudioTypes() {
430        AudioFileFormat.Type[] types = AudioSystem.getAudioFileTypes();
431
432        for (int i = 0; i < types.length; i++) {
433            System.out.println(types[i].getExtension());
434        }
435    }
436
437    /**
438     * The main entry point for FreeTTS.
439     */
440    public static void main(String[] args) {
441
442        String text = null;
443        String inFile = null;
444        boolean dumpAudioTypes = false;
445        Voice voice = null;
446
447        VoiceManager voiceManager = VoiceManager.getInstance();
448        String voices = voiceManager.toString();
449
450        // find out what Voice to use first
451        for (int i = 0; i < args.length; i++) {
452            if (args[i].equals("-voice")) {
453                if (++i < args.length) {
454                    String voiceName = args[i];
455                    if (voiceManager.contains(voiceName)) {
456                        voice = voiceManager.getVoice(voiceName);
457                    } else {
458                        System.out.println("Invalid voice: " + voiceName);
459                        System.out.println("  Valid voices are " + voices);
460                        System.exit(1);
461                    }
462                } else {
463                    usage(voices);
464                    System.exit(1);
465                }
466                break;
467            }
468        }
469
470        if (voice == null) { // default Voice is kevin16
471            voice = voiceManager.getVoice("kevin16");
472        }
473
474        if (voice == null) {
475            throw new Error("The specified voice is not defined");
476        }
477        FreeTTS freetts = new FreeTTS(voice);
478
479        for (int i = 0; i < args.length; i++) {
480            if (args[i].equals("-metrics")) {
481                voice.setMetrics(true);
482            } else if (args[i].equals("-detailedMetrics")) {
483                voice.setDetailedMetrics(true);
484            } else if (args[i].equals("-silent")) {
485                freetts.setSilentMode(true);
486            } else if (args[i].equals("-streaming")) {
487                freetts.setStreamingAudio(true);
488            } else if (args[i].equals("-verbose")) {
489                Handler handler = new ConsoleHandler();
490                handler.setLevel(Level.ALL);
491                Logger.getLogger("com.sun").addHandler(handler);
492                Logger.getLogger("com.sun").setLevel(Level.ALL);
493            } else if (args[i].equals("-dumpUtterance")) {
494                voice.setDumpUtterance(true);
495            } else if (args[i].equals("-dumpAudioTypes")) {
496                dumpAudioTypes = true;
497            } else if (args[i].equals("-dumpRelations")) {
498                voice.setDumpRelations(true);
499            } else if (args[i].equals("-dumpASCII")) {
500                if (++i < args.length) {
501                    voice.setWaveDumpFile(args[i]);
502                } else {
503                    usage(voices);
504                }
505            } else if (args[i].equals("-dumpAudio")) {
506                if (++i < args.length) {
507                    freetts.setAudioFile(args[i]);
508                } else {
509                    usage(voices);
510                }
511            } else if (args[i].equals("-dumpMultiAudio")) {
512                if (++i < args.length) {
513                    freetts.setAudioFile(args[i]);
514                    freetts.setMultiAudio(true);
515                } else {
516                    usage(voices);
517                }
518            } else if (args[i].equals("-version")) {
519                System.out.println(VERSION);
520            } else if (args[i].equals("-voice")) {
521                // do nothing here, just skip the voice name
522                i++;
523            } else if (args[i].equals("-help")) {
524                usage(voices);
525                System.exit(0);
526            } else if (args[i].equals("-voiceInfo")) {
527                System.out.println(VoiceManager.getInstance().getVoiceInfo());
528                System.exit(0);
529            } else if (args[i].equals("-text")) {
530                freetts.setInputMode(InputMode.TEXT);
531                // add the rest of the args as text
532                StringBuffer sb = new StringBuffer();
533                for (int j = i + 1; j < args.length; j++) {
534                    sb.append(args[j]);
535                    sb.append(" ");
536                }
537                text = sb.toString();
538                break;
539            } else if (args[i].equals("-file")) {
540                if (++i < args.length) {
541                    inFile = args[i];
542                    freetts.setInputMode(InputMode.FILE);
543                } else {
544                    usage(voices);
545                }
546            } else if (args[i].equals("-lines")) {
547                if (++i < args.length) {
548                    inFile = args[i];
549                    freetts.setInputMode(InputMode.LINES);
550                } else {
551                    usage(voices);
552                }
553            } else if (args[i].equals("-url")) {
554                if (++i < args.length) {
555                    inFile = args[i];
556                    freetts.setInputMode(InputMode.URL);
557                } else {
558                    usage(voices);
559                }
560            } else if (args[i].equals("-run")) {
561                if (++i < args.length) {
562                    voice.setRunTitle(args[i]);
563                } else {
564                    usage(voices);
565                }
566            } else {
567                System.out.println("Unknown option:" + args[i]);
568            }
569        }
570
571        if (dumpAudioTypes) {
572            dumpAudioTypes();
573        }
574
575        freetts.startup();
576
577        if (freetts.getInputMode() == InputMode.TEXT) {
578            freetts.batchTextToSpeech(text);
579        } else if (freetts.getInputMode() == InputMode.FILE) {
580            freetts.fileToSpeech(inFile);
581        } else if (freetts.getInputMode() == InputMode.URL) {
582            freetts.urlToSpeech(inFile);
583        } else if (freetts.getInputMode() == InputMode.LINES) {
584            freetts.lineToSpeech(inFile);
585        } else {
586            interactiveMode(freetts);
587        }
588
589        if (freetts.getVoice().isMetrics() && !freetts.getSilentMode()) {
590            // [[[ TODO: get first byte timer times back in ]]]
591            // freetts.getFirstByteTimer().showTimes();
592            // freetts.getFirstSoundTimer().showTimes();
593        }
594
595        freetts.shutdown();
596        System.exit(0);
597    }
598}