001/** 002 * Portions Copyright 2001-2005 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts; 012 013import java.io.BufferedReader; 014import java.io.FileInputStream; 015import java.io.FileReader; 016import java.io.IOException; 017import java.io.InputStream; 018import java.io.InputStreamReader; 019import java.net.URL; 020import java.util.logging.ConsoleHandler; 021import java.util.logging.Handler; 022import java.util.logging.Level; 023import java.util.logging.Logger; 024 025import javax.sound.sampled.AudioFileFormat; 026import javax.sound.sampled.AudioSystem; 027 028import com.sun.speech.freetts.audio.AudioPlayer; 029import com.sun.speech.freetts.audio.JavaClipAudioPlayer; 030import com.sun.speech.freetts.audio.MultiFileAudioPlayer; 031import com.sun.speech.freetts.audio.NullAudioPlayer; 032import com.sun.speech.freetts.audio.RawFileAudioPlayer; 033import com.sun.speech.freetts.audio.SingleFileAudioPlayer; 034 035/** 036 * Standalone utility that directly interacts with a CMUDiphoneVoice. 037 */ 038public class FreeTTS { 039 /** Logger instance. */ 040 private static final Logger LOGGER = 041 Logger.getLogger(FreeTTS.class.getName()); 042 043 /** Version number. */ 044 public final static String VERSION = "FreeTTS 1.2.2"; 045 private Voice voice; 046 private static AudioPlayer audioPlayer = null; 047 private boolean silent = false; 048 private String audioFile = null; 049 private boolean multiAudio = false; 050 private boolean streamingAudio = false; 051 private InputMode inputMode = InputMode.INTERACTIVE; 052 053 /** 054 * Constructs a default FreeTTS with the kevin16 voice. 055 */ 056 public FreeTTS() { 057 VoiceManager voiceManager = VoiceManager.getInstance(); 058 voiceManager.getVoice("kevin16"); 059 } 060 061 /** 062 * Creates a FreeTTS object with the given Voice. 063 * 064 * @param voice 065 * the voice to use 066 */ 067 public FreeTTS(Voice voice) { 068 this.voice = voice; 069 } 070 071 /** 072 * Starts this FreeTTS Synthesizer by loading the void and creating a new 073 * AudioPlayer. 074 */ 075 public void startup() { 076 voice.allocate(); 077 if (!getSilentMode()) { 078 if (audioFile != null) { 079 AudioFileFormat.Type type = getAudioType(audioFile); 080 if (type != null) { 081 if (multiAudio) { 082 audioPlayer = new MultiFileAudioPlayer( 083 getBasename(audioFile), type); 084 } else 085 audioPlayer = new SingleFileAudioPlayer( 086 getBasename(audioFile), type); 087 } else { 088 try { 089 audioPlayer = new RawFileAudioPlayer(audioFile); 090 } catch (IOException ioe) { 091 System.out.println("Can't open " + audioFile + " " 092 + ioe); 093 } 094 } 095 } else if (!streamingAudio) { 096 audioPlayer = new JavaClipAudioPlayer(); 097 } else { 098 try { 099 audioPlayer = voice.getDefaultAudioPlayer(); 100 } catch (InstantiationException e) { 101 e.printStackTrace(); 102 } 103 } 104 } 105 106 if (audioPlayer == null) { 107 audioPlayer = new NullAudioPlayer(); 108 } 109 110 voice.setAudioPlayer(audioPlayer); 111 } 112 113 /** 114 * Returns the audio type based upon the extension of the given file 115 * 116 * @param file 117 * the file of interest 118 * 119 * @return the audio type of the file or null if it is a non-supported type 120 */ 121 private AudioFileFormat.Type getAudioType(String file) { 122 AudioFileFormat.Type[] types = AudioSystem.getAudioFileTypes(); 123 String extension = getExtension(file); 124 125 for (int i = 0; i < types.length; i++) { 126 if (types[i].getExtension().equals(extension)) { 127 return types[i]; 128 } 129 } 130 return null; 131 } 132 133 /** 134 * Given a filename returns the extension for the file 135 * 136 * @param path 137 * the path to extract the extension from 138 * 139 * @return the extension or <code>null</code> if none 140 */ 141 private static String getExtension(String path) { 142 int index = path.lastIndexOf("."); 143 if (index == -1) { 144 return null; 145 } else { 146 return path.substring(index + 1); 147 } 148 } 149 150 /** 151 * Given a filename returns the basename for the file 152 * 153 * @param path 154 * the path to extract the basename from 155 * 156 * @return the basename of the file 157 */ 158 private static String getBasename(String path) { 159 int index = path.lastIndexOf("."); 160 if (index == -1) { 161 return path; 162 } else { 163 return path.substring(0, index); 164 } 165 } 166 167 /** 168 * Shuts down this FreeTTS synthesizer by closing the AudioPlayer and voice. 169 */ 170 public void shutdown() { 171 try { 172 audioPlayer.close(); 173 } catch (IOException e) { 174 LOGGER.warning("error closing the audio player: " + e.getMessage()); 175 } 176 voice.deallocate(); 177 } 178 179 /** 180 * Converts the given text to speech based using processing options 181 * currently set in FreeTTS. 182 * 183 * @param text 184 * the text to speak 185 * 186 * @return true if the utterance was played properly 187 */ 188 public boolean textToSpeech(String text) { 189 return voice.speak(text); 190 } 191 192 /** 193 * Converts the given text to speech based using processing options 194 * currently set in FreeTTS. 195 * 196 * @param text 197 * the text to speak 198 * 199 * @return true if the utterance was played properly 200 */ 201 private boolean batchTextToSpeech(String text) { 202 boolean ok; 203 voice.startBatch(); 204 ok = textToSpeech(text); 205 voice.endBatch(); 206 return ok; 207 } 208 209 /** 210 * Reads the file pointed to by the given path and renders each line as 211 * speech individually. 212 */ 213 private boolean lineToSpeech(String path) { 214 boolean ok = true; 215 voice.startBatch(); 216 try { 217 BufferedReader reader = new BufferedReader(new FileReader(path)); 218 String line; 219 220 while ((line = reader.readLine()) != null && ok) { 221 ok = textToSpeech(line); 222 } 223 reader.close(); 224 } catch (IOException ioe) { 225 LOGGER.severe("can't read " + path); 226 throw new Error(ioe); 227 } 228 voice.endBatch(); 229 230 return ok; 231 232 } 233 234 /** 235 * Returns the voice used by FreeTTS. 236 * 237 * @return the voice used by freetts 238 */ 239 protected Voice getVoice() { 240 return voice; 241 } 242 243 /** 244 * Converts the text contained in the given stream to speech. 245 * 246 * @param is 247 * the stream containing the text to speak 248 */ 249 public boolean streamToSpeech(InputStream is) { 250 boolean ok; 251 voice.startBatch(); 252 ok = voice.speak(is); 253 voice.endBatch(); 254 return ok; 255 } 256 257 /** 258 * Converts the text contained in the given path to speech. 259 * 260 * @param urlPath 261 * the file containing the text to speak 262 * 263 * @return true if the utterance was played properly 264 */ 265 public boolean urlToSpeech(String urlPath) { 266 boolean ok = false; 267 try { 268 URL url = new URL(urlPath); 269 InputStream is = url.openStream(); 270 ok = streamToSpeech(is); 271 } catch (IOException ioe) { 272 System.err.println("Can't read data from " + urlPath); 273 } 274 return ok; 275 } 276 277 /** 278 * Converts the text contained in the given path to speech. 279 * 280 * @param filePath 281 * the file containing the text to speak 282 * 283 * @return true if the utterance was played properly 284 */ 285 public boolean fileToSpeech(String filePath) { 286 boolean ok = false; 287 try { 288 InputStream is = new FileInputStream(filePath); 289 ok = streamToSpeech(is); 290 } catch (IOException ioe) { 291 System.err.println("Can't read data from " + filePath); 292 } 293 return ok; 294 } 295 296 /** 297 * Turns audio playing on and off. 298 * 299 * @param silent 300 * if true, don't play audio 301 */ 302 public void setSilentMode(boolean silent) { 303 this.silent = silent; 304 } 305 306 /** 307 * Gets silent mode. 308 * 309 * @return true if in silent mode 310 * 311 * @see #setSilentMode 312 */ 313 public boolean getSilentMode() { 314 return this.silent; 315 } 316 317 /** 318 * Sets the input mode. 319 * 320 * @param inputMode 321 * the input mode 322 */ 323 public void setInputMode(InputMode inputMode) { 324 this.inputMode = inputMode; 325 } 326 327 /** 328 * Returns the InputMode. 329 * 330 * @return the input mode 331 * 332 * @see #setInputMode 333 */ 334 public InputMode getInputMode() { 335 return this.inputMode; 336 } 337 338 /** 339 * Sets the audio file . 340 * 341 * @param audioFile 342 * the audioFile 343 */ 344 public void setAudioFile(String audioFile) { 345 this.audioFile = audioFile; 346 } 347 348 /** 349 * Sets multi audio. If true, and an audio file has been set output will be 350 * sent to multiple files 351 * 352 * @param multiAudio 353 * if <code>true</code> send output to multiple files. 354 */ 355 public void setMultiAudio(boolean multiAudio) { 356 this.multiAudio = multiAudio; 357 } 358 359 /** 360 * Sets streaming audio. If true, output will be sent to 361 * 362 * @param streamingAudio 363 * if <code>true</code> stream audio 364 */ 365 public void setStreamingAudio(boolean streamingAudio) { 366 this.streamingAudio = streamingAudio; 367 } 368 369 /** 370 * Prints the usage message for FreeTTS. 371 */ 372 static void usage(String voices) { 373 System.out.println(VERSION); 374 System.out.println("Usage:"); 375 System.out.println(" -detailedMetrics: turn on detailed metrics"); 376 System.out.println(" -dumpAudio file : dump audio to file "); 377 System.out.println(" -dumpAudioTypes : dump the possible" 378 + " output types"); 379 System.out.println(" -dumpMultiAudio file : dump audio to file "); 380 System.out.println(" -dumpRelations : dump the relations "); 381 System.out.println(" -dumpUtterance : dump the final utterance"); 382 System.out 383 .println(" -dumpASCII file : dump the final wave to file as ASCII"); 384 System.out.println(" -file file : speak text from given file"); 385 System.out.println(" -lines file : render lines from a file"); 386 System.out.println(" -help : shows usage information"); 387 System.out.println(" -voiceInfo : print detailed voice info"); 388 System.out.println(" -metrics : turn on metrics"); 389 System.out.println(" -run name : sets the name of the run"); 390 System.out.println(" -silent : don't say anything"); 391 System.out.println(" -streaming : use streaming audio player"); 392 System.out.println(" -text say me : speak given text"); 393 System.out.println(" -url path : speak text from given URL"); 394 System.out.println(" -verbose : verbose output"); 395 System.out.println(" -version : shows version number"); 396 System.out.println(" -voice VOICE : " + voices); 397 } 398 399 /** 400 * Starts interactive mode on the given FreeTTS. Reads text from the console 401 * and gives it to FreeTTS to speak. terminates on end of file. 402 * 403 * @param freetts 404 * the engine 405 */ 406 private static void interactiveMode(FreeTTS freetts) { 407 try { 408 while (true) { 409 String text; 410 BufferedReader reader; 411 reader = new BufferedReader(new InputStreamReader(System.in)); 412 System.out.print("Enter text: "); 413 System.out.flush(); 414 text = reader.readLine(); 415 if ((text == null) || (text.length() == 0)) { 416 freetts.shutdown(); 417 System.exit(0); 418 } else { 419 freetts.batchTextToSpeech(text); 420 } 421 } 422 } catch (IOException e) { 423 } 424 } 425 426 /** 427 * Dumps the possible audio output file types 428 */ 429 private static void dumpAudioTypes() { 430 AudioFileFormat.Type[] types = AudioSystem.getAudioFileTypes(); 431 432 for (int i = 0; i < types.length; i++) { 433 System.out.println(types[i].getExtension()); 434 } 435 } 436 437 /** 438 * The main entry point for FreeTTS. 439 */ 440 public static void main(String[] args) { 441 442 String text = null; 443 String inFile = null; 444 boolean dumpAudioTypes = false; 445 Voice voice = null; 446 447 VoiceManager voiceManager = VoiceManager.getInstance(); 448 String voices = voiceManager.toString(); 449 450 // find out what Voice to use first 451 for (int i = 0; i < args.length; i++) { 452 if (args[i].equals("-voice")) { 453 if (++i < args.length) { 454 String voiceName = args[i]; 455 if (voiceManager.contains(voiceName)) { 456 voice = voiceManager.getVoice(voiceName); 457 } else { 458 System.out.println("Invalid voice: " + voiceName); 459 System.out.println(" Valid voices are " + voices); 460 System.exit(1); 461 } 462 } else { 463 usage(voices); 464 System.exit(1); 465 } 466 break; 467 } 468 } 469 470 if (voice == null) { // default Voice is kevin16 471 voice = voiceManager.getVoice("kevin16"); 472 } 473 474 if (voice == null) { 475 throw new Error("The specified voice is not defined"); 476 } 477 FreeTTS freetts = new FreeTTS(voice); 478 479 for (int i = 0; i < args.length; i++) { 480 if (args[i].equals("-metrics")) { 481 voice.setMetrics(true); 482 } else if (args[i].equals("-detailedMetrics")) { 483 voice.setDetailedMetrics(true); 484 } else if (args[i].equals("-silent")) { 485 freetts.setSilentMode(true); 486 } else if (args[i].equals("-streaming")) { 487 freetts.setStreamingAudio(true); 488 } else if (args[i].equals("-verbose")) { 489 Handler handler = new ConsoleHandler(); 490 handler.setLevel(Level.ALL); 491 Logger.getLogger("com.sun").addHandler(handler); 492 Logger.getLogger("com.sun").setLevel(Level.ALL); 493 } else if (args[i].equals("-dumpUtterance")) { 494 voice.setDumpUtterance(true); 495 } else if (args[i].equals("-dumpAudioTypes")) { 496 dumpAudioTypes = true; 497 } else if (args[i].equals("-dumpRelations")) { 498 voice.setDumpRelations(true); 499 } else if (args[i].equals("-dumpASCII")) { 500 if (++i < args.length) { 501 voice.setWaveDumpFile(args[i]); 502 } else { 503 usage(voices); 504 } 505 } else if (args[i].equals("-dumpAudio")) { 506 if (++i < args.length) { 507 freetts.setAudioFile(args[i]); 508 } else { 509 usage(voices); 510 } 511 } else if (args[i].equals("-dumpMultiAudio")) { 512 if (++i < args.length) { 513 freetts.setAudioFile(args[i]); 514 freetts.setMultiAudio(true); 515 } else { 516 usage(voices); 517 } 518 } else if (args[i].equals("-version")) { 519 System.out.println(VERSION); 520 } else if (args[i].equals("-voice")) { 521 // do nothing here, just skip the voice name 522 i++; 523 } else if (args[i].equals("-help")) { 524 usage(voices); 525 System.exit(0); 526 } else if (args[i].equals("-voiceInfo")) { 527 System.out.println(VoiceManager.getInstance().getVoiceInfo()); 528 System.exit(0); 529 } else if (args[i].equals("-text")) { 530 freetts.setInputMode(InputMode.TEXT); 531 // add the rest of the args as text 532 StringBuffer sb = new StringBuffer(); 533 for (int j = i + 1; j < args.length; j++) { 534 sb.append(args[j]); 535 sb.append(" "); 536 } 537 text = sb.toString(); 538 break; 539 } else if (args[i].equals("-file")) { 540 if (++i < args.length) { 541 inFile = args[i]; 542 freetts.setInputMode(InputMode.FILE); 543 } else { 544 usage(voices); 545 } 546 } else if (args[i].equals("-lines")) { 547 if (++i < args.length) { 548 inFile = args[i]; 549 freetts.setInputMode(InputMode.LINES); 550 } else { 551 usage(voices); 552 } 553 } else if (args[i].equals("-url")) { 554 if (++i < args.length) { 555 inFile = args[i]; 556 freetts.setInputMode(InputMode.URL); 557 } else { 558 usage(voices); 559 } 560 } else if (args[i].equals("-run")) { 561 if (++i < args.length) { 562 voice.setRunTitle(args[i]); 563 } else { 564 usage(voices); 565 } 566 } else { 567 System.out.println("Unknown option:" + args[i]); 568 } 569 } 570 571 if (dumpAudioTypes) { 572 dumpAudioTypes(); 573 } 574 575 freetts.startup(); 576 577 if (freetts.getInputMode() == InputMode.TEXT) { 578 freetts.batchTextToSpeech(text); 579 } else if (freetts.getInputMode() == InputMode.FILE) { 580 freetts.fileToSpeech(inFile); 581 } else if (freetts.getInputMode() == InputMode.URL) { 582 freetts.urlToSpeech(inFile); 583 } else if (freetts.getInputMode() == InputMode.LINES) { 584 freetts.lineToSpeech(inFile); 585 } else { 586 interactiveMode(freetts); 587 } 588 589 if (freetts.getVoice().isMetrics() && !freetts.getSilentMode()) { 590 // [[[ TODO: get first byte timer times back in ]]] 591 // freetts.getFirstByteTimer().showTimes(); 592 // freetts.getFirstSoundTimer().showTimes(); 593 } 594 595 freetts.shutdown(); 596 System.exit(0); 597 } 598}