001/** 002 * Portions Copyright 2001 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts; 012 013import java.io.BufferedReader; 014import java.io.IOException; 015import java.io.InputStream; 016import java.io.InputStreamReader; 017import java.io.PrintWriter; 018import java.io.Reader; 019import java.net.URL; 020import java.util.ArrayList; 021import java.util.Collections; 022import java.util.HashMap; 023import java.util.Iterator; 024import java.util.List; 025import java.util.Locale; 026import java.util.Map; 027import java.util.logging.Level; 028import java.util.logging.Logger; 029 030import org.w3c.dom.Document; 031import org.w3c.dom.Node; 032import org.w3c.dom.Text; 033 034import com.sun.speech.freetts.audio.AudioPlayer; 035import com.sun.speech.freetts.lexicon.Lexicon; 036import com.sun.speech.freetts.relp.LPCResult; 037import com.sun.speech.freetts.util.BulkTimer; 038import com.sun.speech.freetts.util.Utilities; 039 040 041/** 042 * Performs text-to-speech using a series of 043 * <code>UtteranceProcessors</code>. It is the main conduit to the FreeTTS 044 * speech synthesizer. It can perform TTS on ASCII text, 045 * a JSML document, an <code>InputStream</code>, or a 046 * <code>FreeTTSSpeakable</code>, by invoking the method <code>speak</code>. 047 * 048 * <p>Before a Voice can perform TTS, it must have a 049 * <code>Lexicon</code>, from which it gets the vocabulary, and 050 * an <code>AudioPlayer</code>, to which it sends the synthesized output. 
 *
 * <p><b>Example</b> (using the <code>CMUDiphoneVoice</code>,
 * <code>CMULexicon</code> and <code>JavaClipAudioPlayer</code>):
 *
 * <pre>
 * Voice voice = new CMUDiphoneVoice();
 *
 * // sets the Lexicon
 * voice.setLexicon(new CMULexicon());
 *
 * // sets the AudioPlayer
 * voice.setAudioPlayer(new JavaClipAudioPlayer());
 *
 * // loads the Voice
 * voice.allocate();
 *
 * // start talking
 * voice.speak("I can talk forever without getting tired!");
 * </pre>
 *
 *
 * <p>A user can override the AudioPlayer to use by defining the
 * "com.sun.speech.freetts.voice.defaultAudioPlayer" system property.
 * The value of this property must be the name of a class that
 * implements the AudioPlayer interface, and which also has a no-arg
 * constructor.
 *
 * @see VoiceManager
 * @see VoiceDirectory
 */
public abstract class Voice implements UtteranceProcessor, Dumpable {
    /** Logger instance for this class. */
    private static final Logger LOGGER =
        Logger.getLogger(Voice.class.getName());

    /**
     * Constant that describes the name of the unit database used by
     * this voice.
089 */ 090 public final static String DATABASE_NAME = "databaseName"; 091 092 private List<UtteranceProcessor> utteranceProcessors; 093 private Map featureProcessors; 094 private FeatureSetImpl features; 095 private boolean metrics = false; 096 private boolean detailedMetrics = false; 097 private boolean dumpUtterance = false; 098 private boolean dumpRelations = false; 099 private String runTitle = "unnamed run"; 100 private Lexicon lexicon = null; 101 private AudioPlayer defaultAudioPlayer = null; 102 private AudioPlayer audioPlayer = null; 103 private UtteranceProcessor audioOutput; 104 private OutputQueue outputQueue = null; 105 private String waveDumpFile = null; 106 private BulkTimer runTimer = new BulkTimer(); 107 private BulkTimer threadTimer = new BulkTimer(); 108 private boolean externalOutputQueue = false; 109 private boolean externalAudioPlayer = false; 110 111 112 private float nominalRate = 150; // nominal speaking rate for this voice 113 private float pitch = 100; // pitch baseline (hertz) 114 private float range = 10; // pitch range (hertz) 115 private float pitchShift = 1; // F0 Shift 116 private float volume = 0.8f; // the volume (range 0 to 1) 117 private float durationStretch = 1f; // the duration stretch 118 119 private boolean loaded = false; 120 121 private String name = "default_name"; 122 private Age age = Age.DONT_CARE; 123 private Gender gender = Gender.DONT_CARE; 124 private String description = "default description"; 125 private Locale locale = Locale.getDefault(); 126 private String domain = "general"; 127 private String style = "standard"; 128 private String organization = "unknown"; 129 130 /** 131 * Prefix for System property names. 132 */ 133 public final static String PROP_PREFIX = "com.sun.speech.freetts.voice."; 134 135 /** 136 * Feature name for the silence phone string. 137 */ 138 public final static String FEATURE_SILENCE = "silence"; 139 140 /** 141 * Feature name for the join type string. 
142 */ 143 public final static String FEATURE_JOIN_TYPE = "join_type"; 144 145 /** 146 * Feature name for the default AudioPlayer class to use. 147 */ 148 public final static String DEFAULT_AUDIO_PLAYER = 149 PROP_PREFIX + "defaultAudioPlayer"; 150 151 152 /** 153 * The default class to use for the DEFAULT_AUDIO_PLAYER. 154 */ 155 public final static String DEFAULT_AUDIO_PLAYER_DEFAULT = 156 "com.sun.speech.freetts.audio.JavaStreamingAudioPlayer"; 157 158 159 /** 160 * Creates a new Voice. Utterances are sent to an 161 * output queue to be rendered as audio. Utterances are placed 162 * on the queue by an output thread. This 163 * queue is usually created via a call to 'createOutputThread,' 164 * which creates a thread that waits on the queue and sends the 165 * output to the audio player associated with this voice. If 166 * the queue is null, the output is rendered in the calling 167 * thread. 168 * 169 * @see #createOutputThread 170 */ 171 public Voice() { 172 /* Make the utteranceProcessors a synchronized list to avoid 173 * some threading issues. 174 */ 175 utteranceProcessors = Collections.synchronizedList(new ArrayList()); 176 features = new FeatureSetImpl(); 177 featureProcessors = new HashMap(); 178 179 try { 180 nominalRate = Float.parseFloat( 181 Utilities.getProperty(PROP_PREFIX + "speakingRate","150")); 182 pitch = Float.parseFloat( 183 Utilities.getProperty(PROP_PREFIX + "pitch","100")); 184 range = Float.parseFloat( 185 Utilities.getProperty(PROP_PREFIX + "range","10")); 186 volume = Float.parseFloat( 187 Utilities.getProperty(PROP_PREFIX + "volume","1.0")); 188 } catch (SecurityException se) { 189 // can't get properties, just use defaults 190 } 191 outputQueue = null; 192 audioPlayer = null; 193 defaultAudioPlayer = null; 194 } 195 196 /** 197 * Creates a new Voice like above, except that it also 198 * stores the properties of the voice. 
     * @param name the name of the voice
     * @param gender the gender of the voice
     * @param age the age of the voice
     * @param description a human-readable string providing a
     *     description that can be displayed for the users.
     * @param locale the locale of the voice
     * @param domain the domain of this voice.  For example,
     *     "general", "time", or "weather".
     * @param organization the organization which created the voice
     *
     * @see #Voice()
     */
    public Voice(String name, Gender gender, Age age,
            String description, Locale locale, String domain,
            String organization) {
        this();
        setName(name);
        setGender(gender);
        setAge(age);
        setDescription(description);
        setLocale(locale);
        setDomain(domain);
        setOrganization(organization);
    }


    /**
     * Speaks the given text.
     *
     * @param text the text to speak
     *
     * @return <code>true</code> if the given text is spoken properly;
     *     otherwise <code>false</code>
     */
    public boolean speak(String text) {
        return speak(new FreeTTSSpeakableImpl(text));
    }


    /**
     * Speaks the given document.
     *
     * @param doc the JSML document to speak
     *
     * @return <code>true</code> if the given document is spoken properly;
     *     otherwise <code>false</code>
     */
    public boolean speak(Document doc) {
        return speak(new FreeTTSSpeakableImpl(doc));
    }


    /**
     * Speaks the input stream.
     *
     * @param inputStream the inputStream to speak
     *
     * @return <code>true</code> if the given input stream is spoken properly;
     *     otherwise <code>false</code>
     */
    public boolean speak(InputStream inputStream) {
        return speak(new FreeTTSSpeakableImpl(inputStream));
    }


    /**
     * Speak the given queue item. This is a synchronous method that
     * does not return until the speakable is completely
     * spoken or has been cancelled.
269 * 270 * @param speakable the item to speak 271 * 272 * @return <code>true</code> if the utterance was spoken properly, 273 * <code>false</code> otherwise 274 */ 275 public boolean speak(FreeTTSSpeakable speakable) { 276 if (LOGGER.isLoggable(Level.FINE)) { 277 LOGGER.fine("speak(FreeTTSSpeakable) called"); 278 } 279 boolean ok = true; 280 boolean posted = false; 281 282 getAudioPlayer().startFirstSampleTimer(); 283 284 for (Iterator i = tokenize(speakable); 285 !speakable.isCompleted() && i.hasNext() ; ) { 286 try { 287 Utterance utterance = (Utterance) i.next(); 288 if (utterance != null) { 289 processUtterance(utterance); 290 posted = true; 291 } 292 } catch (ProcessException pe) { 293 ok = false; 294 } 295 } 296 if (ok && posted) { 297 runTimer.start("WaitAudio"); 298 ok = speakable.waitCompleted(); 299 runTimer.stop("WaitAudio"); 300 } 301 if (LOGGER.isLoggable(Level.FINE)) { 302 LOGGER.fine("speak(FreeTTSSpeakable) completed"); 303 } 304 return ok; 305 } 306 307 308 /** 309 * @deprecated As of FreeTTS 1.2, replaced by {@link #allocate}. 310 */ 311 public void load() { 312 allocate(); 313 } 314 315 /** 316 * Allocate this Voice. It loads the lexicon and the 317 * audio output handler, and creates an audio output thread by 318 * invoking <code>createOutputThread()</code>, if 319 * one is not already created. It then calls the <code>loader()</code> 320 * method to load Voice-specific data, which include utterance processors. 
321 */ 322 public void allocate() { 323 if (isLoaded()) { 324 return; 325 } 326 BulkTimer.LOAD.start(); 327 328 329 if (!lexicon.isLoaded()) { 330 try { 331 lexicon.load(); 332 } catch (IOException ioe) { 333 LOGGER.severe("Can't load voice " + ioe); 334 throw new Error(ioe); 335 } 336 } 337 338 try { 339 audioOutput = getAudioOutput(); 340 } catch (IOException ioe) { 341 LOGGER.severe("Can't load audio output handler for voice " + ioe); 342 throw new Error(ioe); 343 } 344 if (outputQueue == null) { 345 outputQueue = createOutputThread(); 346 } 347 try { 348 loader(); 349 } catch (IOException ioe) { 350 LOGGER.severe("Can't load voice " + ioe); 351 throw new Error(ioe); 352 } 353 BulkTimer.LOAD.stop(); 354 if (isMetrics()) { 355 BulkTimer.LOAD.show("loading " + toString() + " for " + 356 getRunTitle()); 357 } 358 setLoaded(true); 359 } 360 361 362 /** 363 * Returns true if this voice is loaded. 364 * 365 * @return <code>true</code> if the voice is loaded; 366 * otherwise <code>false</code> 367 */ 368 public boolean isLoaded() { 369 return loaded; 370 } 371 372 /** 373 * Sets the loaded state 374 * 375 * @param loaded the new loaded state 376 * otherwise <code>false</code> 377 */ 378 protected void setLoaded(boolean loaded) { 379 this.loaded = loaded; 380 } 381 382 /** 383 * Processes the given Utterance by passing it to each 384 * UtteranceProcessor managed by this Voice. The 385 * UtteranceProcessors are called in the order they were added to 386 * the Voice. 
387 * 388 * @param u the Utterance to process 389 * 390 * @throws ProcessException if an exception occurred while performing 391 * operations on the Utterance 392 */ 393 public void processUtterance(Utterance u) throws ProcessException { 394 UtteranceProcessor[] processors; 395 396 if (utteranceProcessors == null) { 397 return; 398 } 399 if (u == null) { 400 throw new ProcessException("Utterance is null."); 401 } 402 403 runTimer.start("processing"); 404 processors = new UtteranceProcessor[utteranceProcessors.size()]; 405 processors = (UtteranceProcessor[]) 406 utteranceProcessors.toArray(processors); 407 408 if (LOGGER.isLoggable(Level.FINE)) { 409 LOGGER.fine("Processing Utterance: " + u.getString("input_text")); 410 } 411 try { 412 for (int i = 0; i < processors.length && 413 !u.getSpeakable().isCompleted(); i++) { 414 runProcessor(processors[i], u, runTimer); 415 } 416 if (!u.getSpeakable().isCompleted()) { 417 if (outputQueue == null) { 418 if (LOGGER.isLoggable(Level.FINE)) { 419 LOGGER.fine("To AudioOutput"); 420 } 421 outputUtterance(u, runTimer); 422 } else { 423 runTimer.start("..post"); 424 outputQueue.post(u); 425 runTimer.stop("..post"); 426 } 427 } 428 } catch (ProcessException pe) { 429 System.err.println("Processing Utterance: " + pe); 430 } catch (Exception e) { 431 System.err.println("Trouble while processing utterance " + e); 432 e.printStackTrace(); 433 u.getSpeakable().cancelled(); 434 } 435 436 if (LOGGER.isLoggable(Level.FINE)) { 437 LOGGER.fine("Done Processing Utterance: " 438 + u.getString("input_text")); 439 } 440 runTimer.stop("processing"); 441 442 if (dumpUtterance) { 443 u.dump("Utterance"); 444 } 445 if (dumpRelations) { 446 u.dumpRelations("Utterance"); 447 } 448 449 dumpASCII(u); 450 } 451 452 453 /** 454 * Dumps the wave for the given utterance. 
455 * 456 * @param utterance the utterance of interest 457 */ 458 private void dumpASCII(Utterance utterance) { 459 if (waveDumpFile != null) { 460 LPCResult lpcResult = 461 (LPCResult) utterance.getObject("target_lpcres"); 462 try { 463 if (waveDumpFile.equals("-")) { 464 lpcResult.dumpASCII(); 465 } else { 466 lpcResult.dumpASCII(waveDumpFile); 467 } 468 } catch (IOException ioe) { 469 LOGGER.severe("Can't dump file to " + waveDumpFile + " " + ioe); 470 throw new Error(ioe); 471 } 472 } 473 } 474 475 476 /** 477 * Creates an output thread that will asynchronously 478 * output utterances that are generated by this voice (and other 479 * voices). 480 * 481 * @return the queue where utterances should be placed. 482 */ 483 public static OutputQueue createOutputThread() { 484 final OutputQueue queue = new OutputQueue(); 485 Thread t = new Thread() { 486 public void run() { 487 Utterance utterance = null; 488 do { 489 utterance = queue.pend(); 490 if (utterance != null) { 491 Voice voice = utterance.getVoice(); 492 if (LOGGER.isLoggable(Level.FINE)) { 493 LOGGER.fine("OUT: " 494 + utterance.getString("input_text")); 495 } 496 voice.outputUtterance(utterance, voice.threadTimer); 497 } 498 } while (utterance != null); 499 } 500 }; 501 t.setDaemon(true); 502 t.start(); 503 return queue; 504 } 505 506 507 /** 508 * Sends the given utterance to the audio output processor 509 * associated with this voice. If the queue item associated with 510 * this utterance is completed, then this set of utterances has 511 * been cancelled or otherwise aborted and the utterance should 512 * not be output. 
513 * 514 * @param utterance the utterance to be output 515 * @param timer the timer for gathering performance metrics 516 * 517 * @return true if the utterance was output properly; otherwise 518 * false 519 */ 520 private boolean outputUtterance(Utterance utterance, BulkTimer timer) { 521 boolean ok = true; 522 FreeTTSSpeakable speakable = utterance.getSpeakable(); 523 524 if (!speakable.isCompleted()) { 525 if (utterance.isFirst()) { 526 getAudioPlayer().reset(); 527 speakable.started(); 528 if (LOGGER.isLoggable(Level.FINE)) { 529 LOGGER.fine(" --- started ---"); 530 } 531 } 532 533 // log(" utt: " + utterance.getString("input_text")); 534 try { 535 if (!speakable.isCompleted()) { 536 runProcessor(audioOutput, utterance, timer); 537 } else { 538 ok = false; 539 } 540 } catch (ProcessException pe) { 541 ok = false; 542 } 543 if (ok && utterance.isLast()) { 544 getAudioPlayer().drain(); 545 speakable.completed(); 546 if (LOGGER.isLoggable(Level.FINE)) { 547 LOGGER.fine(" --- completed ---"); 548 } 549 } else if (!ok) { 550 // getAudioPlayer().drain(); 551 speakable.cancelled(); 552 if (LOGGER.isLoggable(Level.FINE)) { 553 LOGGER.fine(" --- cancelled ---"); 554 } 555 } else { 556 if (LOGGER.isLoggable(Level.FINE)) { 557 LOGGER.fine(" --- not last: " + speakable.getText() 558 + " --- "); 559 } 560 } 561 if (LOGGER.isLoggable(Level.FINE)) { 562 LOGGER.fine("Calling speakable.completed() on " 563 + speakable.getText()); 564 } 565 } else { 566 ok = false; 567 if (LOGGER.isLoggable(Level.FINE)) { 568 LOGGER.fine("STRANGE: speakable already completed: " 569 + speakable.getText()); 570 } 571 } 572 return ok; 573 } 574 575 576 /** 577 * Runs the given utterance processor. 578 * 579 * @param processor the processor to run. 
     *     is null, it is ignored
     * @param utterance the utterance to process
     * @param timer the timer used to track how long this processor runs
     *
     * @throws ProcessException if an exception occurs while processing
     *     the utterance
     */
    private void runProcessor(UtteranceProcessor processor,
                              Utterance utterance, BulkTimer timer)
        throws ProcessException {
        if (processor != null) {
            String processorName = ".." + processor.toString();
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(" Running " + processorName);
            }
            timer.start(processorName);
            processor.processUtterance(utterance);
            timer.stop(processorName);
        }
    }


    /**
     * Returns the tokenizer associated with this voice.
     *
     * @return the tokenizer
     */
    public abstract Tokenizer getTokenizer();


    /**
     * Returns the list of UtteranceProcessor instances. Applications
     * should use this to obtain and modify the contents of the
     * UtteranceProcessor list.
     *
     * @return a List containing UtteranceProcessor instances
     */
    public List<UtteranceProcessor> getUtteranceProcessors() {
        return utteranceProcessors;
    }


    /**
     * Returns the feature set associated with this voice.
     *
     * @return the feature set.
     */
    public FeatureSet getFeatures() {
        return features;
    }


    /**
     * Starts a batch of utterances. Utterances are sometimes
     * batched in groups for timing purposes.
     *
     * @see #endBatch
     */
    public void startBatch() {
        runTimer.setVerbose(detailedMetrics);
        runTimer.start();
    }


    /**
     * Ends a batch of utterances.
     *
     * @see #startBatch
     */
    public void endBatch() {
        runTimer.stop();

        if (metrics) {
            runTimer.show(getRunTitle() + " run");
            threadTimer.show(getRunTitle() + " thread");
            getAudioPlayer().showMetrics();
            long totalMemory = Runtime.getRuntime().totalMemory();
            LOGGER.info
                ("Memory Use : "
                 + (totalMemory - Runtime.getRuntime().freeMemory()) / 1024
                 + "k of " + totalMemory / 1024 + "k");
        }
    }

    /**
     * Sets the output queue for this voice. If no output queue is set
     * for the voice when the voice is loaded, a queue and thread will
     * be created when the voice is loaded. If the outputQueue is set
     * by an external entity by calling setOutputQueue, the caller is
     * responsible for shutting down the output thread. That is, if
     * you call 'setOutputQueue' then you are responsible for shutting
     * down the output thread on your own. This is necessary since the
     * output queue may be shared by a number of voices.
     *
     * <p>Utterances are placed on the
     * queue to be output by an output thread. This queue is
     * usually created via a call to 'createOutputThread' which
     * creates a thread that waits on the queue and sends the
     * output to the audio player associated with this voice. If
     * the queue is null, the output is rendered in the calling
     * thread.
     *
     * @param queue the output queue
     */
    public void setOutputQueue(OutputQueue queue) {
        externalOutputQueue = true;
        outputQueue = queue;
    }

    /**
     * Returns the output queue associated with this voice.
     *
     * @return the output queue associated with this voice
     */
    public OutputQueue getOutputQueue() {
        return outputQueue;
    }

    /**
     * Loads voice specific data. Subclasses of voice should
     * implement this to perform class specific loading.
     */
    protected abstract void loader() throws IOException;

    /**
     * Tokenizes the given queue item.
705 * 706 * @return an iterator that will yield a series of utterances 707 */ 708 private Iterator tokenize(FreeTTSSpeakable speakable) { 709 return new FreeTTSSpeakableTokenizer(speakable).iterator(); 710 } 711 712 /** 713 * Converts the document to a string (a placeholder for more 714 * sophisticated logic to be done). 715 * 716 * @param dom the jsml document 717 * 718 * @return the document as a string. 719 */ 720 private String documentToString(Document dom) { 721 StringBuffer buf = new StringBuffer(); 722 linearize(dom, buf); 723 return buf.toString(); 724 } 725 726 /** 727 * Appends the text for this node to the given StringBuffer. 728 * 729 * @param n the node to traverse in depth-first order 730 * @param buf the buffer to append text to 731 */ 732 private void linearize(Node n, StringBuffer buf) { 733 StringBuffer endText = processNode(n, buf); 734 for (Node child = n.getFirstChild(); 735 child != null; 736 child = child.getNextSibling()) { 737 linearize(child, buf); 738 } 739 740 if (endText != null) { 741 buf.append(endText); 742 } 743 } 744 745 /** 746 * Adds text for just this node and returns any text that might 747 * be needed to undo the effects of this node after it is 748 * processed. 749 * 750 * @param n the node to traverse in depth-first order 751 * @param buf the buffer to append text to 752 * 753 * @return a <code>String</code> containing text to undo the 754 * effects of the node 755 */ 756 protected StringBuffer processNode(Node n, StringBuffer buf) { 757 StringBuffer endText = null; 758 759 int type = n.getNodeType(); 760 switch (type) { 761 case Node.ATTRIBUTE_NODE: 762 break; 763 764 case Node.DOCUMENT_NODE: 765 break; 766 767 case Node.ELEMENT_NODE: 768 // endText = processElement((Element) n, buf); 769 break; 770 771 case Node.TEXT_NODE: 772 buf.append(((Text) n).getData()); 773 break; 774 775 // Pass processing instructions (e.g., <?blah?> 776 // right on to the synthesizer. These types of things 777 // probably should not be used. 
Instead the 'engine' 778 // element is probably the best thing to do. 779 // 780 case Node.PROCESSING_INSTRUCTION_NODE: 781 break; 782 783 // The document type had better be JSML. 784 // 785 case Node.DOCUMENT_TYPE_NODE: 786 break; 787 788 // I think NOTATION nodes are only DTD's. 789 // 790 case Node.NOTATION_NODE: 791 break; 792 793 // Should not get COMMENTS because the JSMLParser 794 // ignores them. 795 // 796 case Node.COMMENT_NODE: 797 break; 798 799 // Should not get CDATA because the JSMLParser is 800 // coalescing. 801 // 802 case Node.CDATA_SECTION_NODE: 803 break; 804 805 // Should not get ENTITY related notes because 806 // entities are expanded by the JSMLParser 807 // 808 case Node.ENTITY_NODE: 809 case Node.ENTITY_REFERENCE_NODE: 810 break; 811 812 // Should not get DOCUMENT_FRAGMENT nodes because I 813 // [[[WDW]]] think they are only created via the API's 814 // and cannot be defined via content. 815 // 816 case Node.DOCUMENT_FRAGMENT_NODE: 817 break; 818 819 default: 820 break; 821 } 822 823 return endText; 824 } 825 826 /** 827 * Dumps the voice in textual form. 828 * 829 * @param output where to send the formatted output 830 * @param pad the initial padding 831 * @param title the title to print when dumping out 832 */ 833 public void dump(PrintWriter output, int pad, String title) { 834 Utilities.dump(output, pad, title); 835 features.dump(output, pad + 4, title + " Features"); 836 dumpProcessors(output, pad + 4, title + " Processors"); 837 } 838 839 840 /** 841 * Dumps the voice processors. 
842 * 843 * @param output where to send the formatted output 844 * @param pad the initial padding 845 * @param title the title to print when dumping out 846 */ 847 public void dumpProcessors(PrintWriter output, int pad, String title) { 848 UtteranceProcessor[] processors; 849 if (utteranceProcessors == null) { 850 return; 851 } 852 853 processors = new UtteranceProcessor[utteranceProcessors.size()]; 854 processors = (UtteranceProcessor[]) 855 utteranceProcessors.toArray(processors); 856 857 Utilities.dump(output, pad, title); 858 for (int i = 0; i < processors.length; i++) { 859 Utilities.dump(output, pad + 4, processors[i].toString()); 860 } 861 } 862 863 864 /** 865 * Returns a language/voice specific Feature Processor. 866 * 867 * @param name the name of the processor 868 * 869 * @return the processor associated with the name or null if none 870 * could be found 871 */ 872 public FeatureProcessor getFeatureProcessor(String name) { 873 return (FeatureProcessor) featureProcessors.get(name); 874 } 875 876 /** 877 * Adds a language/voice specific Feature Processor to the set of 878 * FeatureProcessors supported by this voice. 879 * 880 * @param name the name of the processor 881 * @param fp the processor 882 */ 883 public void addFeatureProcessor(String name, FeatureProcessor fp) { 884 featureProcessors.put(name, fp); 885 } 886 887 /** 888 * Gets the state of the metrics mode. 889 * 890 * @return true if metrics mode is on 891 */ 892 public boolean isMetrics() { 893 return metrics; 894 } 895 896 /** 897 * Sets the metrics mode. 898 * 899 * @param metrics true if metrics mode should be on 900 */ 901 public void setMetrics(boolean metrics) { 902 this.metrics = metrics; 903 if (LOGGER.isLoggable(Level.FINE)) { 904 LOGGER.fine("Metrics mode is " + metrics); 905 } 906 } 907 908 /** 909 * Gets the state of the detailedMetrics mode. 
     *
     * @return true if detailedMetrics mode is on
     */
    public boolean isDetailedMetrics() {
        return detailedMetrics;
    }

    /**
     * Sets the state of the detailedMetrics mode.
     *
     * @param detailedMetrics true if detailedMetrics mode should be on
     */
    public void setDetailedMetrics(boolean detailedMetrics) {
        this.detailedMetrics = detailedMetrics;
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("DetailedMetrics mode is " + detailedMetrics);
        }
    }

    /**
     * Gets the state of the dumpUtterance mode.
     *
     * @return true if dumpUtterance mode is on
     */
    public boolean isDumpUtterance() {
        return dumpUtterance;
    }

    /**
     * Sets the state of the dumpUtterance mode.
     *
     * @param dumpUtterance true if dumpUtterance mode should be on
     */
    public void setDumpUtterance(boolean dumpUtterance) {
        this.dumpUtterance = dumpUtterance;
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("DumpUtterance mode is " + dumpUtterance);
        }
    }

    /**
     * Gets the state of the dumpRelations mode.
     *
     * @return true if dumpRelations mode is on
     */
    public boolean isDumpRelations() {
        return dumpRelations;
    }

    /**
     * Sets the state of the dumpRelations mode.
     *
     * @param dumpRelations true if dumpRelations mode should be on
     */
    public void setDumpRelations(boolean dumpRelations) {
        this.dumpRelations = dumpRelations;
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("DumpRelations mode is " + dumpRelations);
        }
    }

    /**
     * Sets the title for this run.
     *
     * @param runTitle the title for the run
     */
    public void setRunTitle(String runTitle) {
        this.runTitle = runTitle;
    }

    /**
     * Gets the title for this run.
982 * 983 * @return the title for the run 984 */ 985 public String getRunTitle() { 986 return runTitle; 987 } 988 989 /** 990 * Given a phoneme and a feature name, returns the feature. 991 * 992 * @param phone the phoneme of interest 993 * @param featureName the name of the feature of interest 994 * 995 * @return the feature with the given name 996 */ 997 public String getPhoneFeature(String phone, String featureName) { 998 return null; 999 } 1000 1001 /** 1002 * Shuts down the voice processing. 1003 */ 1004 public void deallocate() { 1005 setLoaded(false); 1006 1007 if (!externalAudioPlayer) { 1008 if (audioPlayer != null) { 1009 try { 1010 audioPlayer.close(); 1011 } catch (IOException e) { 1012 LOGGER.warning(e.getMessage()); 1013 } 1014 audioPlayer = null; 1015 } 1016 } 1017 1018 if (!externalOutputQueue) { 1019 outputQueue.close(); 1020 } 1021 } 1022 1023 /** 1024 * Sets the baseline pitch. 1025 * 1026 * @param hertz the baseline pitch in hertz 1027 */ 1028 public void setPitch(float hertz) { 1029 this.pitch = hertz; 1030 } 1031 1032 /** 1033 * Retreives the baseline pitch. 1034 * 1035 * @return the baseline pitch in hertz 1036 */ 1037 public float getPitch() { 1038 return pitch; 1039 } 1040 1041 /** 1042 * Sets the pitch range. 1043 * 1044 * @param range the range in hertz 1045 */ 1046 public void setPitchRange(float range) { 1047 this.range = range; 1048 } 1049 1050 /** 1051 * Gets the pitch range. 1052 * 1053 * @return the range in hertz 1054 */ 1055 public float getPitchRange() { 1056 return range; 1057 } 1058 1059 /** 1060 * Sets the pitch shift 1061 * 1062 * @param shift the pitch shift (1.0 is no shift) 1063 */ 1064 public void setPitchShift(float shift) { 1065 this.pitchShift = shift; 1066 } 1067 1068 /** 1069 * Gets the pitch shift. 
     *
     * @return the pitch shift
     */
    public float getPitchShift() {
        return pitchShift;
    }

    /**
     * Sets the duration stretch.
     *
     * @param stretch the duration stretch (1.0 is no stretch)
     */
    public void setDurationStretch(float stretch) {
        this.durationStretch = stretch;
    }

    /**
     * Gets the duration stretch.
     *
     * @return the duration stretch
     */
    public float getDurationStretch() {
        return durationStretch;
    }

    /**
     * Sets the rate of speech.
     *
     * @param wpm words per minute; values outside the range
     *     (0, 1000) are silently ignored
     */
    public void setRate(float wpm) {
        if (wpm > 0 && wpm < 1000) {
            setDurationStretch(nominalRate / wpm);
        }
    }

    /**
     * Gets the rate of speech.
     *
     * @return words per minute
     */
    public float getRate() {
        return durationStretch * nominalRate;
    }


    /**
     * Sets the volume.
     *
     * @param vol the volume (0 to 1.0)
     */
    public void setVolume(float vol) {
        volume = vol;
    }

    /**
     * Gets the volume.
     *
     * @return the volume (0 to 1.0)
     */
    public float getVolume() {
        return volume;
    }

    /**
     * Gets the lexicon for this voice.
     *
     * @return the lexicon (or null if there is no lexicon)
     */
    public Lexicon getLexicon() {
        return lexicon;
    }

    /**
     * Sets the lexicon to be used by this voice.
     *
     * @param lexicon the lexicon to use
     */
    public void setLexicon(Lexicon lexicon) {
        this.lexicon = lexicon;

    }

    /**
     * Sets the dumpfile for this voice.
     *
     * @param waveDumpFile the dumpfile
     */
    public void setWaveDumpFile(String waveDumpFile) {
        this.waveDumpFile = waveDumpFile;
    }

    /**
     * Gets the dumpfile for this voice.
1164 * 1165 * @return the dumpfile 1166 */ 1167 public String getWaveDumpFile() { 1168 return waveDumpFile; 1169 } 1170 1171 /** 1172 * Sets the audio player associated with this voice. The caller is 1173 * responsible for closing this player. 1174 * 1175 * @param player the audio player 1176 */ 1177 public void setAudioPlayer(AudioPlayer player) { 1178 audioPlayer = player; 1179 externalAudioPlayer = true; 1180 } 1181 1182 /** 1183 * Gets the default audio player for this voice. The return 1184 * value will be non-null only if the DEFAULT_AUDIO_PLAYER 1185 * system property has been set to the name of an AudioPlayer 1186 * class, and that class is able to be instantiated via a 1187 * no arg constructor. getAudioPlayer will automatically set 1188 * the audio player for this voice to the default audio player 1189 * if the audio player has not yet been set. 1190 * 1191 * @see #DEFAULT_AUDIO_PLAYER 1192 * @see #getAudioPlayer 1193 * @return the default AudioPlayer 1194 */ 1195 public AudioPlayer getDefaultAudioPlayer() throws InstantiationException { 1196 if (defaultAudioPlayer != null) { 1197 return defaultAudioPlayer; 1198 } 1199 1200 String className = Utilities.getProperty( 1201 DEFAULT_AUDIO_PLAYER, DEFAULT_AUDIO_PLAYER_DEFAULT); 1202 1203 try { 1204 Class cls = Class.forName(className); 1205 defaultAudioPlayer = (AudioPlayer) cls.newInstance(); 1206 return defaultAudioPlayer; 1207 } catch (ClassNotFoundException e) { 1208 throw new InstantiationException("Can't find class " + className); 1209 } catch (IllegalAccessException e) { 1210 throw new InstantiationException("Can't find class " + className); 1211 } catch (ClassCastException e) { 1212 throw new InstantiationException(className + " cannot be cast " 1213 + "to AudioPlayer"); 1214 } 1215 } 1216 1217 /** 1218 * Gets the audio player associated with this voice. If the 1219 * audio player has not yet been set, the value will default 1220 * to the return value of getDefaultAudioPlayer. 
1221 * 1222 * @see #getDefaultAudioPlayer 1223 * @return the audio player 1224 */ 1225 public AudioPlayer getAudioPlayer() { 1226 if (audioPlayer == null) { 1227 try { 1228 audioPlayer = getDefaultAudioPlayer(); 1229 } catch (InstantiationException e) { 1230 e.printStackTrace(); 1231 } 1232 } 1233 return audioPlayer; 1234 } 1235 1236 /** 1237 * Get a resource for this voice. 1238 * By default, the voice is searched for in the package 1239 * to which the voice class belongs. Subclasses are free to 1240 * override this behaviour. 1241 */ 1242 protected URL getResource(String resource) { 1243 return this.getClass().getResource(resource); 1244 } 1245 1246 /** 1247 * Set the name of this voice. 1248 * [[[TODO: any standard format to the name?]]] 1249 * 1250 * @param name the name to assign this voice 1251 */ 1252 protected void setName(String name) { 1253 this.name = name; 1254 } 1255 1256 1257 /** 1258 * Get the name of this voice. 1259 * 1260 * @return the name 1261 */ 1262 public String getName() { 1263 return name; 1264 } 1265 1266 /** 1267 * Returns the name of this Voice. 1268 * 1269 * @return the name of this Voice 1270 */ 1271 public String toString() { 1272 return getName(); 1273 } 1274 1275 /** 1276 * Set the gender of this voice. 1277 * 1278 * @param gender the gender to assign 1279 */ 1280 protected void setGender(Gender gender) { 1281 this.gender = gender; 1282 } 1283 1284 /** 1285 * Get the gender of this voice. 1286 * 1287 * @return the gender of this voice 1288 */ 1289 public Gender getGender() { 1290 return gender; 1291 } 1292 1293 /** 1294 * Set the age of this voice. 1295 * 1296 * @param age the age to assign 1297 */ 1298 protected void setAge(Age age) { 1299 this.age = age; 1300 } 1301 1302 /** 1303 * Get the age of this voice. 1304 * 1305 * @return the age of this voice 1306 */ 1307 public Age getAge() { 1308 return age; 1309 } 1310 1311 /** 1312 * Set the description of this voice. 
1313 * 1314 * @param description the human readable description to assign 1315 */ 1316 protected void setDescription(String description) { 1317 this.description = description; 1318 } 1319 1320 /** 1321 * Get the description of this voice. 1322 * 1323 * @return the human readable description of this voice 1324 */ 1325 public String getDescription() { 1326 return description; 1327 } 1328 1329 /** 1330 * Set the locale of this voice. 1331 * 1332 * @param locale the locale of this voice. 1333 */ 1334 protected void setLocale(Locale locale) { 1335 this.locale = locale; 1336 } 1337 1338 /** 1339 * Get the locale of this voice. 1340 * 1341 * @return the locale of this voice. 1342 */ 1343 public Locale getLocale() { 1344 return locale; 1345 } 1346 1347 /** 1348 * Set the domain of this voice. 1349 * 1350 * @param domain the domain of this voice. For example, 1351 * "general", "time", or 1352 * "weather". 1353 */ 1354 protected void setDomain(String domain) { 1355 this.domain = domain; 1356 } 1357 1358 /** 1359 * Get the domain of this voice. 1360 * 1361 * @return the domain of this voice. For example, 1362 * "general", "time", or 1363 * "weather". 1364 */ 1365 public String getDomain() { 1366 return domain; 1367 } 1368 1369 /** 1370 * Sets the voice style. This parameter is designed for human 1371 * interpretation. Values might include "business", "casual", 1372 * "robotic", "breathy" 1373 * 1374 * @param style the stile of this voice. 1375 */ 1376 public void setStyle(String style) { 1377 this.style = style; 1378 } 1379 1380 /** 1381 * Gets the voice style. This parameter is designed for human 1382 * interpretation. Values might include "business", "casual", 1383 * "robotic", "breathy". 1384 */ 1385 public String getStyle() { 1386 return style; 1387 } 1388 1389 /** 1390 * Sets the organization which created this voice. For example 1391 * "cmu", "sun", ... 
     *
     * @param organization the name of the organization
     */
    protected void setOrganization(String organization) {
        this.organization = organization;
    }

    /**
     * Gets the organization which created this voice. For example
     * "cmu", "sun", ...
     *
     * @return the name of the organization
     */
    public String getOrganization() {
        return organization;
    }

    /**
     * Returns the AudioOutput processor to be used by this voice.
     * Derived voices typically override this to customize behaviors.
     *
     * @return the audio output processor
     *
     * @throws IOException if an IO error occurs while getting
     *     processor
     */
    protected abstract UtteranceProcessor getAudioOutput() throws IOException ;

    /**
     * Tokenizes a FreeTTSSpeakable and hands the tokens out, one
     * utterance-sized batch at a time, via {@link #iterator}.
     */
    private class FreeTTSSpeakableTokenizer {
        // the item being tokenized
        FreeTTSSpeakable speakable;
        // tokenizer supplied by the enclosing voice
        Tokenizer tok = getTokenizer();

        /**
         * Constructor. Points the tokenizer at the speakable's
         * content: plain text, a stream, or a document (converted
         * to text via documentToString).
         *
         * @param speakable the queue item to be pretokenized
         */
        public FreeTTSSpeakableTokenizer(FreeTTSSpeakable speakable) {
            this.speakable = speakable;
            if (speakable.isPlainText()) {
                tok.setInputText(speakable.getText());
            } else if (speakable.isStream()) {
                // NOTE(review): InputStreamReader without an explicit
                // charset uses the platform default -- confirm intended.
                Reader reader = new BufferedReader(
                    new InputStreamReader(speakable.getInputStream()));
                tok.setInputReader(reader);
            } else if (speakable.isDocument()) {
                tok.setInputText(documentToString(speakable.getDocument()));
            }
        }

        /**
         * Returns an iterator over the utterances of this text item.
         * Each call to next() consumes tokens until an utterance
         * boundary is reached.
         */
        public Iterator iterator() {
            return new Iterator() {
                // true until the first utterance has been produced
                boolean first = true;
                // boundary token carried over into the next utterance
                Token savedToken = null;

                /**
                 * Determines if there are more utterances.
                 *
                 * @return true if there are more tokens
                 */
                public boolean hasNext() {
                    return savedToken != null || tok.hasMoreTokens();
                }

                /**
                 * Returns the next utterance.
                 *
                 * @return the next utterance (as an object) or
                 *     null if there is are no utterances left
                 */
                public Object next() {
                    ArrayList tokenList = new ArrayList();
                    Utterance utterance = null;

                    // start with the boundary token saved by the
                    // previous call, if any
                    if (savedToken != null) {
                        tokenList.add(savedToken);
                        savedToken = null;
                    }

                    // accumulate tokens until an empty word, a break,
                    // or the 500-token cap ends the utterance; the
                    // terminating token is saved for the next call
                    while (tok.hasMoreTokens()) {
                        Token token = tok.getNextToken();
                        if ((token.getWord().length() == 0) ||
                            (tokenList.size() > 500) ||
                            tok.isBreak()) {
                            savedToken = token;
                            break;
                        }
                        tokenList.add(token);
                    }
                    utterance = new Utterance(Voice.this, tokenList);
                    utterance.setSpeakable(speakable);
                    utterance.setFirst(first);
                    first = false;
                    // last utterance: no tokens remain and the saved
                    // token (if any) carries no word
                    boolean isLast =
                        (!tok.hasMoreTokens() &&
                         (savedToken == null ||
                          savedToken.getWord().length() == 0));
                    utterance.setLast(isLast);
                    return utterance;
                }

                /** Not supported by this iterator. */
                public void remove() {
                    throw new UnsupportedOperationException("remove");
                }
            };
        }
    }
}