001/** 002 * Portions Copyright 2004 DFKI GmbH. 003 * Portions Copyright 2001 Sun Microsystems, Inc. 004 * Portions Copyright 1999-2001 Language Technologies Institute, 005 * Carnegie Mellon University. 006 * All Rights Reserved. Use is subject to license terms. 007 * 008 * See the file "license.terms" for information on usage and 009 * redistribution of this file, and for a DISCLAIMER OF ALL 010 * WARRANTIES. 011 */ 012package de.dfki.lt.freetts; 013 014import java.io.IOException; 015import java.net.URL; 016import java.util.Locale; 017 018import com.sun.speech.freetts.Age; 019import com.sun.speech.freetts.Gender; 020import com.sun.speech.freetts.PartOfSpeech; 021import com.sun.speech.freetts.PartOfSpeechImpl; 022import com.sun.speech.freetts.PhoneSet; 023import com.sun.speech.freetts.PhoneSetImpl; 024import com.sun.speech.freetts.Tokenizer; 025import com.sun.speech.freetts.UtteranceProcessor; 026import com.sun.speech.freetts.Voice; 027import com.sun.speech.freetts.clunits.ClusterUnitPitchmarkGenerator; 028import com.sun.speech.freetts.clunits.ClusterUnitSelector; 029import com.sun.speech.freetts.en.us.CMULexicon; 030import com.sun.speech.freetts.en.us.FeatureProcessors; 031import com.sun.speech.freetts.lexicon.Lexicon; 032import com.sun.speech.freetts.relp.AudioOutput; 033import com.sun.speech.freetts.relp.SampleInfo; 034import com.sun.speech.freetts.relp.UnitConcatenator; 035 036/** 037 * A simple dummy voice as a starting point for non-US-English 038 * cluster unit voices. All NLP stuff would need to be implemented 039 * in order for this to become a full TTS voice. 040 */ 041public class ClusterUnitVoice extends Voice implements ConcatenativeVoice { 042 private PhoneSet phoneSet; 043 protected URL database; 044 protected URL phonesetURL; 045 protected URL partOfSpeechURL; 046 protected ClusterUnitSelector unitSelector; 047 private ClusterUnitNamer unitNamer; 048 public ClusterUnitVoice(String name, Gender gender, Age age, 049 String description, Locale locale, String domain, 050 String organization, Lexicon lexicon, URL database) { 051 this(name, gender, age, description, locale, domain, 052 organization, lexicon, database, null, null, null); 053 } 054 055 /** 056 * Creates a ClusterUnitVoice 057 * 058 * @param database the database of the voice 059 * @param unitNamer specifies the name of the Units (if null, an 060 * ldom naming scheme will be used: 'ae_afternoon') 061 * @param phonesetURL leads to the phoneset, which will be used 062 * for the FeatureProcessors (can be null) 063 * @param partOfSpeechURL leads to the pos-textfile which will be used 064 * for the FeatureProcessors (can be null) 065 */ 066 public ClusterUnitVoice(String name, Gender gender, Age age, 067 String description, Locale locale, String domain, 068 String organization, Lexicon lexicon, URL database, 069 ClusterUnitNamer unitNamer, URL phonesetURL, URL partOfSpeechURL) { 070 071 //TODO: do something useful with the lexicon 072 super(name, gender, age, description, locale, 073 domain, organization); 074 setRate(150f); 075 setPitch(100F); 076 setPitchRange(12F); 077 if (lexicon != null) { 078 setLexicon(lexicon); 079 } else { 080 // Use a small dummy lexicon 081 setLexicon(new CMULexicon("cmutimelex")); 082 } 083 this.database = database; 084 this.unitNamer = unitNamer; 085 this.phonesetURL = phonesetURL; 086 this.partOfSpeechURL = partOfSpeechURL; 087 try { 088 unitSelector = new ClusterUnitSelector(getDatabase(), unitNamer); 089 } catch (IOException ioe) { 090 ioe.printStackTrace(); 091 } 092 } 093 094 /** 095 * Get the sample info for the underlying database. 096 * @return the sample info object 097 */ 098 public SampleInfo getSampleInfo() { 099 return unitSelector.getSampleInfo(); 100 } 101 102 public Tokenizer getTokenizer() { 103 return null; 104 } 105 106 107 protected void loader() throws IOException { 108 setupFeatureProcessors(); 109 } 110 111 112 protected UtteranceProcessor getAudioOutput() throws IOException { 113 return new AudioOutput(); 114 } 115 116 /** 117 * Gets the url to the database that defines the unit data for this 118 * voice. 119 * 120 * @return a url to the database 121 */ 122 public URL getDatabase() { 123 return database; 124 } 125 126 /** 127 * Returns the unit selector to be used by this voice. 128 * Derived voices typically override this to customize behaviors. 129 * This voice uses a cluster unit selector as the unit selector. 130 * 131 * @return the post lexical processor 132 * 133 * @throws IOException if an IO error occurs while getting 134 * processor 135 */ 136 public UtteranceProcessor getUnitSelector() throws IOException { 137 return unitSelector; 138 } 139 140 /** 141 * Returns the pitch mark generator to be used by this voice. 142 * Derived voices typically override this to customize behaviors. 143 * There is no default unit selector 144 * 145 * @return the post lexical processor 146 * 147 * @throws IOException if an IO error occurs while getting 148 * processor 149 */ 150 public UtteranceProcessor getPitchmarkGenerator() throws IOException { 151 return new ClusterUnitPitchmarkGenerator(); 152 } 153 154 /** 155 * Returns the unit concatenator to be used by this voice. 156 * Derived voices typically override this to customize behaviors. 157 * There is no default unit selector 158 * 159 * @return the post lexical processor 160 * 161 * @throws IOException if an IO error occurs while getting 162 * processor 163 */ 164 public UtteranceProcessor getUnitConcatenator() throws IOException { 165 return new UnitConcatenator(); 166 } 167 168 protected void setupFeatureProcessors() throws IOException { 169 if(phonesetURL != null){ 170 phoneSet = new PhoneSetImpl(phonesetURL); 171 } 172 if(partOfSpeechURL != null){ 173 PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL, 174 "content"); 175 addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos)); 176 } 177 178 179 180 181 addFeatureProcessor("word_break", new FeatureProcessors.WordBreak()); 182 addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc()); 183 addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls()); 184 addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn()); 185 addFeatureProcessor("syl_in", new FeatureProcessors.SylIn()); 186 addFeatureProcessor("syl_out", new FeatureProcessors.SylOut()); 187 addFeatureProcessor("ssyl_out", new 188 FeatureProcessors.StressedSylOut()); 189 addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak()); 190 addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak()); 191 addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits()); 192 addFeatureProcessor("month_range", new FeatureProcessors.MonthRange()); 193 addFeatureProcessor("token_pos_guess", 194 new FeatureProcessors.TokenPosGuess()); 195 addFeatureProcessor("segment_duration", 196 new FeatureProcessors.SegmentDuration()); 197 addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases()); 198 addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn()); 199 addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent()); 200 addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl()); 201 addFeatureProcessor("position_type", new 202 FeatureProcessors.PositionType()); 203 204 addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace()); 205 addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType()); 206 addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox()); 207 addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC()); 208 addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront()); 209 addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight()); 210 addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength()); 211 addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd()); 212 213 addFeatureProcessor("seg_coda_fric", new 214 FeatureProcessors.SegCodaFric()); 215 addFeatureProcessor("seg_onset_fric", new 216 FeatureProcessors.SegOnsetFric()); 217 218 addFeatureProcessor("seg_coda_stop", new 219 FeatureProcessors.SegCodaStop()); 220 addFeatureProcessor("seg_onset_stop", new 221 FeatureProcessors.SegOnsetStop()); 222 223 addFeatureProcessor("seg_coda_nasal", new 224 FeatureProcessors.SegCodaNasal()); 225 addFeatureProcessor("seg_onset_nasal", new 226 FeatureProcessors.SegOnsetNasal()); 227 228 addFeatureProcessor("seg_coda_glide", new 229 FeatureProcessors.SegCodaGlide()); 230 addFeatureProcessor("seg_onset_glide", new 231 FeatureProcessors.SegOnsetGlide()); 232 233 addFeatureProcessor("seg_onsetcoda", new 234 FeatureProcessors.SegOnsetCoda()); 235 addFeatureProcessor("syl_codasize", new 236 FeatureProcessors.SylCodaSize()); 237 addFeatureProcessor("syl_onsetsize", new 238 FeatureProcessors.SylOnsetSize()); 239 addFeatureProcessor("accented", new FeatureProcessors.Accented()); 240 } 241 242 /** 243 * Given a phoneme and a feature name, return the feature 244 * 245 * @param phone the phoneme of interest 246 * @param featureName the name of the feature of interest 247 * 248 * @return the feature with the given name 249 */ 250 public String getPhoneFeature(String phone, String featureName) { 251 if (phoneSet != null) 252 return phoneSet.getPhoneFeature(phone, featureName); 253 else 254 return null; 255 } 256 257}