001/** 002 * Portions Copyright 2001 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts.en; 012 013import com.sun.speech.freetts.UtteranceProcessor; 014import com.sun.speech.freetts.Voice; 015import com.sun.speech.freetts.Relation; 016import com.sun.speech.freetts.Utterance; 017import com.sun.speech.freetts.ProcessException; 018import com.sun.speech.freetts.PathExtractorImpl; 019import com.sun.speech.freetts.PathExtractor; 020import com.sun.speech.freetts.Item; 021import java.util.StringTokenizer; 022import java.util.NoSuchElementException; 023import java.util.List; 024import java.util.ArrayList; 025import java.io.BufferedReader; 026import java.io.InputStreamReader; 027import java.io.IOException; 028import java.net.URL; 029 030 031/** 032 * Calculates the F0 curve for an utterance based on the Black and 033 * Hunt article "Generating F0 Contours from ToBI Labels Using Linear 034 * Regression," ICSLP96, vol. 3, pp 1385-1388, Philadelphia, 035 * PA. 1996. 036 */ 037public class ContourGenerator implements UtteranceProcessor { 038 private final static PathExtractor endPath = 039 new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.end", 040 true); 041 private final static PathExtractor lastDaughterEndPath = 042 new PathExtractorImpl("R:SylStructure.daughtern.end", 043 true); 044 private final static PathExtractor postBreakPath = 045 new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.name", 046 true); 047 private final static PathExtractor preBreakPath = 048 new PathExtractorImpl("R:SylStructure.daughtern.R:Segment.n.name", 049 true); 050 private final static PathExtractor vowelMidPath = 051 new PathExtractorImpl("R:Segment.p.end", 052 true); 053 private final static PathExtractor localF0Shift = 054 new PathExtractorImpl( 055 "R:SylStructure.parent.R:Token.parent.local_f0_shift", true); 056 private final static PathExtractor localF0Range = 057 new PathExtractorImpl( 058 "R:SylStructure.parent.R:Token.parent.local_f0_range", true); 059 060 private final float modelMean; 061 private final float modelStddev; 062 private F0ModelTerm[] terms = { null }; 063 064 /** 065 * Creates a ContourGenerator utterance processor. 066 * 067 * @param url source of the data 068 * @param modelMean the average frequency 069 * @param modelStddev the std deviation of the frequency 070 * 071 * @throws IOException if an error occurs while loading data 072 */ 073 public ContourGenerator(URL url, 074 float modelMean, float modelStddev) 075 throws IOException { 076 this.modelMean = modelMean; 077 this.modelStddev = modelStddev; 078 079 List termsList = new ArrayList(); 080 081 String line; 082 BufferedReader reader = new BufferedReader( 083 new InputStreamReader(url.openStream())); 084 line = reader.readLine(); 085 while (line != null) { 086 if (!line.startsWith("***")) { 087 parseAndAdd(termsList, line); 088 } 089 line = reader.readLine(); 090 } 091 terms = (F0ModelTerm[]) termsList.toArray(terms); 092 reader.close(); 093 } 094 095 /** 096 * Generates the F0 contour for the utterance. 097 * 098 * @param utterance the utterance to process 099 * 100 * @throws ProcessException if an <code>IOException</code> is 101 * thrown during the processing of the utterance 102 */ 103 public void processUtterance(Utterance utterance) throws ProcessException { 104 float lend = 0.0f; 105 float mean; 106 float stddev; 107 float localMean; 108 float localStddev; 109 Object tval; 110 111 mean = utterance.getVoice().getPitch(); 112 mean *= utterance.getVoice().getPitchShift(); 113 stddev = utterance.getVoice().getPitchRange(); 114 115 Relation target = utterance.createRelation(Relation.TARGET); 116 for (Item syllable = 117 utterance.getRelation(Relation.SYLLABLE).getHead(); 118 syllable != null; 119 syllable = syllable.getNext()) { 120 121 if (syllable.getItemAs(Relation.SYLLABLE_STRUCTURE).hasDaughters()) { 122 123 tval = localF0Shift.findFeature(syllable); 124 localMean = Float.parseFloat(tval.toString()); 125 126 if (localMean == 0.0) { 127 localMean = mean; 128 } else { 129 localMean *= mean; 130 } 131 132 tval = localF0Range.findFeature(syllable); 133 localStddev = Float.parseFloat(tval.toString()); 134 135 if (localStddev == 0.0) { 136 localStddev = stddev; 137 } 138 139 Interceptor interceptor = applyLrModel(syllable); 140 if (isPostBreak(syllable)) { 141 lend = mapF0(interceptor.start, localMean, localStddev); 142 } 143 144 Float val = (Float) endPath.findFeature(syllable); 145 // assert val != null; 146 // don't mind null ptr exception 147 addTargetPoint(target, val.floatValue(), 148 mapF0((interceptor.start + lend) / 2.0f, 149 localMean, localStddev)); 150 addTargetPoint(target, vowelMid(syllable), 151 mapF0(interceptor.mid, localMean, localStddev)); 152 lend = mapF0(interceptor.end, localMean, localStddev); 153 if (isPreBreak(syllable)) { 154 Float eval = (Float) lastDaughterEndPath.findFeature( 155 syllable); 156 addTargetPoint(target, eval.floatValue(), 157 mapF0(interceptor.end, localMean, localStddev)); 158 } 159 } 160 } 161 162 if (utterance.getRelation(Relation.SEGMENT).getHead() != null) { 163 Item first = target.getHead(); 164 if (first == null) { 165 addTargetPoint(target, 0, mean); 166 } else if (first.getFeatures().getFloat("pos") > 0) { 167 Item newItem = first.prependItem(null); 168 newItem.getFeatures().setFloat("pos", 0.0f); 169 newItem.getFeatures().setFloat( 170 "f0", first.getFeatures().getFloat("f0")); 171 } 172 Item last = (Item) target.getTail(); 173 Item lastSegment 174 = utterance.getRelation(Relation.SEGMENT).getTail(); 175 float segEnd = 0.0f; 176 177 if (lastSegment != null) { 178 segEnd = lastSegment.getFeatures().getFloat("end"); 179 } 180 181 if (last.getFeatures().getFloat("pos") < segEnd) { 182 addTargetPoint(target, segEnd, last.getFeatures(). 183 getFloat("f0")); 184 } 185 } 186 } 187 188 /** 189 * Applies the linear regression model. 190 * 191 * @param syllable the syllable to process 192 193 * @return the 3 points for the syllable as an <code>Interceptor</code> 194 */ 195 private Interceptor applyLrModel(Item syllable) { 196 float fv = 0.0f; 197 Interceptor interceptor = new Interceptor(); 198 interceptor.start = terms[0].start; 199 interceptor.mid = terms[0].mid; 200 interceptor.end = terms[0].end; 201 202 for (int i = 1; i < terms.length; i++) { 203 Object value = terms[i].findFeature(syllable); 204 if (terms[i].type != null) { 205 if (value.toString().equals(terms[i].type)) { 206 fv = 1.0f; 207 } else { 208 fv = 0.0f; 209 } 210 } else { 211 fv = Float.parseFloat(value.toString()); 212 } 213 214 interceptor.start += fv * terms[i].start; 215 interceptor.mid += fv * terms[i].mid; 216 interceptor.end += fv * terms[i].end; 217 } 218 219 return interceptor; 220 } 221 222 /** 223 * Returns the time point mid way in vowel in this syllable. 224 * 225 * @param syllable the syllable of interest 226 * 227 * @return the time point mid way in vowel in this syllable 228 */ 229 private final float vowelMid(Item syllable) { 230 Voice voice = syllable.getUtterance().getVoice(); 231 Item firstSeg = syllable.getItemAs( 232 Relation.SYLLABLE_STRUCTURE).getDaughter(); 233 Item segment; 234 float val; 235 236 for (segment = firstSeg; segment != null; segment =segment.getNext()) { 237 // TODO refactor phone feature stuff like this so that 238 // it can be understood. 239 if ("+".equals(voice.getPhoneFeature(segment.toString(), "vc"))) { 240 val = (segment.getFeatures().getFloat("end") + 241 ((Float) vowelMidPath.findFeature(segment)).floatValue()) / 2.0f; 242 return val; 243 } 244 } 245 246 if (firstSeg == null) { 247 val = 0.0f; 248 } else { 249 val = (firstSeg.getFeatures().getFloat("end") + 250 ((Float) vowelMidPath.findFeature(firstSeg)).floatValue()) 251 / 2.0f; 252 } 253 254 return val; 255 } 256 257 /** 258 * Adds the target point at the given time to the given frequency 259 * to the given relation. 260 * 261 * @param target the target of interest 262 * @param pos the time 263 * @param f0 the frequency 264 */ 265 private void addTargetPoint(Relation target, float pos, float f0) { 266 Item item = target.appendItem(); 267 item.getFeatures().setFloat("pos", pos); 268 if (f0 > 500.0) { 269 item.getFeatures().setFloat("f0", 500.0f); 270 } else if (f0 < 50.0) { 271 item.getFeatures().setFloat("f0", 50.0f); 272 } else { 273 item.getFeatures().setFloat("f0", f0); 274 } 275 } 276 277 /** 278 * Determines if this syllable is following a break. 279 * 280 * @param syllable the syllable to check 281 * 282 * @return <code>true</code> if this syllable is following a 283 * break; otherwise <code>false</code>. 284 */ 285 private final boolean isPostBreak(Item syllable) { 286 return ((syllable.getPrevious() == null) || 287 "pau".equals(postBreakPath.findFeature(syllable))); 288 } 289 290 /** 291 * Determines if this syllable is before a break. 292 * 293 * @param syllable the syllable to check 294 * 295 * @return <code>true</code> if this syllable is before a 296 * break; otherwise <code>false</code>. 297 */ 298 private final boolean isPreBreak(Item syllable) { 299 return ((syllable.getNext() == null) || 300 "pau".equals(preBreakPath.findFeature(syllable))); 301 } 302 303 /** 304 * Maps the given value to the curve. 305 * 306 * @param val the value to map 307 * 308 * @return the mapped value 309 */ 310 private final float mapF0(float val, float mean, float stddev) { 311 return ((((val - modelMean)/ modelStddev) * stddev) + mean); 312 } 313 314 /** 315 * Parses the line into an F0ModelTerm. 316 * 317 * @param list resulting F0ModelTerm is added to this list 318 * @param line the string to parse 319 */ 320 protected void parseAndAdd(List list, String line) { 321 try { 322 StringTokenizer tokenizer = new StringTokenizer(line," "); 323 String feature = tokenizer.nextToken(); 324 float start = Float.parseFloat(tokenizer.nextToken()); 325 float mid = Float.parseFloat(tokenizer.nextToken()); 326 float end = Float.parseFloat(tokenizer.nextToken()); 327 String type = tokenizer.nextToken(); 328 329 if (type.equals("null")) { 330 type = null; 331 } 332 333 list.add(new F0ModelTerm(feature, start, mid, end, type)); 334 } catch (NoSuchElementException nsee) { 335 throw new Error("ContourGenerator: Error while parsing F0ModelTerm " 336 + nsee.getMessage()); 337 } catch (NumberFormatException nfe) { 338 throw new Error("ContourGenerator: Bad float format " 339 + nfe.getMessage()); 340 } 341 } 342 343 /** 344 * Returns the string representation of the object. 345 * 346 * @return the string representation of the object 347 */ 348 public String toString() { 349 return "ContourGenerator"; 350 } 351} 352 353/** 354 * Represents a single term for the F0 model 355 */ 356class F0ModelTerm { 357 PathExtractor path; 358 float start; 359 float mid; 360 float end; 361 String type; 362 363 /** 364 * Constructs an F0ModelTerm. 365 * 366 * @param feature the feature of the term 367 * @param start the starting point of the term 368 * @param mid the mid-point of the term 369 * @param end the end point of the term 370 * @param type the type of the term 371 */ 372 F0ModelTerm(String feature, float start, float mid, 373 float end, String type) { 374 path = new PathExtractorImpl(feature, true); 375 this.start = start; 376 this.mid = mid; 377 this.end = end; 378 this.type = type; 379 } 380 381 /** 382 * Find the feature associated with the given item 383 * 384 * @param item the item of interest 385 * 386 * @return the object representing the feature. 387 */ 388 public Object findFeature(Item item) { 389 return path.findFeature(item); 390 } 391 392 /** 393 * Returns the string representation of the object 394 * 395 * @return the string representation of the object 396 */ 397 public String toString() { 398 return path.toString(); 399 } 400} 401 402/** 403 * Represents an interceptor. 404 */ 405class Interceptor { 406 float start; 407 float mid; 408 float end; 409 410 /** 411 * Constructs the default interceptor 412 */ 413 Interceptor() { 414 start = 0.0f; 415 mid = 0.0f; 416 end = 0.0f; 417 } 418 419 /** 420 * Returns the string representation of the object. 421 * 422 * @return the string representation of the object 423 */ 424 public String toString() { 425 return Float.toString(start) + " " + 426 Float.toString(mid) + " " + 427 Float.toString(end); 428 } 429}