001/** 002 * Portions Copyright 2001-2003 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts.clunits; 012import java.io.IOException; 013import java.net.URL; 014import java.util.logging.Level; 015import java.util.logging.Logger; 016 017import com.sun.speech.freetts.FeatureSet; 018import com.sun.speech.freetts.FeatureSetImpl; 019import com.sun.speech.freetts.Item; 020import com.sun.speech.freetts.PathExtractor; 021import com.sun.speech.freetts.PathExtractorImpl; 022import com.sun.speech.freetts.ProcessException; 023import com.sun.speech.freetts.Relation; 024import com.sun.speech.freetts.Utterance; 025import com.sun.speech.freetts.UtteranceProcessor; 026import com.sun.speech.freetts.Voice; 027import com.sun.speech.freetts.cart.CART; 028import com.sun.speech.freetts.clunits.ClusterUnitDatabase.UnitOriginInfo; 029import com.sun.speech.freetts.relp.Sample; 030import com.sun.speech.freetts.relp.SampleInfo; 031import com.sun.speech.freetts.relp.SampleSet; 032 033import de.dfki.lt.freetts.ClusterUnitNamer; 034 035 036/** 037 * Generates the Unit Relation of an Utterance from the 038 * Segment Relation. 039 * 040 */ 041public class ClusterUnitSelector implements UtteranceProcessor { 042 /** Logger instance. */ 043 private static final Logger LOGGER = 044 Logger.getLogger(ClusterUnitSelector.class.getName()); 045 046 private final static PathExtractor DNAME = new PathExtractorImpl( 047 "R:SylStructure.parent.parent.name", true); 048 private ClusterUnitDatabase clunitDB; 049 private ClusterUnitNamer unitNamer; 050 051 /** 052 * Constructs a ClusterUnitSelector. 053 * 054 * @param url the URL for the unit database. If the URL path ends 055 * with a '.bin' it is assumed that the DB is a binary database, 056 * otherwise, its assumed that its a text database1 057 * 058 * @throws IOException if an error occurs while loading the 059 * database 060 * 061 */ 062 public ClusterUnitSelector(URL url) throws IOException { 063 this(url, null); 064 } 065 066 /** 067 * Constructs a ClusterUnitSelector. 068 * 069 * @param url the URL for the unit database. If the URL path ends 070 * with a '.bin' it is assumed that the DB is a binary database, 071 * otherwise, its assumed that its a text database1 072 * @param unitNamer an optional unit namer, specifying how the cluster 073 * units are called in the voice database referenced by url. If this is null, 074 * an ldom unit naming scheme will be used (e.g., 'ae_afternoon' for the 075 * phoneme 'ae' in the word 'afternoon'. 076 * 077 * @throws IOException if an error occurs while loading the 078 * database 079 * 080 */ 081 public ClusterUnitSelector(URL url, ClusterUnitNamer unitNamer) throws IOException { 082 if (url == null) { 083 throw new IOException("Can't load cluster unit database"); 084 } 085 boolean binary = url.getPath().endsWith(".bin"); 086 clunitDB = new ClusterUnitDatabase(url, binary); 087 this.unitNamer = unitNamer; 088 089 } 090 091 092 /** 093 * Get the sample info for the underlying database. 094 * @return the sample info object 095 */ 096 public SampleInfo getSampleInfo() { 097 return clunitDB.getSampleInfo(); 098 } 099 100 /** 101 * Generates the Unit Relation from the Segment Relation. 102 * <br><b>Implementation note:</b><br> 103 * Populates the segment relation with segment names of the form: 104 * XX_YY where XX is the segment name (typically a phoneme) 105 * and YY is the word that the segment is in (stripped and 106 * lower case). 107 * 108 * The first step in cluster unit selection is to determine the unit 109 * type for each unit in the utterance. The unit type for 110 * selection in the simple talking clock example (cmu_time_awb) is 111 * done per phone. The unit type consists of the phone 112 * name followed by the word the phone comes from (e.g., n_now for 113 * the phone 'n' in the word 'now'). 114 * 115 * Invoke the Viterbi algorithm (via a viterbi class) that 116 * selects the proper units for the segment and adds that to 117 * each segment item. 118 * 119 * For each segment, create a unit and attach features based 120 * upon the selected units. 121 * 122 * @param utterance the utterance to generate the Unit Relation 123 * 124 * @throws ProcessException if an IOException is thrown during the 125 * processing of the utterance 126 * 127 */ 128 public void processUtterance(Utterance utterance) throws ProcessException { 129 Viterbi vd; 130 Relation segs = utterance.getRelation(Relation.SEGMENT); 131 132 utterance.setObject(SampleInfo.UTT_NAME, 133 clunitDB.getSampleInfo()); 134 utterance.setObject("sts_list", clunitDB.getSts()); 135 136 vd = new Viterbi(segs, clunitDB); 137 138 for (Item s = segs.getHead(); s != null; s = s.getNext()) { 139 setUnitName(s); 140 } 141 142 // Carry out the CART lookup for the target costs, and the viterbi 143 // search for finding the best path (join costs) through the candidates. 144 vd.decode(); 145 146 // Now associate the candidate units in the best path 147 // with the items in the segment relation. 148 if (!vd.result("selected_unit")) { 149 LOGGER.severe("clunits: can't find path"); 150 throw new Error(); 151 } 152 153 // If optimal coupling was used, the join points must now be copied 154 // from the path elements to the actual items in the segment relation. 155 vd.copyFeature("unit_prev_move"); 156 vd.copyFeature("unit_this_move"); 157 158 // Based on this data, create a Unit relation giving the details of the 159 // units to concatenate. 160 Relation unitRelation = utterance.createRelation(Relation.UNIT); 161 162 for (Item s = segs.getHead(); s != null; s = s.getNext()) { 163 Item unit = unitRelation.appendItem(); 164 FeatureSet unitFeatureSet = unit.getFeatures(); 165 int unitEntry = s.getFeatures().getInt("selected_unit"); 166 167 // The item name is the segment name 168 unitFeatureSet.setString("name", s.getFeatures().getString("name")); 169 170 int unitStart; 171 int unitEnd; 172 String clunitName = s.getFeatures().getString("clunit_name"); 173 174 if (s.getFeatures().isPresent("unit_this_move")) { 175 unitStart = s.getFeatures().getInt("unit_this_move"); 176 } else { 177 unitStart = clunitDB.getStart(unitEntry); 178 } 179 180 if (s.getNext() != null && 181 s.getNext().getFeatures().isPresent("unit_prev_move")) { 182 unitEnd = s.getNext().getFeatures().getInt("unit_prev_move"); 183 } else { 184 unitEnd = clunitDB.getEnd(unitEntry); 185 } 186 187 unitFeatureSet.setInt("unit_entry", unitEntry); 188 ClusterUnit clunit = new ClusterUnit(clunitDB, 189 clunitName, unitStart, unitEnd); 190 unitFeatureSet.setObject("unit", clunit); 191 if (true) { 192 unitFeatureSet.setInt("unit_start", clunit.getStart()); 193 unitFeatureSet.setInt("unit_end", clunit.getEnd()); 194 unitFeatureSet.setInt("instance", unitEntry - clunitDB.getUnitIndex(clunitName, 0)); 195 } // add the rest of these things for debugging. 196 197 if (LOGGER.isLoggable(Level.FINE)) { 198 LOGGER.fine(" sr " + clunitDB.getSampleInfo().getSampleRate() + " " + 199 s.getFeatures().getFloat("end") + " " + 200 (int) (s.getFeatures().getFloat("end") * 201 clunitDB.getSampleInfo().getSampleRate())); 202 } 203 unitFeatureSet.setInt("target_end", 204 (int) (s.getFeatures().getFloat("end") 205 * clunitDB.getSampleInfo().getSampleRate())); 206 207 // Associate debug info about unit origin if available: 208 UnitOriginInfo unitOrigin = clunitDB.getUnitOriginInfo(unitEntry); 209 if (unitOrigin != null) { 210 unitFeatureSet.setString("origin", unitOrigin.originFile); 211 unitFeatureSet.setFloat("origin_start", unitOrigin.originStart); 212 unitFeatureSet.setFloat("origin_end", unitOrigin.originEnd); 213 } 214 215 } 216 } 217 218 219 /** 220 * Sets the cluster unit name given the segment. 221 * 222 * @param seg the segment item that gets the name 223 */ 224 protected void setUnitName(Item seg) { 225 if (unitNamer != null) { 226 unitNamer.setUnitName(seg); 227 return; 228 } 229 // default to LDOM naming scheme 'ae_afternoon': 230 String cname = null; 231 232 String segName = seg.getFeatures().getString("name"); 233 234 Voice voice = seg.getUtterance().getVoice(); 235 String silenceSymbol = voice.getPhoneFeature("silence", "symbol"); 236 if (silenceSymbol == null) 237 silenceSymbol = "pau"; 238 if (segName.equals(silenceSymbol)) { 239 cname = silenceSymbol + "_" + seg.findFeature("p.name"); 240 } else { 241 // remove single quotes from name 242 String dname = ((String) DNAME.findFeature(seg)).toLowerCase(); 243 cname = segName + "_" + stripQuotes(dname); 244 } 245 seg.getFeatures().setString("clunit_name", cname); 246 } 247 248 249 /** 250 * Strips quotes from the given string. 251 * 252 * @param s the string to strip quotes from 253 * 254 * @return a string with all single quotes removed 255 */ 256 private String stripQuotes(String s) { 257 StringBuffer sb = new StringBuffer(s.length()); 258 for (int i = 0; i < s.length(); i++) { 259 char c = s.charAt(i); 260 if (c != '\'') { 261 sb.append(c); 262 } 263 } 264 return sb.toString(); 265 } 266 267 268 /** 269 * Retrieves the string representation of this object. 270 * 271 * @return the string representation of this object 272 */ 273 public String toString() { 274 return "ClusterUnitSelector"; 275 } 276 277 /** 278 * Provides support for the Viterbi Algorithm. 279 * 280 * Implementation Notes 281 * <p> 282 * For each candidate for the current unit, calculate the cost 283 * between it and the first candidate in the next unit. Save 284 * only the path that has the least cost. By default, if two 285 * candidates come from units that are adjacent in the 286 * database, the cost is 0 (i.e., they were spoken together, 287 * so they are a perfect match). 288 * <p> 289 * 290 * Repeat the previous process for each candidate in the next 291 * unit, creating a list of least cost paths between the 292 * candidates between the current unit and the unit following 293 * it. 294 * <p> 295 * 296 * Toss out all candidates in the current unit that are not 297 * included in a path. 298 * <p> 299 * 300 * Move to the next unit and repeat the process. 301 */ 302 static class Viterbi { 303 private int numStates = -1; 304 private boolean bigIsGood = false; 305 private ViterbiPoint timeline = null; 306 private ViterbiPoint lastPoint = null; 307 private FeatureSet f = null; 308 private ClusterUnitDatabase clunitDB; 309 310 /** 311 * Creates a Viterbi class to process the given utterance. 312 * A queue of ViterbiPoints corresponding to the Items in the Relation segs 313 * is built up. 314 * 315 */ 316 public Viterbi(Relation segs, ClusterUnitDatabase db) { 317 ViterbiPoint last = null; 318 clunitDB = db; 319 f = new FeatureSetImpl(); 320 for (Item s = segs.getHead(); true; s = s.getNext()) { 321 ViterbiPoint n = new ViterbiPoint(s); 322 // The number of ViterbiPaths associated with each ViterbiPoint 323 // is determined using the variable numStates. 324 // TODO: Where can numStates be set? 325 if (numStates > 0) { 326 n.initPathArray(numStates); 327 } 328 if (last != null) { // continue to build up the queue 329 last.next = n; 330 } else { // timeline is the start of the queue 331 timeline = n; 332 } 333 last = n; 334 335 if (s == null) { // no further segments, leave loop 336 lastPoint = n; 337 break; 338 } 339 } 340 341 if (LOGGER.isLoggable(Level.FINE)) { 342 LOGGER.fine("num states " + numStates); 343 } 344 345 if (numStates == 0) { // its a general beam search 346 timeline.paths = new ViterbiPath(); 347 } 348 349 if (numStates == -1) { // dynamic number of states (# cands) 350 timeline.initPathArray(1); 351 } 352 } 353 354 /** 355 * Sets the given feature to the given value. 356 * 357 * @param name the name of the feature 358 * @param obj the new value. 359 */ 360 public void setFeature(String name, Object obj) { 361 f.setObject(name, obj); 362 } 363 364 /** 365 * Gets the value for the given feature. 366 * 367 * @param name the name of the feature 368 * 369 * @return the value of the feature 370 */ 371 public Object getFeature(String name) { 372 return f.getObject(name); 373 } 374 375 /** 376 * Carry out a Viterbi search in for a prepared queue of ViterbiPoints. 377 * In a nutshell, each Point represents a target item (a target segment); 378 * for each target Point, a number of Candidate units in the voice database 379 * are determined; a Path structure is built up, based on local best transitions. 380 * Concretely, a Path consists of a (possibly empty) previous Path, a current Candidate, 381 * and a Score. This Score is a quality measure of the Path; it is calculated as the 382 * sum of the previous Path's score, the Candidate's score, and the Cost of joining 383 * the Candidate to the previous Path's Candidate. At each step, only one Path 384 * leading to each Candidate is retained, viz. the Path with the best Score. 385 * All that is left to do is to call result() to get the best-rated 386 * path from among the paths associated with the last Point, and to associate 387 * the resulting Candidates with the segment items they will realise. 388 */ 389 void decode() { 390 for (ViterbiPoint p = timeline; p.next != null; p = p.next) { 391 // The candidates for the current item: 392 p.cands = getCandidate(p.item); 393 if (LOGGER.isLoggable(Level.FINE)) { 394 LOGGER.fine("decode " + p.cands); 395 } 396 if (numStates != 0) { 397 if (numStates == -1) { 398 // put as many (empty) path elements into p.next as there are candidates in p 399 p.next.initDynamicPathArray(p.cands); 400 } 401 402 // Now go through all existing paths and all candidates for the current item; 403 // tentatively extend each existing path to each of the candidates, 404 // but only retain the 405 // Attention: p.numStates is not numStates! 406 // numStates = a general flag indicating which type of viterbi search 407 // to use (only -1 seems to be implemented); 408 // p.numStates = the number of paths in p.statePaths, i.e. p.numStates==p.statePaths.length 409 for (int i = 0; i < p.numStates; i++) { 410 if ((p == timeline && i == 0) || 411 (p.statePaths[i] != null)) { 412 // We are at the very beginning of the search, or have a usable path to extend 413 // debug(" dc p " + p); 414 for (ViterbiCandidate c = p.cands; 415 c != null; c = c.next) { 416 // For the candidate c, create a path extending the previous path 417 // p.statePaths[i] to that candidate: 418 ViterbiPath np = getPath(p.statePaths[i], c); 419 // Compare this path to the existing best path (if any) leading to 420 // candidate c; only retain the one with the better score. 421 // TODO: why should the paths leading to the candidates realising p 422 // be stored in p.next? 423 addPaths(p.next, np); 424 } 425 } 426 } 427 } else { 428 System.err.println( 429 "Viterbi.decode: general beam search not implemented"); 430 } 431 } 432 } 433 434 435 /** 436 * Try to add paths to the given point. 437 * 438 * @param point the point to add the paths to 439 * @param paths the path 440 */ 441 void addPaths(ViterbiPoint point, ViterbiPath path) { 442 ViterbiPath nextPath; 443 for (ViterbiPath p = path; p != null; p = nextPath) { 444 nextPath = p.next; 445 addPath(point, p); 446 } 447 } 448 449 /** 450 * Add the new path to the state path if it is 451 * better than the current path. In this, state means 452 * the position of the candidate associated with this 453 * path in the candidate queue 454 * for the corresponding segment item. In other words, 455 * this method uses newPath as the one path leading to 456 * the candidate newPath.candidate, if it has a better 457 * score than the previously best path leading to that candidate. 458 * 459 * @param point where the path is added 460 * @param newPath the path to add if its score is best 461 */ 462 void addPath(ViterbiPoint point, ViterbiPath newPath) { 463 if (point.statePaths[newPath.state] == null) { 464 // we don't have one yet, so this is best 465 point.statePaths[newPath.state] = newPath; 466 } else if (isBetterThan(newPath.score, 467 point.statePaths[newPath.state].score)) { 468 // its better than what we already have 469 point.statePaths[newPath.state] = newPath; 470 } else { 471 // its not better that what we already have 472 // so we just forget about it. 473 } 474 } 475 476 /** 477 * See if a is better than b. Goodness is defined 478 * by 'bigIsGood'. 479 * 480 * @param a value to check 481 * @param b value to check. 482 * 483 * return true if a is better than b. 484 */ 485 private boolean isBetterThan(int a, int b) { 486 if (bigIsGood) { 487 return a > b; 488 } else { 489 return a < b; 490 } 491 } 492 493 /** 494 * Find the best path through the decoder, adding the feature 495 * name to the candidate. 496 * 497 * @param feature the feature to add 498 * @return true if a best path was found 499 */ 500 boolean result(String feature) { 501 ViterbiPath path; 502 503 if (timeline == null || timeline.next == null) { 504 return true; // null case succeeds 505 } 506 path = findBestPath(); 507 508 if (path == null) { 509 return false; 510 } 511 512 for (; path != null; path = path.from) { 513 if (path.candidate != null) { 514 path.candidate.item.getFeatures().setObject(feature, 515 path.candidate.value); 516 } 517 } 518 return true; 519 } 520 521 /** 522 * Given a feature, copy the value associated with feature 523 * name from the path to each item in the path. 524 * 525 * @param feature the name of the feature. 526 */ 527 void copyFeature(String feature) { 528 ViterbiPath path = findBestPath(); 529 if (path == null) { 530 return; // nothing to copy, empty stream or no solution 531 } 532 533 for (; path != null; path = path.from) { 534 if (path.candidate != null && path.isPresent(feature)) { 535 path.candidate.item.getFeatures().setObject(feature, 536 path.getFeature(feature)); 537 } 538 } 539 } 540 541 /** 542 * Finds the best (queue of) candidate(s) for a given (segment) item. 543 * This traverses a CART tree for target cluster selection as described in 544 * the paper introducing the clunits algorithm. This corresponds to the 545 * "target costs" described for general unit selection. 546 * @return the first candidate in the queue of candidate units for this item. 547 */ 548 private ViterbiCandidate getCandidate(Item item) { 549 // TODO: This should better be called getCandidates() (plural form), 550 // because what it does is find all the candidates for the item 551 // and return the head of the queue. 552 String unitType = item.getFeatures().getString("clunit_name"); 553 CART cart = clunitDB.getTree(unitType); 554 // Here, the unit candidates are selected. 555 int[] clist = (int[]) cart.interpret(item); 556 // Now, clist is an array of instance numbers for the units of type 557 // unitType that belong to the best cluster according to the CART. 558 559 ViterbiCandidate p; 560 ViterbiCandidate all; 561 ViterbiCandidate gt; 562 563 all = null; 564 for (int i = 0; i < clist.length; i++) { 565 p = new ViterbiCandidate(); 566 p.next = all; // link them reversely: the first in clist will be at the end of the queue 567 p.item = item; // The item is the same for all these candidates in the queue. 568 p.score = 0; 569 // remember the absolute unit index: 570 p.setInt(clunitDB.getUnitIndex(unitType, clist[i])); 571 all = p; 572 // this is OK 573 if (LOGGER.isLoggable(Level.FINE)) { 574 LOGGER.fine(" gc adding " + clist[i]); 575 } 576 } 577 578 // Take into account candidates for previous item? 579 // Depending on the setting of EXTEND_SELECTIONS in the database, 580 // look the first candidates for the preceding item, 581 // and add the units following these (which are not yet candidates) 582 // as candidates. EXTEND_SELECTIONS indicates how many of these 583 // are added. A high setting will add candidates which don't fit the 584 // target well, but which can be smoothly concatenated with the context. 585 // In a sense, this means trading target costs against join costs. 586 if (clunitDB.getExtendSelections() > 0 && 587 item.getPrevious() != null) { 588 // Get the candidates for the preceding (segment) item 589 ViterbiCandidate lc = (ViterbiCandidate) (item. 590 getPrevious().getFeatures().getObject("clunit_cands")); 591 if (LOGGER.isLoggable(Level.FINE)) { 592 LOGGER.fine(" lc " + lc); 593 } 594 for (int e = 0; lc != null && 595 (e < clunitDB.getExtendSelections()); 596 lc = lc.next) { 597 int nu = clunitDB.getNextUnit(lc.ival); 598 if (LOGGER.isLoggable(Level.FINE)) { 599 LOGGER.fine(" e: " + e + " nu: " + nu); 600 } 601 if (nu == ClusterUnitDatabase.CLUNIT_NONE) { 602 continue; 603 } 604 605 // Look through the list of candidates for the current item: 606 for (gt = all; gt != null; gt = gt.next) { 607 if (LOGGER.isLoggable(Level.FINE)) { 608 LOGGER.fine(" gt " + gt.ival + " nu " + nu); 609 } 610 if (nu == gt.ival) { 611 // The unit following one of the candidates for the preceding 612 // item already is a candidate for the current item. 613 break; 614 } 615 } 616 617 if (LOGGER.isLoggable(Level.FINE)) { 618 LOGGER.fine("nu " + clunitDB.getUnit(nu).getName() + " all " + 619 clunitDB.getUnit(all.ival).getName() + 620 " " + all.ival); 621 } 622 if ((gt == null)&&clunitDB.isUnitTypeEqual(nu, all.ival)) { 623 // nu is of the right unit type and is not yet one of the candidates. 624 // add it to the queue of candidates for the current item: 625 p = new ViterbiCandidate(); 626 p.next = all; 627 p.item = item; 628 p.score = 0; 629 p.setInt(nu); 630 all = p; 631 e++; 632 } 633 } 634 } 635 item.getFeatures().setObject("clunit_cands", all); 636 return all; 637 } 638 639 /** 640 * Construct a new path element linking a previous path to the given candidate. 641 * The (penalty) score associated with the new path is calculated as the sum of 642 * the score of the old path plus the score of the candidate itself plus the 643 * join cost of appending the candidate to the nearest candidate in the given path. 644 * This join cost takes into account optimal coupling if the database has 645 * OPTIMAL_COUPLING set to 1. The join position is saved in the new path, as 646 * the features "unit_prev_move" and "unit_this_move". 647 * 648 * @param path the previous path, or null if this candidate starts a new path 649 * @param candiate the candidate to add to the path 650 * 651 * @return a new path, consisting of this candidate appended to the previous path, and 652 * with the cumulative (penalty) score calculated. 653 */ 654 private ViterbiPath getPath(ViterbiPath path, 655 ViterbiCandidate candidate) { 656 int cost; 657 ViterbiPath newPath = new ViterbiPath(); 658 659 newPath.candidate = candidate; 660 newPath.from = path; 661 // 662 // Flite 1.1 has some logic here to test to see 663 // if the unit database is fully populated or not and if not 664 // load fixed residuals and calculate distance with a 665 // different distance algorithm that is designed for fixed 666 // point. FreeTTS doesn't really need to do that. 667 // 668 669 if (path == null || path.candidate == null) { 670 cost = 0; 671 } else { 672 int u0 = path.candidate.ival; 673 int u1 = candidate.ival; 674 if (clunitDB.getOptimalCoupling() == 1) { 675 Cost oCost = getOptimalCouple(u0, u1); 676 if (oCost.u0Move != -1) { 677 newPath.setFeature("unit_prev_move", new 678 Integer(oCost.u0Move)); 679 } 680 if (oCost.u1Move != -1) { 681 newPath.setFeature("unit_this_move", new 682 Integer(oCost.u1Move)); 683 } 684 cost = oCost.cost; 685 } else if (clunitDB.getOptimalCoupling() == 2) { 686 cost = getOptimalCoupleFrame(u0, u1); 687 } else { 688 cost = 0; 689 } 690 } 691 692 // cost *= clunitDB.getContinuityWeight(); 693 cost *= 5; // magic number ("continuity weight") from flite 694 // TODO: remove the state attribute from ViterbiPath, as it is simply path.candidate.pos! 695 newPath.state = candidate.pos; 696 if (path == null) { 697 newPath.score = cost + candidate.score; 698 } else { 699 newPath.score = cost + candidate.score + path.score; 700 } 701 702 return newPath; 703 } 704 705 /** 706 * Find the best path. This requires decode() to have been run. 707 * 708 * @return the best path. 709 */ 710 private ViterbiPath findBestPath() { 711 ViterbiPoint t; 712 int best; 713 int worst; 714 ViterbiPath bestPath = null; 715 716 if (bigIsGood) { 717 worst = Integer.MIN_VALUE; 718 } else { 719 worst = Integer.MAX_VALUE; 720 } 721 722 best = worst; 723 724 // TODO: do not need t, can use lastPoint throughout 725 t = lastPoint; 726 727 if (numStates != 0) { 728 if (LOGGER.isLoggable(Level.FINE)) { 729 LOGGER.fine("fbp ns " + numStates + " t " 730 + t.numStates + " best " + best); 731 } 732 // All paths end in lastPoint, and take into account 733 // previous path segment's scores. Therefore, it is 734 // sufficient to find the best path from among the 735 // paths for lastPoint. 736 for (int i = 0; i < t.numStates; i++) { 737 if (t.statePaths[i] != null && 738 (isBetterThan(t.statePaths[i].score, best))) { 739 best = t.statePaths[i].score; 740 bestPath = t.statePaths[i]; 741 } 742 } 743 } 744 return bestPath; 745 } 746 747 /** 748 * Find the optimal coupling frame for a pair of units. 749 * 750 * @param u0 first unit to try 751 * @param u1 second unit to try 752 * 753 * @return the cost for this coupling, including the best coupling frame 754 */ 755 Cost getOptimalCouple(int u0, int u1) { 756 int a,b; 757 int u1_p; 758 int i, fcount; 759 int u0_st, u1_p_st, u0_end, u1_p_end; 760 int best_u0, best_u1_p; 761 int dist, best_val; 762 Cost cost = new Cost(); 763 764 u1_p = clunitDB.getPrevUnit(u1); 765 766 // If u0 precedes u1, the cost is 0, and we're finished. 767 if (u1_p == u0) { 768 return cost; 769 } 770 771 772 // If u1 does not have a previous unit, or that previous 773 // unit does not belong to the same phone, the optimal 774 // couple frame must be found between u0 and u1. 775 if (u1_p == ClusterUnitDatabase.CLUNIT_NONE || 776 clunitDB.getPhone(u0) != 777 clunitDB.getPhone(u1_p)) { 778 cost.cost = 10 * getOptimalCoupleFrame(u0, u1); 779 return cost; 780 } 781 782 // If u1 has a valid previous unit, try to find the optimal 783 // couple point between u0 and that previous unit, u1_p. 784 785 // Find out which of u1_p and u0 is shorter. 786 // In both units, we plan to start from one third of the unit length, 787 // and to compare frame coupling frame by frame until the end of the 788 // shorter unit is reached. 789 u0_end = clunitDB.getEnd(u0) - clunitDB.getStart(u0); 790 u1_p_end = clunitDB.getEnd(u1_p) - clunitDB.getStart(u1_p); 791 u0_st = u0_end / 3; 792 u1_p_st = u1_p_end / 3; 793 794 if ((u0_end - u0_st) < (u1_p_end - u1_p_st)) { 795 fcount = u0_end - u0_st; 796 // We could now shift the starting point for coupling in the longer unit 797 // so that the distance from the end is the same in both units: 798 /* u1_p_st = u1_p_end - fcount; */ 799 } else { 800 fcount = u1_p_end - u1_p_st; 801 // We could now shift the starting point for coupling in the longer unit 802 // so that the distance from the end is the same in both units: 803 /* u0_st = u0_end - fcount; */ 804 } 805 806 // Now go through the two units, and search for the frame pair where 807 // the acoustic distance is smallest. 808 best_u0 = u0_end; 809 best_u1_p = u1_p_end; 810 best_val = Integer.MAX_VALUE; 811 812 for (i = 0; i < fcount; ++i) { 813 a = clunitDB.getStart(u0)+ u0_st + i; 814 b = clunitDB.getStart(u1_p) + u1_p_st + i; 815 dist = getFrameDistance(a, b, 816 clunitDB.getJoinWeights(), 817 clunitDB.getMcep().getSampleInfo().getNumberOfChannels()) 818 + Math.abs( clunitDB.getSts().getFrameSize(a) - 819 clunitDB.getSts().getFrameSize(b)) * 820 clunitDB.getContinuityWeight(); 821 822 if (dist < best_val) { 823 best_val = dist; 824 best_u0 = u0_st + i; 825 best_u1_p = u1_p_st + i; 826 } 827 } 828 829 // u0Move is the new end for u0 830 // u1Move is the new start for u1 831 cost.u0Move = clunitDB.getStart(u0) + best_u0; 832 cost.u1Move = clunitDB.getStart(u1_p) + best_u1_p; 833 cost.cost = 30000 + best_val; 834 return cost; 835 } 836 837 /** 838 * Returns the distance between the successive potential 839 * frames. 840 * 841 * @param u0 the first unit to try 842 * @param u1 the second unit to try 843 * 844 * @return the distance between the two units 845 */ 846 int getOptimalCoupleFrame(int u0, int u1) { 847 int a, b; 848 849 if (clunitDB.getPrevUnit(u1) == u0) { 850 return 0; // consecutive units win 851 } 852 853 if (clunitDB.getNextUnit(u0) != ClusterUnitDatabase.CLUNIT_NONE) { 854 a = clunitDB.getEnd(u0); 855 } else { // don't want to do this but it's all that is left to do 856 a = clunitDB.getEnd(u0) - 1; // if num frames < 1 this is bad 857 } 858 b = clunitDB.getStart(u1); 859 860 return getFrameDistance(a, b, 861 clunitDB.getJoinWeights(), 862 clunitDB.getMcep().getSampleInfo().getNumberOfChannels()) 863 + Math.abs( clunitDB.getSts().getFrameSize(a) - 864 clunitDB.getSts().getFrameSize(b)) * 865 clunitDB.getContinuityWeight(); 866 } 867 868 /** 869 * Get the 'distance' between the frames a and b. 870 * 871 * @param a first frame 872 * @param b second frame 873 * @param joinWeights the weights used in comparison 874 * @param order number of compares 875 * 876 * @return the distance between the frames 877 */ 878 public int getFrameDistance(int a, int b, int[] joinWeights,int order) { 879 880 if (LOGGER.isLoggable(Level.FINE)) { 881 LOGGER.fine(" gfd a " + a + " b " + b + " or " + order); 882 } 883 int r, i; 884 short[] bv = clunitDB.getMcep().getSample(b).getFrameData(); 885 short[] av = clunitDB.getMcep().getSample(a).getFrameData(); 886 887 for (r = 0, i = 0; i < order; i++) { 888 int diff = av[i] - bv[i]; 889 r += Math.abs(diff) * joinWeights[i] / 65536; 890 } 891 return r; 892 } 893 894 } 895 896 897 /** 898 * Represents a point in the Viterbi path. A point corresponds to an item, 899 * e.g. a Segment. 900 * Each ViterbiPoint knows 901 * about its next ViterbiPoint, i.e. they can form a queue. 902 */ 903 static class ViterbiPoint { 904 Item item = null; 905 // TODO: remove the numStates attribute from ViterbiPoint, as this is only statePaths.length 906 int numStates = 0; 907 int numPaths = 0; 908 ViterbiCandidate cands = null; 909 ViterbiPath paths = null; 910 ViterbiPath[] statePaths = null; 911 ViterbiPoint next = null; 912 913 /** 914 * Creates a ViterbiPoint for the given item. A typical item of choice is a Segment item. 915 * 916 * @param item the item of interest 917 */ 918 public ViterbiPoint(Item item) { 919 this.item = item; 920 } 921 922 /** 923 * Initialize the path array to the given size. 924 * 925 * @param size the size of the path array 926 */ 927 public void initPathArray(int size) { 928 if (LOGGER.isLoggable(Level.FINE)) { 929 LOGGER.fine("init_path_array: " + size); 930 } 931 numStates = size; 932 statePaths = new ViterbiPath[size]; 933 } 934 935 /** 936 * Initializes the dynamic path array. The path array will have 937 * as many ViterbiPath members as there are candidates in the 938 * queue starting with candidate. 939 * Side effect on parameter: This will set the pos member of the 940 * candidates in the queue starting with candidate to the position 941 * in the queue. 942 * 943 * @param candidate the first candidate of interest 944 */ 945 public void initDynamicPathArray(ViterbiCandidate candidate) { 946 int i = 0; 947 for (ViterbiCandidate cc = candidate; cc != null; 948 i++, cc = cc.next) { 949 cc.pos = i; 950 } 951 if (LOGGER.isLoggable(Level.FINE)) { 952 LOGGER.fine("init_dynamic_ path_array: " + i); 953 } 954 initPathArray(i); 955 } 956 957 public String toString() { 958 return " pnt: " + numStates + " paths " + numPaths; 959 } 960 } 961 962 /** 963 * Represents a candidate for the Viterbi algorthm. 964 * Each candidate knows about its next candidate, i.e. they can form 965 * a queue. 966 */ 967 static class ViterbiCandidate { 968 int score = 0; 969 Object value = null; 970 int ival = 0; 971 int pos = 0; 972 Item item = null; 973 ViterbiCandidate next = null; 974 975 /** 976 * Sets the object for this candidate. 977 * 978 * @param obj the object 979 */ 980 void set(Object obj) { 981 value = obj; 982 } 983 984 /** 985 * Sets the integer value for this candidate. This can be used for saving 986 * the unit index of the candidate unit represented by this ViterbiCandidate. 987 * 988 * @param ival the integer value 989 */ 990 void setInt(int ival) { 991 this.ival = ival; 992 set(new Integer(ival)); 993 } 994 995 /** 996 * Converts this object to a string. 997 * 998 * @return the string form of this object 999 */ 1000 public String toString() { 1001 return "VC: Score " + score + " ival " + ival + " Pos " + pos; 1002 } 1003 } 1004 1005 /** 1006 * Describes a Viterbi path. 1007 */ 1008 static class ViterbiPath { 1009 int score = 0; 1010 int state = 0; 1011 ViterbiCandidate candidate = null; 1012 private FeatureSet f = null; 1013 ViterbiPath from = null; 1014 ViterbiPath next = null; 1015 1016 /** 1017 * Sets a feature with the given name to the given value. 1018 * 1019 * @param name the name of the feature 1020 * @param value the new value for the feature 1021 */ 1022 void setFeature(String name, Object value) { 1023 if (f == null) { 1024 f = new FeatureSetImpl(); 1025 } 1026 f.setObject(name, value); 1027 } 1028 1029 /** 1030 * Retrieves a feature. 1031 * 1032 * @param name the name of the feature 1033 * 1034 * @return the feature 1035 */ 1036 Object getFeature(String name) { 1037 Object value = null; 1038 if (f != null) { 1039 value = f.getObject(name); 1040 } 1041 return value; 1042 } 1043 1044 /** 1045 * Determines if the feature with the given name 1046 * exsists. 1047 * 1048 * @param name the feature to look for 1049 * 1050 * @return <code>true</code> if the feature is present; 1051 * otherwise <code>false</code>. 1052 */ 1053 boolean isPresent(String name) { 1054 if (f == null) { 1055 return false; 1056 } else { 1057 return getFeature(name) != null; 1058 } 1059 } 1060 1061 /** 1062 * Converts this object to a string. 1063 * 1064 * @return the string form of this object 1065 */ 1066 public String toString() { 1067 return "ViterbiPath score " + score + " state " + state; 1068 } 1069 } 1070} 1071 1072 1073/** 1074 * Information returned from getOptimalCoupling. 1075 */ 1076class Cost { 1077 int cost = 0; 1078 int u0Move = -1; 1079 int u1Move = -1; 1080} 1081 1082 1083/** 1084 * A Cluster Unit. 1085 */ 1086class ClusterUnit implements com.sun.speech.freetts.Unit { 1087 1088 private ClusterUnitDatabase db; 1089 private String name; 1090 private int start; 1091 private int end; 1092 1093 /** 1094 * Contructs a cluster unit given. 1095 * 1096 * @param db the database 1097 * @param name the unitName 1098 * @param start the start 1099 * @param end the end 1100 */ 1101 public ClusterUnit(ClusterUnitDatabase db, String name, int start,int end) { 1102 this.db = db; 1103 this.start = start; 1104 this.end = end; 1105 this.name = name; 1106 } 1107 1108 1109 /** 1110 * Returns the start. 1111 * 1112 * @return the start 1113 */ 1114 public int getStart() { 1115 return start; 1116 } 1117 1118 /** 1119 * Returns the end. 1120 * 1121 * @return the end 1122 */ 1123 public int getEnd() { 1124 return end; 1125 } 1126 1127 /** 1128 * Returns the name of this Unit. 1129 * 1130 * @return the name of this unit 1131 */ 1132 public String getName() { 1133 return name; 1134 } 1135 1136 /** 1137 * returns the size of the unit. 1138 * 1139 * @return the size of the unit 1140 */ 1141 public int getSize() { 1142 return db.getSts().getUnitSize(start, end); 1143 } 1144 1145 /** 1146 * Retrieves the nearest sample. 1147 * 1148 * @param index the ideal index 1149 * 1150 * @return the nearest Sample 1151 */ 1152 public Sample getNearestSample(float index) { 1153 int i, iSize = 0, nSize; 1154 SampleSet sts = db.getSts(); 1155 1156 // loop through all the Samples in this unit 1157 for (i = start; i < end; i++) { 1158 Sample sample = sts.getSample(i); 1159 nSize = iSize + sample.getResidualSize(); 1160 1161 if (Math.abs(index - (float) iSize) < 1162 Math.abs(index - (float) nSize)) { 1163 return sample; 1164 } 1165 iSize = nSize; 1166 } 1167 return sts.getSample(end - 1); 1168 } 1169 1170 /** 1171 * gets the string name for the unit. 1172 * 1173 * @return string rep of this object. 1174 */ 1175 public String toString() { 1176 return getName(); 1177 } 1178 1179 1180 /** 1181 * Dumps this unit. 1182 */ 1183 public void dump() { 1184 } 1185} 1186