/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package com.sun.speech.freetts.relp;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.text.DecimalFormat;

import javax.sound.sampled.AudioFormat;

import com.sun.speech.freetts.FreeTTSSpeakable;
import com.sun.speech.freetts.Utterance;
import com.sun.speech.freetts.audio.AudioPlayer;
import com.sun.speech.freetts.util.Utilities;
import com.sun.speech.freetts.util.WaveUtils;


/**
 * Contains the result of linear predictive coding processing:
 * per-frame LPC coefficients, frame times, per-frame residual sizes
 * and the residual samples themselves.
 */
public class LPCResult {

    private static final double POST_EMPHASIS = 0.0;

    private int frameSize = 10;
    private int numberOfFrames = 0;

    private short[][] frames = null;
    private int[] times = null;
    private int[] sizes = null;

    /**
     * The residuals, stored offset by -128; add 128 to a stored value
     * to obtain the index into {@link #residualToFloatMap}.
     */
    private byte[] residuals = null;

    private int numberOfChannels;
    private int sampleRate;
    private int residualFold;

    private float lpcMinimum;
    private float lpcRange;

    /**
     * Size in bytes of the buffer used when streaming samples to an
     * AudioPlayer.
     *
     * NOTE(review): playWaveSamples stores two bytes per sample and only
     * then checks whether the buffer is full, so this value is assumed
     * to be even and at least 2 -- confirm that the system property is
     * never set otherwise.
     */
    private final static int MAX_SAMPLE_SIZE =
        Utilities.getInteger("com.sun.speech.freetts.LpcResult.maxSamples",
                             1024).intValue();

    /**
     * Maps a residual (stored value + 128) to the float value of
     * WaveUtils.ulawToShort() for that residual.
     */
    private final static float[] residualToFloatMap = new float[256];

    static {
        for (short i = 0; i < residualToFloatMap.length; i++) {
            residualToFloatMap[i] = (float) WaveUtils.ulawToShort(i);
        }
        // A stored residual of 0 (index 128) is treated as ulaw 255.
        residualToFloatMap[128] = (float) WaveUtils.ulawToShort((short) 255);
    }


    /**
     * Creates an empty LPCResult with a residual fold of 1.
     */
    public LPCResult() {
        residualFold = 1;
    }

    /**
     * Resets the number of frames in this LPCResult. The times, frames
     * and sizes arrays are reallocated; any previous content is dropped.
     *
     * @param numberOfFrames  the number of frames in this LPC result
     */
    public void resizeFrames(int numberOfFrames) {
        times = new int[numberOfFrames];
        frames = new short[numberOfFrames][];
        sizes = new int[numberOfFrames];
        this.numberOfFrames = numberOfFrames;
    }

    /**
     * Resets the number of residuals. The new array is zero-filled;
     * a stored zero is looked up as ulaw 255 during synthesis (which,
     * according to the original documentation, is 0 for mulaw).
     *
     * @param numberOfSamples  the number of samples in this LPC result
     */
    public void resizeResiduals(int numberOfSamples) {
        residuals = new byte[numberOfSamples];
    }

    /**
     * A convenience method for setting the LPC values.
     *
     * @param numberOfChannels  the number of channels
     * @param sampleRate  the sample rate
     * @param residualFold  the residual fold
     * @param lpcMin  the LPC minimum
     * @param lpcRange  the LPC range
     */
    public void setValues(int numberOfChannels,
                          int sampleRate,
                          int residualFold,
                          float lpcMin, float lpcRange) {
        this.numberOfChannels = numberOfChannels;
        this.sampleRate = sampleRate;
        // Previously this argument was accepted but silently discarded.
        this.residualFold = residualFold;
        this.lpcMinimum = lpcMin;
        this.lpcRange = lpcRange;
    }

    /**
     * Returns the time difference of the frame at the given position
     * with the frame prior to that. If the frame at the given position is
     * the first frame (position 0), the time of that frame is returned.
     * An out-of-range position yields 0.
     *
     * @param frameIndex  the position of the frame
     *
     * @return the time difference of the frame at the given position
     *     with the frame prior to that
     */
    public int getFrameShift(int frameIndex) {
        if (0 <= frameIndex && frameIndex < times.length) {
            if (frameIndex > 0) {
                return times[frameIndex] - times[frameIndex - 1];
            } else {
                return times[frameIndex];
            }
        } else {
            return 0;
        }
    }

    /**
     * Returns the sizes of frames in this LPC.
     *
     * @return the sizes of frames
     */
    public int getFrameSize() {
        return frameSize;
    }

    /**
     * Returns the frame at the given index.
     *
     * @param index  the index of interest
     *
     * @return the frame at the given index
     */
    public short[] getFrame(int index) {
        return frames[index];
    }

    /**
     * Returns the array of times.
     *
     * @return the array of times
     */
    public int[] getTimes() {
        return times;
    }

    /**
     * Returns the number of frames in this LPCResult.
     *
     * @return the number of frames
     */
    public int getNumberOfFrames() {
        return numberOfFrames;
    }

    /**
     * Returns the number of channels in this LPCResult.
     *
     * @return the number of channels
     */
    public int getNumberOfChannels() {
        return numberOfChannels;
    }

    /**
     * Returns the LPC minimum.
     *
     * @return the LPC minimum
     */
    public float getLPCMin() {
        return lpcMinimum;
    }

    /**
     * Returns the LPC range.
     *
     * @return the LPC range
     */
    public float getLPCRange() {
        return lpcRange;
    }

    /**
     * Returns the number of samples in this LPC result
     *
     * @return the number of samples, or 0 if no residuals are allocated
     */
    public int getNumberOfSamples() {
        if (residuals == null) {
            return 0;
        } else {
            return residuals.length;
        }
    }

    /**
     * Returns the sample rate.
     *
     * @return the sample rate
     */
    public int getSampleRate() {
        return sampleRate;
    }

    /**
     * Returns the array of residuals sizes.
     *
     * @return the array of residuals sizes
     */
    public int[] getResidualSizes() {
        return sizes;
    }

    /**
     * Returns the array of residuals.
     *
     * @return the array of residuals
     */
    public byte[] getResiduals() {
        return residuals;
    }

    /**
     * Sets the sizes of frames in this LPC to the given size.
     *
     * @param frameSize  the new frame size
     */
    public void setFrameSize(int frameSize) {
        this.frameSize = frameSize;
    }

    /**
     * Sets the number of frames in this LPC Result. The frame arrays
     * are not reallocated; use resizeFrames() for that.
     *
     * @param numberFrames  the number of frames in this result
     */
    public void setNumberOfFrames(int numberFrames) {
        this.numberOfFrames = numberFrames;
    }

    /**
     * Sets the frame at the given index.
     *
     * @param index  the position of the frame to set
     * @param newFrames  new frame data
     */
    public void setFrame(int index, short[] newFrames) {
        frames[index] = newFrames;
    }

    /**
     * Sets the array of times.
     *
     * @param times  the times data
     */
    public void setTimes(int[] times) {
        this.times = times;
    }

    /**
     * Sets the number of channels.
     *
     * @param numberOfChannels  the number of channels
     */
    public void setNumberOfChannels(int numberOfChannels) {
        this.numberOfChannels = numberOfChannels;
    }

    /**
     * Sets the LPC minimum.
     *
     * @param min  the LPC minimum
     */
    public void setLPCMin(float min) {
        this.lpcMinimum = min;
    }

    /**
     * Sets the LPC range.
     *
     * @param range  the LPC range
     */
    public void setLPCRange(float range) {
        this.lpcRange = range;
    }

    /**
     * Sets the sample rate.
     *
     * @param rate  the sample rate
     */
    public void setSampleRate(int rate) {
        this.sampleRate = rate;
    }

    /**
     * Copies the given residual sizes into the existing sizes array.
     * Only as many entries as fit in both arrays are copied.
     *
     * @param sizes  the new residual sizes
     */
    public void setResidualSizes(int[] sizes) {
        for (int i = 0; i < this.sizes.length && i < sizes.length; i++) {
            this.sizes[i] = sizes[i];
        }
    }

    /**
     * Copies the information in the given unit to the array of residuals,
     * starting at the given index, up until targetSize bytes. A source
     * shorter than targetSize is centered in the target window; a longer
     * source has its central targetSize bytes copied.
     *
     * @param source  the unit that holds the information source
     * @param targetPosition  start position in the array of residuals
     * @param targetSize  the maximum number of bytes to copy
     */
    public void copyResiduals(byte[] source,
                              int targetPosition,
                              int targetSize) {
        int unitSize = source.length;
        if (unitSize < targetSize) {
            int targetStart = (targetSize - unitSize) / 2;
            System.arraycopy(source, 0,
                             residuals, targetPosition + targetStart,
                             source.length);
        } else {
            int sourcePosition = (unitSize - targetSize) / 2;
            System.arraycopy(source, sourcePosition,
                             residuals, targetPosition,
                             targetSize);
        }
    }

    /**
     * Writes a single residual pulse, derived from the first byte of the
     * given unit, into the array of residuals.
     *
     * NOTE(review): targetPosition is not used when computing the
     * destination index, so every pulse lands near the start of the
     * residuals array. This looks unintended, but the correct index
     * cannot be determined from this file; behavior is left unchanged.
     *
     * @param source  the unit that holds the information source
     * @param targetPosition  start position in the array of residuals
     *     (currently unused, see note above)
     * @param targetSize  the size of the target window
     */
    public void copyResidualsPulse(byte[] source,
                                   int targetPosition, int targetSize) {
        int unitSize = source.length;
        short sample = (short) (source[0] + 128);
        if (unitSize < targetSize) {
            residuals[(targetSize - unitSize) / 2] =
                WaveUtils.shortToUlaw(sample);
        } else {
            residuals[(unitSize - targetSize) / 2] =
                WaveUtils.shortToUlaw(sample);
        }
    }

    /**
     * Given a 16 bit value (represented as an int), extract
     * the high eight bits and return them
     *
     * @param val the 16 bit value
     *
     * @return the high eight bits
     */
    private final static byte hibyte(int val) {
        return (byte) (val >>> 8);
    }

    /**
     * Given a 16 bit value (represented as an int), extract
     * the low eight bits and return them
     *
     * @param val the 16 bit value
     *
     * @return the low eight bits
     */
    private final static byte lobyte(int val) {
        return (byte) (val & 0x000000FF);
    }


    /**
     * Synthesizes this LPCResult and sends the audio to the given player.
     *
     * @param player  where to send the audio
     * @param utterance  the utterance whose speakable is polled for
     *     completion while playing
     *
     * @return <code>true</code> if all audio was written and the player
     *     ended normally; <code>false</code> otherwise
     *
     * @exception IOException
     *     if an error occurs while writing the audio data
     */
    public boolean playWave(AudioPlayer player, Utterance utterance)
        throws IOException {
        return playWaveSamples(player, utterance.getSpeakable(),
                               getNumberOfSamples() * 2);
    }


    /**
     * Synthesizes this LPCResult into 16-bit samples.
     *
     * @return the samples, two bytes per sample, high byte first
     */
    public byte[] getWaveSamples() {
        return getWaveSamples(2 * getNumberOfSamples(), null);
    }

    /**
     * Loads the LPC coefficients of one frame into the given circular
     * coefficient list.
     *
     * @param frame  the packed coefficients of the frame
     * @param lpcCoefficients  the list receiving the unpacked values
     * @param numberChannels  the number of coefficients to unpack
     * @param multiplier  the scale applied to each packed coefficient
     */
    private void unpackCoefficients(short[] frame,
                                    FloatList lpcCoefficients,
                                    int numberChannels,
                                    double multiplier) {
        FloatList lpcCoeffs = lpcCoefficients;
        for (int k = 0; k < numberChannels; k++) {
            lpcCoeffs.value = (float) ((frame[k] + 32768.0)
                                       * multiplier) + lpcMinimum;
            lpcCoeffs = lpcCoeffs.next;
        }
    }

    /**
     * Computes one output value from a residual and the previous outputs.
     *
     * @param residual  the stored residual
     * @param outBuffer  the node that will receive the new output; the
     *     nodes before it hold the previous outputs
     * @param lpcCoefficients  the circular list of LPC coefficients
     *
     * @return the new output value
     */
    private static float synthesize(byte residual,
                                    FloatList outBuffer,
                                    FloatList lpcCoefficients) {
        FloatList backBuffer = outBuffer.prev;
        float ob = residualToFloatMap[residual + 128];

        FloatList lpcCoeffs = lpcCoefficients;
        do {
            ob += lpcCoeffs.value * backBuffer.value;
            backBuffer = backBuffer.prev;
            lpcCoeffs = lpcCoeffs.next;
        } while (lpcCoeffs != lpcCoefficients);
        return ob;
    }

    /**
     * Gets the samples for this utterance.
     *
     * @param numberSamples  the size in bytes of the returned array
     * @param utterance  the utterance (currently unused)
     *
     * @return the samples, two bytes per sample, high byte first
     */
    private byte[] getWaveSamples(int numberSamples,
                                  Utterance utterance) {
        int numberChannels = getNumberOfChannels();
        float pp = 0;

        byte[] samples = new byte[numberSamples];
        int[] residualSizes = getResidualSizes();

        FloatList outBuffer = FloatList.createList(numberChannels + 1);
        FloatList lpcCoefficients = FloatList.createList(numberChannels);

        double multiplier = (double) getLPCRange() / 65535.0;
        int s = 0;

        // for each frame in the LPC result
        for (int r = 0, i = 0; i < numberOfFrames; i++) {

            unpackCoefficients(getFrame(i), lpcCoefficients,
                               numberChannels, multiplier);

            int pmSizeSamples = residualSizes[i];

            // resynthesis the signal, pmSizeSamples ~= 90
            // what's in the loop is done for each residual
            for (int j = 0; j < pmSizeSamples; j++, r++) {
                float ob = synthesize(residuals[r], outBuffer,
                                      lpcCoefficients);

                int sample = (int) (ob + (pp * POST_EMPHASIS));
                samples[s++] = hibyte(sample);
                samples[s++] = lobyte(sample);

                outBuffer.value = pp = ob;
                outBuffer = outBuffer.next;
            }
        }
        return samples;
    }

    /**
     * Play the sample data on the given player. Synthesis stops early
     * as soon as the speakable reports completion or a write fails.
     *
     * @param player  where to send the audio
     * @param speakable  polled for completion while playing
     * @param numberSamples  the number of bytes announced to the player
     *
     * @return <code>true</code> if all audio was written and the player
     *     ended normally; <code>false</code> otherwise
     *
     * @exception IOException
     *     if an error occurs while writing the audio data
     */
    private boolean playWaveSamples(AudioPlayer player,
                                    FreeTTSSpeakable speakable,
                                    int numberSamples) throws IOException {
        boolean ok = true;
        int numberChannels = getNumberOfChannels();
        float pp = 0;

        byte[] samples = new byte[MAX_SAMPLE_SIZE];
        int[] residualSizes = getResidualSizes();

        FloatList outBuffer = FloatList.createList(numberChannels + 1);
        FloatList lpcCoefficients = FloatList.createList(numberChannels);

        double multiplier = (double) getLPCRange() / 65535.0;
        int s = 0;

        // for each frame in the LPC result
        player.begin(numberSamples);
        for (int r = 0, i = 0;
             (ok &= !speakable.isCompleted()) &&
                 i < numberOfFrames; i++) {

            unpackCoefficients(getFrame(i), lpcCoefficients,
                               numberChannels, multiplier);

            int pmSizeSamples = residualSizes[i];

            // resynthesis the signal, pmSizeSamples ~= 90
            // what's in the loop is done for each residual
            for (int j = 0; j < pmSizeSamples; j++, r++) {
                float ob = synthesize(residuals[r], outBuffer,
                                      lpcCoefficients);

                int sample = (int) (ob + (pp * POST_EMPHASIS));
                samples[s++] = hibyte(sample);
                samples[s++] = lobyte(sample);

                // buffer full: hand it to the player and start over
                if (s >= MAX_SAMPLE_SIZE) {
                    if ((ok &= !speakable.isCompleted()) &&
                        !player.write(samples)) {
                        ok = false;
                    }
                    s = 0;
                }

                outBuffer.value = pp = ob;
                outBuffer = outBuffer.next;
            }
        }

        // write out the very last samples
        if ((ok &= !speakable.isCompleted()) && s > 0) {
            ok = player.write(samples, 0, s);
            s = 0;
        }

        // tell the AudioPlayer it is the end of Utterance
        if (ok &= !speakable.isCompleted()) {
            ok = player.end();
        }

        return ok;
    }

    /**
     * Dumps this LPCResult to standard out
     */
    public void dump() {
        dump(new OutputStreamWriter(System.out));
    }

    /**
     * Dumps this LPCResult to the given stream. The stream is flushed
     * but not closed.
     *
     * @param writer  the output stream
     */
    public void dump(Writer writer) {
        DecimalFormat numberFormat = new DecimalFormat();
        numberFormat.setMaximumFractionDigits(6);
        numberFormat.setMinimumFractionDigits(6);
        PrintWriter pw = new PrintWriter(new BufferedWriter(writer));

        if (getNumberOfFrames() == 0) {
            pw.println("# ========== LPCResult ==========");
            pw.println("# Num_of_Frames: " + getNumberOfFrames());
            pw.flush();
            return;
        }
        pw.println("========== LPCResult ==========");
        pw.println("Num_of_Frames: " + getNumberOfFrames());
        pw.println("Num_of_Channels: " + getNumberOfChannels());
        pw.println("Num_of_Samples: " + getNumberOfSamples());
        pw.println("Sample_Rate: " + sampleRate);
        pw.println("LPC_Minimum: " + numberFormat.format(lpcMinimum));
        pw.println("LPC_Range: " + numberFormat.format(lpcRange));
        pw.println("Residual_Fold: " + residualFold);
        pw.println("Post_Emphasis: " + numberFormat.format(POST_EMPHASIS));

        int i;
        pw.print("Times:\n");
        for (i = 0; i < getNumberOfFrames(); i++) {
            pw.print(times[i] + " ");
        }
        pw.print("\nFrames: ");
        for (i = 0; i < getNumberOfFrames(); i++) {
            // for each frame, print all elements
            short[] frame = getFrame(i);
            for (int j = 0; j < frame.length; j++) {
                pw.print((((int) frame[j]) + 32768) + "\n");
            }
        }
        pw.print("\nSizes: ");
        for (i = 0; i < getNumberOfFrames(); i++) {
            pw.print(sizes[i] + " ");
        }
        pw.print("\nResiduals: ");
        for (i = 0; i < getNumberOfSamples(); i++) {
            // a stored zero is reported as 255
            if (residuals[i] == 0) {
                pw.print(255);
            } else {
                pw.print((((int) residuals[i]) + 128));
            }
            pw.print("\n");
        }
        // one flush at the end instead of one per residual
        pw.flush();
    }


    /**
     * Dumps the wave data associated with this result
     */
    public void dumpASCII() {
        dumpASCII(new OutputStreamWriter(System.out));
    }

    /**
     * Dumps the wave data associated with this result
     *
     * @param path  the path where the wave data is appended to
     *
     * @throws IOException if an IO error occurs
     */
    public void dumpASCII(String path) throws IOException {
        Writer writer = new FileWriter(path, true);
        try {
            getWave().dump(writer);
        } finally {
            // the writer is opened here, so it must be released here
            writer.close();
        }
    }

    /**
     * Synthesize a Wave from this LPCResult
     *
     * @return the wave
     */
    private Wave getWave() {
        // construct a new wave object
        AudioFormat audioFormat = new AudioFormat
            (getSampleRate(),
             Wave.DEFAULT_SAMPLE_SIZE_IN_BITS, 1,
             Wave.DEFAULT_SIGNED, true);
        return new Wave(audioFormat,
                        getWaveSamples(getNumberOfSamples() * 2, null));
    }

    /**
     * Dumps the wave out to the given stream. The stream is flushed
     * but not closed.
     *
     * @param writer  the output stream
     */
    public void dumpASCII(Writer writer) {
        Wave wave = getWave();
        wave.dump(writer);
    }

    /**
     * A Wave is an immutable class that contains the AudioFormat and
     * the actual wave samples, held as a byte array with two bytes
     * per sample.
     */
    private static class Wave {
        /**
         * The default sample size of the Wave, which is 16.
         */
        public static final int DEFAULT_SAMPLE_SIZE_IN_BITS = 16;

        /**
         * A boolean indicating that the Wave is signed, i.e.,
         * this value is true.
         */
        public static final boolean DEFAULT_SIGNED = true;

        /**
         * A boolean indicating that the Wave samples are represented as
         * little endian, i.e., this value is false.
         */
        public static final boolean DEFAULT_BIG_ENDIAN = false;


        private final byte[] samples;
        private final AudioFormat audioFormat;

        /**
         * Constructs a Wave with the given audio format and wave samples.
         *
         * @param audioFormat  the audio format of the wave
         * @param samples  the wave samples
         */
        Wave(AudioFormat audioFormat, byte[] samples) {
            this.audioFormat = audioFormat;
            this.samples = samples;
        }


        /**
         * Dumps the wave out to the given stream, one sample per line
         * after a commented header. The stream is flushed but not closed.
         *
         * @param writer  the output stream
         */
        public void dump(Writer writer) {
            // guard before dereferencing: the header needs the count too
            int sampleCount = (samples == null) ? 0 : samples.length / 2;

            PrintWriter pw = new PrintWriter(new BufferedWriter(writer));
            pw.println("#========== Wave ==========");
            pw.println("#Type: NULL");
            pw.println("#Sample_Rate: " + (int) audioFormat.getSampleRate());
            pw.println("#Num_of_Samples: " + sampleCount);
            pw.println("#Num_of_Channels: " + audioFormat.getChannels());
            if (samples != null) {
                // stop before a dangling odd byte instead of overrunning
                for (int i = 0; i + 1 < samples.length; i += 2) {
                    pw.println(
                        WaveUtils.bytesToShort(samples[i], samples[i + 1]));
                }
            }
            pw.flush();
        }
    }
}



/**
 * FloatList is used to maintain a circular buffer of float values.
 * It is essentially an index-free array of floats that can easily be
 * iterated through forwards or backwards. Keeping values in an index
 * free list like this eliminates index bounds checking which can
 * save us some time.
 */
class FloatList {
    float value;
    FloatList next;
    FloatList prev;

    /**
     * Creates a new, unlinked node holding 0.
     */
    FloatList() {
        value = 0.0F;
        next = null;
        prev = null;
    }

    /**
     * Creates a circular list of nodes of the given size
     *
     * @param size the number of nodes in the list; must be positive
     *
     * @return an entry in the list.
     *
     * @throws IllegalArgumentException if size is not positive
     */
    static FloatList createList(int size) {
        if (size <= 0) {
            // an empty ring has no entry node to return
            throw new IllegalArgumentException(
                "FloatList size must be positive: " + size);
        }

        FloatList prev = null;
        FloatList first = null;

        for (int i = 0; i < size; i++) {
            FloatList cur = new FloatList();
            cur.prev = prev;
            if (prev == null) {
                first = cur;
            } else {
                prev.next = cur;
            }
            prev = cur;
        }
        // close the ring: last node <-> first node
        first.prev = prev;
        prev.next = first;

        return first;
    }

    /**
     * prints out the contents of this list
     *
     * @param title the title of the dump
     * @param list the list to dump
     */
    static void dump(String title, FloatList list) {
        System.out.println(title);

        FloatList cur = list;
        do {
            System.out.println("Item: " + cur.value);
            cur = cur.next;
        } while (cur != list);
    }
}