001/**
002 * Portions Copyright 2001 Sun Microsystems, Inc.
003 * Portions Copyright 1999-2001 Language Technologies Institute, 
004 * Carnegie Mellon University.
005 * All Rights Reserved.  Use is subject to license terms.
006 * 
007 * See the file "license.terms" for information on usage and
008 * redistribution of this file, and for a DISCLAIMER OF ALL 
009 * WARRANTIES.
010 */
011package com.sun.speech.freetts.relp;
012
013import java.io.BufferedWriter;
014import java.io.FileWriter;
015import java.io.IOException;
016import java.io.OutputStreamWriter;
017import java.io.PrintWriter;
018import java.io.Writer;
019import java.text.DecimalFormat;
020
021import javax.sound.sampled.AudioFormat;
022
023import com.sun.speech.freetts.FreeTTSSpeakable;
024import com.sun.speech.freetts.Utterance;
025import com.sun.speech.freetts.audio.AudioPlayer;
026import com.sun.speech.freetts.util.Utilities;
027import com.sun.speech.freetts.util.WaveUtils;
028
029
030/**
031 * Contains the result of linear predictive coding processing.
032 *
033 */
034public class LPCResult {
035
036    private static final double POST_EMPHASIS = 0.0;
037
038    private int frameSize = 10;
039    private int numberOfFrames = 0;
040    
041    private short[][] frames = null;
042    private int[] times = null;
043    private int[] sizes = null;
044    
045    /**
046     * this is a normalized version of the residuals; to normalize it,
047     * add 128 to it
048     */
049    private byte[] residuals = null;
050    
051    private int numberOfChannels;
052    private int sampleRate;
053    private int residualFold;
054    
055    private float lpcMinimum;
056    private float lpcRange;
057
058    private final static int MAX_SAMPLE_SIZE = 
059        Utilities.getInteger("com.sun.speech.freetts.LpcResult.maxSamples",
060                1024).intValue();
061
062    /**
063     * Given a residual, maps it using WaveUtils.ulawToShort() to a float.
064     */
065    private final static float[] residualToFloatMap = new float[256];
066
067    static {
068        for (short i = 0; i < residualToFloatMap.length; i++) {
069            residualToFloatMap[i] = (float) WaveUtils.ulawToShort(i);
070        }
071        residualToFloatMap[128] = (float) WaveUtils.ulawToShort((short) 255);
072    }
073
074    
075    public LPCResult() {
076        residualFold = 1;
077    }
078        
079    /**
080     * Resets the number of frames in this LPCResult.
081     *
082     * @param numberOfFrames  the number of frames in this LPC result
083     */
084    public void resizeFrames(int numberOfFrames) {
085        times = new int[numberOfFrames];
086        frames = new short[numberOfFrames][];
087        sizes = new int[numberOfFrames];
088        this.numberOfFrames = numberOfFrames;
089    }
090
091    /**
092     * Resets the number of residuals, and initialize all of them to 255
093     * (which is 0 for mulaw).
094     *
095     * @param numberOfSamples  the number of samples in this LPC result
096     */
097    public void resizeResiduals(int numberOfSamples) {
098        residuals = new byte[numberOfSamples];
099    }
100
101    /**
102     * A convenience method for setting the LPC values.
103     *
104     * @param numberOfChannels  the number of channels
105     * @param sampleRate  the sample rate
106     * @param lpcMin  the LPC minimum
107     * @param lpcRange  the LPC range
108     */
109    public void setValues(int numberOfChannels,
110                          int sampleRate,
111                          int residualFold,
112                          float lpcMin, float lpcRange) {
113        this.numberOfChannels = numberOfChannels;
114        this.sampleRate = sampleRate;
115        this.lpcMinimum = lpcMin;
116        this.lpcRange = lpcRange;
117    }
118
119    /**
120     * Returns the time difference of the frame at the given position 
121     * with the frame prior to that. If the frame at the given position is
122     * the first frame (position 0), the time of that frame is returned.
123     *
124     * @param frameIndex  the position of the frame
125     *
126     * @return the time difference of the frame at the given position 
127     *     with the frame prior to that
128     */
129    public int getFrameShift(int frameIndex) {
130        if (0 <= frameIndex && frameIndex < times.length) {
131            if (frameIndex > 0) {
132                return times[frameIndex] - times[frameIndex - 1];
133            } else {
134                return times[frameIndex];
135            }
136        } else {
137            return 0;
138        }
139    }
140    
141    /**
142     * Returns the sizes of frames in this LPC.
143     *
144     * @return the sizes of frames
145     */
146    public int getFrameSize() {
147        return frameSize;
148    }
149
150    /**
151     * Returns the frame at the given index.
152     *
153     * @param index the index of interest
154     *
155     * @return the frame at the given index
156     */
157    public short[] getFrame(int index) {
158        return frames[index];
159    }
160    
161    /**
162     * Returns the array of times.
163     *
164     * @return the array of times
165     */
166    public int[] getTimes() {
167        return times;
168    }
169    
170    /**
171     * Returns the number of frames in this LPCResult.
172     *
173     * @return the number of frames
174     */
175    public int getNumberOfFrames() {
176        return numberOfFrames;
177    }
178    
179    /**
180     * Returns the number of channels in this LPCResult.
181     *
182     * @return the number of channels
183     */
184    public int getNumberOfChannels() {
185        return numberOfChannels;
186    }
187    
188    /**
189     * Returns the LPC minimum.
190     *
191     * @return the LPC minimum
192     */
193    public float getLPCMin() {
194        return lpcMinimum;
195    }
196    
197    /**
198     * Returns the LPC range.
199     *
200     * @return the LPC range
201     */
202    public float getLPCRange() {
203        return lpcRange;
204    }
205    
206    /**
207     * Returns the number of samples in this LPC result
208     *
209     * @return the number of samples
210     */
211    public int getNumberOfSamples() {
212        if (residuals == null) {
213            return 0;
214        } else {
215            return residuals.length;
216        }
217    }
218    
219    /**
220     * Returns the sample rate.
221     *
222     * @return the sample rate
223     */
224    public int getSampleRate() {
225        return sampleRate;
226    }
227    
228    /**
229     * Returns the array of residuals sizes.
230     *
231     * @return the array of residuals sizes
232     */
233    public int[] getResidualSizes() {
234        return sizes;
235    }
236
237    /**
238     * Returns the array of residuals.
239     *
240     * @return the array of residuals
241     */
242    public byte[] getResiduals() {
243        return residuals;
244    }
245
246    /**
247     * Sets the sizes of frames in this LPC to the given size.
248     *
249     * @param frameSize the new frame size
250     */
251    public void setFrameSize(int frameSize) {
252        this.frameSize = frameSize;
253    }
254
255    /**
256     * Sets the number of frames in this LPC Result.
257     * 
258     * @param numberFrames the number of frames in this result
259     */
260    public void setNumberOfFrames(int numberFrames) {
261        this.numberOfFrames = numberFrames;
262    }
263        
264    /**
265     * Sets the frame at the given index.
266     *
267     * @param index the position of the frame to set
268     * @param newFrames new frame data
269     */
270    public void setFrame(int index, short[] newFrames) {
271        frames[index] = newFrames;
272    }
273
274    /**
275     * Sets the array of times.
276     *
277     * @param times the times data
278     */
279    public void setTimes(int[] times) {
280        this.times = times;
281    }
282    
283    /**
284     * Sets the number of channels.
285     *
286     * @param numberOfChannels the number of channels
287     */
288    public void setNumberOfChannels(int numberOfChannels) {
289        this.numberOfChannels = numberOfChannels;
290    }
291    
292    /**
293     * Sets the LPC minimum.
294     *
295     * @param min the LPC minimum
296     */
297    public void setLPCMin(float min) {
298        this.lpcMinimum = min;
299    }
300    
301    /**
302     * Sets the LPC range.
303     *
304     * @param range the LPC range
305     */
306    public void setLPCRange(float range) {
307        this.lpcRange = range;
308    }
309    
310    /**
311     * Sets the sample rate.
312     *
313     * @param rate the sample rate
314     */
315    public void setSampleRate(int rate) {
316        this.sampleRate = rate;
317    }
318    
319    /**
320     * Sets the array of residual sizes.
321     *
322     * @param sizes the new residual sizes
323     */
324    public void setResidualSizes(int[] sizes) {
325        for (int i = 0; i < this.sizes.length && i < sizes.length; i++) {
326            this.sizes[i] = sizes[i];
327        }
328    }
329
330    /**
331     * Copies the information in the given unit to the array of residuals,
332     * starting at the given index, up until targetSize chars.
333     *
334     * @param source  the unit that holds the information source 
335     * @param targetPosition  start position in the array of residuals
336     * @param targetSize  the maximum number of characters to copy
337     */
338    public void copyResiduals(byte[] source, 
339                              int targetPosition, 
340                              int targetSize) {
341        int unitSize = source.length;
342        if (unitSize < targetSize) {
343            int targetStart = (targetSize - unitSize)/2;
344            System.arraycopy(source, 0,
345                             residuals, targetPosition + targetStart,
346                             source.length);
347        } else {
348            int sourcePosition = (unitSize - targetSize)/2;
349            System.arraycopy(source, sourcePosition,
350                             residuals, targetPosition,
351                             targetSize);
352        }
353    }
354
355    /**
356     * Copies the residual puse in the given unit to the array of residuals,
357     * starting at the given index, up until targetSize chars.
358     *
359     * @param source  the unit that holds the information source 
360     * @param targetPosition  start position in the array of residuals
361     * @param targetSize  the maximum number of characters to copy
362     */
363    public void copyResidualsPulse(byte[] source,
364                                   int targetPosition, int targetSize) {
365        int unitSize = source.length;
366        short sample = (short) (source[0] + 128);
367        if (unitSize < targetSize) {
368            residuals[(targetSize-unitSize)/2] = WaveUtils.shortToUlaw(sample);
369        } else {
370            residuals[(unitSize-targetSize)/2] = WaveUtils.shortToUlaw(sample);
371        }
372    }
373        
374    /**
375     * Given a 16 bit value (represented as an int), extract
376     * the high eight bits and return them
377     *
378     * @param val the 16 bit value
379     *
380     * @return the high eight bits
381     */
382    private final static byte hibyte(int val) {
383        return (byte) (val >>> 8);
384    }
385
386    /**
387     * Given a 16 bit value (represented as an int), extract
388     * the low eight bits and return them
389     *
390     * @param val the 16 bit value
391     *
392     * @return the low eight bits
393     */
394    private final static byte lobyte(int val) {
395        return (byte) (val & 0x000000FF);
396    }
397    
398
399    /**
400     * Synthesize a Wave  from this LPCResult
401     *
402     * @return the wave
403     * @exception IOException
404     *            if an error occurs while writing the audio data
405     */
406    public boolean playWave(AudioPlayer player, Utterance utterance)
407        throws IOException {
408        return playWaveSamples(player, utterance.getSpeakable(),
409                               getNumberOfSamples() * 2);
410    }
411
412
413    public byte[] getWaveSamples()
414    {
415        return getWaveSamples(2*getNumberOfSamples(), null);
416    }
417
418    /**
419     * get the samples for this utterance
420     *
421     * @param numberSamples the number of samples desirred
422     * @param utterance the utterance
423     *
424     * [[[ TODO: well there is a bunch of duplicated code here ..
425     *     these should be combined into one routine.
426     *  ]]]
427     */
428    private byte[] getWaveSamples(int numberSamples,
429                                  Utterance utterance) {
430        int numberChannels = getNumberOfChannels();
431        int pmSizeSamples;
432        float pp = 0;
433
434        byte[] samples = new byte[numberSamples];
435        byte[] residuals = getResiduals();
436        int[] residualSizes = getResidualSizes();
437        
438        FloatList outBuffer = FloatList.createList(numberChannels + 1);
439        FloatList lpcCoefficients = FloatList.createList(numberChannels);
440        
441        double multiplier = (double) getLPCRange() / 65535.0;
442        int s = 0;
443
444        // for each frame in the LPC result
445        for (int r = 0, i = 0; i < numberOfFrames; i++) {
446            
447            // unpack the LPC coefficients
448            short[] frame =  getFrame(i);
449
450            FloatList lpcCoeffs = lpcCoefficients;
451            for (int k = 0; k < numberChannels; k++) {
452                lpcCoeffs.value = (float) ( (frame[k] + 32768.0) 
453                    * multiplier) + lpcMinimum;
454                lpcCoeffs = lpcCoeffs.next;
455            }
456            
457            pmSizeSamples = residualSizes[i];
458
459            // resynthesis the signal, pmSizeSamples ~= 90
460            // what's in the loop is done for each residual
461            for (int j = 0; j < pmSizeSamples; j++, r++) {
462
463                FloatList backBuffer = outBuffer.prev;
464                float ob = residualToFloatMap[residuals[r] + 128];
465
466                lpcCoeffs = lpcCoefficients;
467                do {
468                    ob += lpcCoeffs.value * backBuffer.value;
469                    backBuffer = backBuffer.prev;
470                    lpcCoeffs = lpcCoeffs.next;
471                } while (lpcCoeffs != lpcCoefficients);
472
473                int sample = (int) (ob + (pp * POST_EMPHASIS));
474                samples[s++] = (byte) hibyte(sample);
475                samples[s++] = (byte) lobyte(sample);
476
477
478                outBuffer.value = pp = ob;
479                outBuffer = outBuffer.next;
480            }
481        }
482        return samples;
483    }
484
485    /**
486     * Play the sample data on the given player
487     *
488     * @param player where to send the audio
489     * @param numberSamples the number of samples
490     * @exception IOException
491     *            if an error occurs while writing the audio data
492     */
493    private boolean playWaveSamples(AudioPlayer player, 
494                                    FreeTTSSpeakable speakable,
495                                    int numberSamples) throws IOException {
496        boolean ok = true;
497        int numberChannels = getNumberOfChannels();
498        int pmSizeSamples;
499        float pp = 0;
500
501        byte[] samples = new byte[MAX_SAMPLE_SIZE];
502        byte[] residuals = getResiduals();
503        int[] residualSizes = getResidualSizes();
504        
505        FloatList outBuffer = FloatList.createList(numberChannels + 1);
506        FloatList lpcCoefficients = FloatList.createList(numberChannels);
507        
508        double multiplier = (double) getLPCRange() / 65535.0;
509        int s = 0;
510
511        // for each frame in the LPC result
512        player.begin(numberSamples);
513        for (int r = 0, i = 0;
514             (ok &= !speakable.isCompleted()) && 
515                 i < numberOfFrames; i++) {
516            
517            // unpack the LPC coefficients
518            short[] frame =  getFrame(i);
519
520            FloatList lpcCoeffs = lpcCoefficients;
521            for (int k = 0; k < numberChannels; k++) {
522                lpcCoeffs.value = (float) ( (frame[k] + 32768.0) 
523                    * multiplier) + lpcMinimum;
524                lpcCoeffs = lpcCoeffs.next;
525            }
526            
527            pmSizeSamples = residualSizes[i];
528
529            // resynthesis the signal, pmSizeSamples ~= 90
530            // what's in the loop is done for each residual
531            for (int j = 0; j < pmSizeSamples; j++, r++) {
532
533                FloatList backBuffer = outBuffer.prev;
534                float ob = residualToFloatMap[residuals[r] + 128];
535
536                lpcCoeffs = lpcCoefficients;
537                do {
538                    ob += lpcCoeffs.value * backBuffer.value;
539                    backBuffer = backBuffer.prev;
540                    lpcCoeffs = lpcCoeffs.next;
541                } while (lpcCoeffs != lpcCoefficients);
542
543                int sample = (int) (ob + (pp * POST_EMPHASIS));
544                samples[s++] = hibyte(sample);
545                samples[s++] = lobyte(sample);
546
547                if (s >= MAX_SAMPLE_SIZE) {
548                    if ((ok &= !speakable.isCompleted()) && 
549                        !player.write(samples)) {
550                        ok = false;
551                    }
552                    s = 0;
553                }
554
555                outBuffer.value = pp = ob;
556                outBuffer = outBuffer.next;
557            }
558        }
559
560        // write out the very last samples
561        if ((ok &= !speakable.isCompleted()) && s > 0) {
562            ok = player.write(samples, 0, s);
563            s = 0;
564        }
565
566        // tell the AudioPlayer it is the end of Utterance
567        if (ok &= !speakable.isCompleted()) {
568            ok = player.end();
569        }
570
571        return ok;
572    }
573
574    /**
575     * Dumps this LPCResult to standard out
576     */
577    public void dump() {
578        dump(new OutputStreamWriter(System.out));
579    }
580
581    /**
582     * Dumps this LPCResult to the given stream.
583     *
584     * @param writer the output stream
585     */
586    public void dump(Writer writer) {
587        DecimalFormat numberFormat = new DecimalFormat();
588        numberFormat.setMaximumFractionDigits(6);
589        numberFormat.setMinimumFractionDigits(6);
590        PrintWriter pw = new PrintWriter(new BufferedWriter(writer));
591
592        if (getNumberOfFrames() == 0) {
593            pw.println("# ========== LPCResult ==========");
594            pw.println("# Num_of_Frames: " + getNumberOfFrames());
595            pw.flush();
596            return;
597        }
598        pw.println("========== LPCResult ==========");
599        pw.println("Num_of_Frames: " + getNumberOfFrames());
600        pw.println("Num_of_Channels: " + getNumberOfChannels());
601        pw.println("Num_of_Samples: " + getNumberOfSamples());
602        pw.println("Sample_Rate: " + sampleRate);
603        pw.println("LPC_Minimum: " + numberFormat.format(lpcMinimum));
604        pw.println("LPC_Range: " + numberFormat.format(lpcRange));
605        pw.println("Residual_Fold: " + residualFold);
606        pw.println("Post_Emphasis: " + numberFormat.format(POST_EMPHASIS));
607                
608        int i;
609        pw.print("Times:\n");
610        for (i = 0; i < getNumberOfFrames(); i++) {
611            pw.print(times[i] + " ");
612        }
613        pw.print("\nFrames: ");
614        for (i = 0; i < getNumberOfFrames(); i++) {
615            // for each frame, print all elements
616            short[] frame = getFrame(i);
617            for (int j = 0; j < frame.length; j++) {
618                pw.print(( ((int) frame[j]) + 32768) + "\n");
619            }
620        }
621        pw.print("\nSizes: ");
622        for (i = 0; i < getNumberOfFrames(); i++) {
623            pw.print(sizes[i] + " ");
624        }
625        pw.print("\nResiduals: ");
626        for (i = 0; i < getNumberOfSamples(); i++) {
627            if (residuals[i] == 0) {
628                pw.print(255);
629            } else {
630                pw.print(( ((int) residuals[i]) + 128));
631            }
632            pw.print("\n");
633            pw.flush();
634        }
635        pw.flush();
636    }
637
638
639    /**
640     * Dumps the wave data associated with this result
641     */
642    public void dumpASCII() {
643        dumpASCII(new OutputStreamWriter(System.out));
644    }
645
646    /**
647     * Dumps the wave data associated with this result
648     *
649     * @param path the path where the wave data is appended to
650     *
651     * @throws IOException if an IO error occurs
652     */
653    public void dumpASCII(String path) throws IOException {
654        Writer writer = new FileWriter(path, true);
655        getWave().dump(writer);
656    }
657
658    /**
659     * Synthesize a Wave  from this LPCResult
660     *
661     * @return the wave
662     */
663    private  Wave getWave() {
664        // construct a new wave object
665        AudioFormat audioFormat = new AudioFormat
666            (getSampleRate(),
667             Wave.DEFAULT_SAMPLE_SIZE_IN_BITS, 1,
668             Wave.DEFAULT_SIGNED, true);
669        return new Wave(audioFormat,
670                getWaveSamples( getNumberOfSamples() * 2, null));
671    }
672
673    /**
674     * Dumps the wave out to the given stream
675     *
676     * @param writer the output stream
677     */
678    public void dumpASCII(Writer writer)  {
679        Wave wave = getWave();
680        wave.dump(writer);
681    }
682
683    /**
684     * A Wave is an immutable class that contains the AudioFormat and
685     * the actual wave samples, which currently is in the form 
686     * of AudioInputStream.
687     */
688    private static  class Wave {
689        /**
690         * The default sample size of the Wave, which is 16.
691         */
692        public static final int DEFAULT_SAMPLE_SIZE_IN_BITS = 16;
693
694        /**
695         * A boolean indicating that the Wave is signed, i.e., 
696         * this value is true.
697         */
698        public static final boolean DEFAULT_SIGNED = true;
699
700        /**
701         * A boolean indicating that the Wave samples are represented as
702         * little endian, i.e., this value is false.
703         */
704        public static final boolean DEFAULT_BIG_ENDIAN = false;
705
706
707        private byte[] samples = null;
708        private AudioFormat audioFormat = null;
709            
710        /**
711         * Constructs a Wave with the given audio format and wave samples.
712         *
713         * @param audioFormat the audio format of the wave
714         * @param samples the wave samples
715         */
716         Wave(AudioFormat audioFormat, byte[] samples) {
717            this.audioFormat = audioFormat;
718            this.samples = samples;
719        }
720
721
722        /**
723         * Dumps the wave out to the given stream
724         * @param writer the output stream
725         */
726        public void dump(Writer writer)  {
727            PrintWriter pw = new PrintWriter(new BufferedWriter(writer));
728            pw.println("#========== Wave ==========");
729            pw.println("#Type: NULL");
730            pw.println("#Sample_Rate: " + (int)audioFormat.getSampleRate());
731            pw.println("#Num_of_Samples: " + samples.length / 2);
732            pw.println("#Num_of_Channels: " + audioFormat.getChannels());
733            if (samples != null) {
734                for (int i = 0; i < samples.length; i+=2) {
735                    pw.println(
736                        WaveUtils.bytesToShort(samples[i], samples[i+1]));
737                }
738            }
739            pw.flush();
740        }
741    }
742}
743
744    
745
/**
 * FloatList is used to maintain a circular buffer of float values.
 * Each instance is one node of a doubly linked ring, so the buffer can
 * be walked forwards (<code>next</code>) or backwards (<code>prev</code>)
 * without keeping an index. According to the original authors, avoiding
 * indexed access saves the cost of array bounds checking.
 */
class FloatList {
    float value;
    FloatList next;
    FloatList prev;

    /**
     * Creates a new, unlinked node holding 0.
     */
    FloatList() {
        value = 0.0F;
        next = null;
        prev = null;
    }

    /**
     * Creates a circular list of nodes of the given size. All nodes
     * initially hold 0; a list of size 1 is a single node linked to
     * itself.
     *
     * @param size the number of nodes in the list, at least 1
     *
     * @return an entry in the list.
     *
     * @throws IllegalArgumentException if size is less than 1
     */
    static FloatList createList(int size) {
        if (size < 1) {
            // Previously this failed with an unexplained
            // NullPointerException while closing the ring.
            throw new IllegalArgumentException(
                "FloatList size must be at least 1: " + size);
        }

        FloatList first = new FloatList();
        FloatList last = first;

        // append the remaining nodes behind the current tail
        for (int i = 1; i < size; i++) {
            FloatList node = new FloatList();
            node.prev = last;
            last.next = node;
            last = node;
        }

        // close the ring
        first.prev = last;
        last.next = first;

        return first;
    }

    /**
     * prints out the contents of this list to standard out, starting at
     * the given node and following <code>next</code> once around the ring
     *
     * @param title the title of the dump
     * @param list the list to dump
     */
    static void dump(String title, FloatList list) {
        System.out.println(title);

        FloatList cur = list;
        do {
            System.out.println("Item: " + cur.value);
            cur = cur.next;
        } while (cur != list);
    }
}
810