001/**
002 * Portions Copyright 2001 Sun Microsystems, Inc.
003 * Portions Copyright 1999-2001 Language Technologies Institute, 
004 * Carnegie Mellon University.
005 * All Rights Reserved.  Use is subject to license terms.
006 * 
007 * See the file "license.terms" for information on usage and
008 * redistribution of this file, and for a DISCLAIMER OF ALL 
009 * WARRANTIES.
010 */
011package com.sun.speech.freetts.en;
012
013import com.sun.speech.freetts.UtteranceProcessor;
014import com.sun.speech.freetts.Voice;
015import com.sun.speech.freetts.Relation;
016import com.sun.speech.freetts.Utterance;
017import com.sun.speech.freetts.ProcessException;
018import com.sun.speech.freetts.PathExtractorImpl;
019import com.sun.speech.freetts.PathExtractor;
020import com.sun.speech.freetts.Item;
021import java.util.StringTokenizer;
022import java.util.NoSuchElementException;
023import java.util.List;
024import java.util.ArrayList;
025import java.io.BufferedReader;
026import java.io.InputStreamReader;
027import java.io.IOException;
028import java.net.URL;
029
030
031/**
032 * Calculates the F0 curve for an utterance based on the Black and
033 * Hunt article "Generating F0 Contours from ToBI Labels Using Linear
034 * Regression," ICSLP96, vol. 3, pp 1385-1388, Philadelphia,
035 * PA. 1996.
036 */
037public class ContourGenerator implements UtteranceProcessor {
038    private final static PathExtractor endPath =
039        new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.end",
040                              true);
041    private final static PathExtractor lastDaughterEndPath =
042        new PathExtractorImpl("R:SylStructure.daughtern.end",
043                              true);
044    private final static PathExtractor postBreakPath =
045        new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.name",
046                              true);
047    private final static PathExtractor preBreakPath =
048        new PathExtractorImpl("R:SylStructure.daughtern.R:Segment.n.name",
049                              true);
050    private final static PathExtractor vowelMidPath =
051        new PathExtractorImpl("R:Segment.p.end",
052                              true);
053    private final static PathExtractor localF0Shift =
054        new PathExtractorImpl(
055            "R:SylStructure.parent.R:Token.parent.local_f0_shift", true);
056    private final static PathExtractor localF0Range =
057        new PathExtractorImpl(
058            "R:SylStructure.parent.R:Token.parent.local_f0_range", true);
059
060    private final float modelMean;
061    private final float modelStddev;
062    private  F0ModelTerm[] terms = { null };
063
064    /**
065     * Creates a ContourGenerator utterance processor.
066     *
067     * @param url source of the data
068     * @param modelMean the average frequency
069     * @param modelStddev the std deviation of the frequency
070     *
071     * @throws IOException if an error occurs while loading data
072     */
073    public ContourGenerator(URL  url,
074                        float modelMean, float modelStddev)
075                            throws IOException {
076        this.modelMean = modelMean;
077        this.modelStddev = modelStddev;
078
079        List termsList = new ArrayList();
080
081        String line;
082        BufferedReader reader = new BufferedReader(
083                new InputStreamReader(url.openStream()));
084        line = reader.readLine();
085        while (line != null) {
086            if (!line.startsWith("***")) {
087                parseAndAdd(termsList, line);
088            }
089            line = reader.readLine();
090        }
091        terms = (F0ModelTerm[]) termsList.toArray(terms);
092        reader.close();
093    }
094
095    /**
096     * Generates the F0 contour for the utterance.
097     *
098     * @param  utterance  the utterance to process
099     *
100     * @throws ProcessException if an <code>IOException</code> is 
101     *          thrown during the processing of the utterance
102     */
103    public void processUtterance(Utterance utterance) throws ProcessException {
104        float lend = 0.0f;
105        float mean;
106        float stddev;
107        float localMean;
108        float localStddev;
109        Object tval;
110
111        mean = utterance.getVoice().getPitch();
112        mean *= utterance.getVoice().getPitchShift();
113        stddev = utterance.getVoice().getPitchRange();
114
115        Relation target = utterance.createRelation(Relation.TARGET);
116        for (Item syllable =
117                 utterance.getRelation(Relation.SYLLABLE).getHead();
118                syllable != null;
119                syllable = syllable.getNext()) {
120
121           if (syllable.getItemAs(Relation.SYLLABLE_STRUCTURE).hasDaughters()) {
122
123                tval = localF0Shift.findFeature(syllable);
124                localMean  = Float.parseFloat(tval.toString());
125
126                if (localMean == 0.0) {
127                    localMean = mean;
128                } else {
129                    localMean *= mean;
130                }
131
132                tval = localF0Range.findFeature(syllable);
133                localStddev  = Float.parseFloat(tval.toString());
134
135                if (localStddev == 0.0) {
136                    localStddev = stddev;
137                }
138
139                Interceptor interceptor = applyLrModel(syllable);
140                if (isPostBreak(syllable)) {
141                    lend = mapF0(interceptor.start, localMean, localStddev);
142                }
143
144                Float val = (Float) endPath.findFeature(syllable);
145                // assert val != null;
146                // don't mind null ptr exception
147                addTargetPoint(target, val.floatValue(), 
148                        mapF0((interceptor.start + lend) / 2.0f,
149                            localMean, localStddev));
150                addTargetPoint(target, vowelMid(syllable),
151                        mapF0(interceptor.mid, localMean, localStddev));
152                lend = mapF0(interceptor.end, localMean, localStddev);
153                if (isPreBreak(syllable)) {
154                    Float eval = (Float) lastDaughterEndPath.findFeature(
155                            syllable);
156                    addTargetPoint(target, eval.floatValue(),
157                            mapF0(interceptor.end, localMean, localStddev));
158                }
159            }
160        }
161
162        if (utterance.getRelation(Relation.SEGMENT).getHead() != null) {
163            Item first = target.getHead();
164            if (first == null) {
165                addTargetPoint(target, 0, mean);
166            } else  if (first.getFeatures().getFloat("pos") > 0) {
167                    Item newItem = first.prependItem(null);
168                    newItem.getFeatures().setFloat("pos", 0.0f);
169                    newItem.getFeatures().setFloat(
170                            "f0", first.getFeatures().getFloat("f0"));
171            }
172            Item last = (Item) target.getTail();
173            Item lastSegment 
174                = utterance.getRelation(Relation.SEGMENT).getTail();
175            float segEnd = 0.0f;
176
177            if (lastSegment != null) {
178                segEnd = lastSegment.getFeatures().getFloat("end");
179            }
180
181            if (last.getFeatures().getFloat("pos") < segEnd) {
182                addTargetPoint(target, segEnd, last.getFeatures().
183                        getFloat("f0"));
184            }
185        }
186    }
187
188    /**
189     * Applies the linear regression model.
190     *
191     * @param syllable the syllable to process
192
193     * @return the 3 points for the syllable as an <code>Interceptor</code>
194     */
195    private Interceptor applyLrModel(Item syllable) {
196        float fv = 0.0f;
197        Interceptor interceptor = new Interceptor();
198        interceptor.start = terms[0].start;
199        interceptor.mid = terms[0].mid;
200        interceptor.end = terms[0].end;
201
202        for (int i = 1; i < terms.length; i++) {
203            Object value = terms[i].findFeature(syllable);
204            if (terms[i].type != null) {
205                if (value.toString().equals(terms[i].type)) {
206                    fv = 1.0f;
207                } else {
208                    fv = 0.0f;
209                }
210            } else {
211                fv = Float.parseFloat(value.toString());
212            }
213
214            interceptor.start += fv * terms[i].start;
215            interceptor.mid += fv * terms[i].mid;
216            interceptor.end += fv * terms[i].end;
217        }
218
219        return interceptor;
220    }
221
222    /**
223     * Returns the time point mid way in vowel in this syllable.
224     *
225     * @param syllable the syllable of interest
226     *
227     * @return the time point mid way in vowel in this syllable
228     */
229    private final float vowelMid(Item syllable) {
230        Voice voice = syllable.getUtterance().getVoice();
231        Item firstSeg  = syllable.getItemAs(
232            Relation.SYLLABLE_STRUCTURE).getDaughter();
233        Item segment;
234        float val;
235
236        for (segment = firstSeg; segment != null; segment =segment.getNext()) {
237            // TODO refactor phone feature stuff like this so that
238            // it can be understood.
239            if ("+".equals(voice.getPhoneFeature(segment.toString(), "vc"))) {
240                val = (segment.getFeatures().getFloat("end") +
241               ((Float) vowelMidPath.findFeature(segment)).floatValue()) / 2.0f;
242                return val;
243            }
244        }
245
246        if (firstSeg == null) {
247            val =  0.0f;
248        } else {
249            val =  (firstSeg.getFeatures().getFloat("end") +
250               ((Float) vowelMidPath.findFeature(firstSeg)).floatValue()) 
251                / 2.0f;
252        }
253
254        return val;
255    }
256
257    /**
258     * Adds the target point at the given time to the given frequency
259     * to the given relation.
260     *
261     * @param target the target of interest
262     * @param pos the time
263     * @param f0 the frequency
264     */
265    private void addTargetPoint(Relation target, float pos, float f0) {
266        Item item = target.appendItem();
267        item.getFeatures().setFloat("pos", pos);
268        if (f0 > 500.0) {
269            item.getFeatures().setFloat("f0", 500.0f);
270        } else if (f0 < 50.0)  {
271            item.getFeatures().setFloat("f0", 50.0f);
272        } else {
273            item.getFeatures().setFloat("f0", f0);
274        }
275    }
276    
277    /**
278     * Determines if this syllable is following a break.
279     *
280     * @param syllable the syllable to check
281     *
282     * @return <code>true</code> if this syllable is following a
283     *  break; otherwise <code>false</code>.
284     */
285    private final boolean isPostBreak(Item syllable) {
286        return ((syllable.getPrevious() == null) ||
287                "pau".equals(postBreakPath.findFeature(syllable)));
288    }
289    
290    /**
291     * Determines if this syllable is before a break.
292     *
293     * @param syllable the syllable to check
294     *
295     * @return <code>true</code> if this syllable is before a
296     *  break; otherwise <code>false</code>.
297     */
298    private final boolean isPreBreak(Item syllable) {
299        return ((syllable.getNext() == null) ||
300                "pau".equals(preBreakPath.findFeature(syllable)));
301    }
302
303    /**
304     * Maps the given value to the curve.
305     *
306     * @param val the value to map
307     *
308     * @return the mapped value
309     */
310    private final float mapF0(float val, float mean, float stddev) {
311        return ((((val - modelMean)/ modelStddev) * stddev) + mean);
312    }
313
314    /**
315     * Parses the line into an F0ModelTerm.
316     *
317     * @param list resulting F0ModelTerm is added to this list
318     * @param line the string to parse
319     */
320    protected void parseAndAdd(List list, String line) {
321        try {
322            StringTokenizer tokenizer = new StringTokenizer(line," ");
323            String feature = tokenizer.nextToken();        
324            float start = Float.parseFloat(tokenizer.nextToken());        
325            float mid = Float.parseFloat(tokenizer.nextToken());        
326            float end = Float.parseFloat(tokenizer.nextToken());        
327            String type = tokenizer.nextToken(); 
328
329            if (type.equals("null")) {
330                type = null;
331            }
332
333            list.add(new F0ModelTerm(feature, start, mid, end, type));
334        } catch (NoSuchElementException nsee) {
335            throw new Error("ContourGenerator: Error while parsing F0ModelTerm " 
336                    + nsee.getMessage());
337        } catch (NumberFormatException nfe) {
338            throw new Error("ContourGenerator: Bad float format " 
339                    + nfe.getMessage());
340        }
341    }
342
343    /**
344     * Returns the string representation of the object.
345     *
346     * @return the string representation of the object
347     */
348    public String toString() {
349        return "ContourGenerator";
350    }
351}
352
353/**
354 * Represents a single term for the F0 model
355 */
356class  F0ModelTerm {
357    PathExtractor path;
358    float start;
359    float mid;
360    float end;
361    String type;
362
363    /**
364     * Constructs an F0ModelTerm.
365     *
366     * @param feature the feature of the term
367     * @param start the starting point of the term
368     * @param mid the mid-point of the term
369     * @param end the end point of the term
370     * @param type the type of the term
371     */
372    F0ModelTerm(String feature, float start, float mid, 
373        float end, String type) {
374        path = new PathExtractorImpl(feature, true);
375        this.start = start;
376        this.mid = mid;
377        this.end = end;
378        this.type = type;
379    }
380
381    /**
382     * Find the feature associated with the given item
383     *
384     * @param item the item of interest
385     *
386     * @return the object representing the feature.
387     */
388    public Object findFeature(Item item) {
389        return path.findFeature(item);
390    }
391
392    /**
393     * Returns the string representation of the object
394     *
395     * @return the string representation of the object
396     */
397    public String toString() {
398        return path.toString();
399    }
400}
401
/**
 * Holds three float values, named start, mid and end.
 */
class Interceptor {
    float start;
    float mid;
    float end;

    /**
     * Creates an interceptor whose three values are all zero.
     */
    Interceptor() {
        start = mid = end = 0.0f;
    }

    /**
     * Formats the three values, in the order start, mid, end,
     * separated by single spaces.
     *
     * @return the three values as one space-separated string
     */
    public String toString() {
        StringBuffer text = new StringBuffer();
        text.append(start);
        text.append(" ");
        text.append(mid);
        text.append(" ");
        text.append(end);
        return text.toString();
    }
}