/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package com.sun.speech.freetts;

import java.io.Reader;
014
015/**
016 * Chops a string or text file into Token instances.
017 */
018public interface Tokenizer {
019    /**
020     * Sets the text to be tokenized by this tokenizer.
021     *
022     * @param textToTokenize  the text to tokenize
023     */
024    void setInputText(String textToTokenize);
025
026    /**
027     * Sets the input reader.
028     *
029     * @param  reader the input source
030     */
031    void setInputReader(Reader reader);
032    
033    
034    /**
035     * Returns the next token.
036     *
037     * @return  the next token if it exists; otherwise null
038     */
039    Token getNextToken();
040
041
042    /**
043     * Returns true if there are more tokens, false otherwise.
044     *
045     * @return true if there are more tokens; otherwise false
046     */
047    boolean hasMoreTokens();
048
049    /**
050     * Returns true if there were errors while reading tokens.
051     *
052     * @return true if there were errors; otherwise false
053     */
054    boolean hasErrors();
055
056    /**
057     * If hasErrors returns true, returns a description of the error
058     * encountered.  Otherwise returns null.
059     *
060     * @return a description of the last error that occurred
061     */
062    String getErrorDescription();
063
064    /**
065     * Sets the whitespace symbols of this Tokenizer to the given
066     * symbols.
067     * 
068     * @param symbols the whitespace symbols
069     */
070    void setWhitespaceSymbols(String symbols);
071
072    /**
073     * Sets the single character symbols of this Tokenizer to the given
074     * symbols.
075     *
076     * @param symbols the single character symbols
077     */
078    void setSingleCharSymbols(String symbols);
079
080    /**
081     * Sets the prepunctuation symbols of this Tokenizer to the given
082     * symbols.
083     *
084     * @param symbols the prepunctuation symbols
085     */
086    void setPrepunctuationSymbols(String symbols);
087
088    /**
089     * Sets the postpunctuation symbols of this Tokenizer to the given
090     * symbols.
091     *
092     * @param symbols the postpunctuation symbols
093     */
094    void setPostpunctuationSymbols(String symbols);
095
096    /**
097     * Determines if the current token should start a new sentence.
098     *
099     * @return true if a new sentence should be started
100     */
101    boolean isBreak();
102}