001/** 002 * Portions Copyright 2001 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts; 012 013import java.io.Reader; 014 015/** 016 * Chops a string or text file into Token instances. 017 */ 018public interface Tokenizer { 019 /** 020 * Sets the text to be tokenized by this tokenizer. 021 * 022 * @param textToTokenize the text to tokenize 023 */ 024 void setInputText(String textToTokenize); 025 026 /** 027 * Sets the input reader. 028 * 029 * @param reader the input source 030 */ 031 void setInputReader(Reader reader); 032 033 034 /** 035 * Returns the next token. 036 * 037 * @return the next token if it exists; otherwise null 038 */ 039 Token getNextToken(); 040 041 042 /** 043 * Returns true if there are more tokens, false otherwise. 044 * 045 * @return true if there are more tokens; otherwise false 046 */ 047 boolean hasMoreTokens(); 048 049 /** 050 * Returns true if there were errors while reading tokens. 051 * 052 * @return true if there were errors; otherwise false 053 */ 054 boolean hasErrors(); 055 056 /** 057 * If hasErrors returns true, returns a description of the error 058 * encountered. Otherwise returns null. 059 * 060 * @return a description of the last error that occurred 061 */ 062 String getErrorDescription(); 063 064 /** 065 * Sets the whitespace symbols of this Tokenizer to the given 066 * symbols. 067 * 068 * @param symbols the whitespace symbols 069 */ 070 void setWhitespaceSymbols(String symbols); 071 072 /** 073 * Sets the single character symbols of this Tokenizer to the given 074 * symbols. 075 * 076 * @param symbols the single character symbols 077 */ 078 void setSingleCharSymbols(String symbols); 079 080 /** 081 * Sets the prepunctuation symbols of this Tokenizer to the given 082 * symbols. 083 * 084 * @param symbols the prepunctuation symbols 085 */ 086 void setPrepunctuationSymbols(String symbols); 087 088 /** 089 * Sets the postpunctuation symbols of this Tokenizer to the given 090 * symbols. 091 * 092 * @param symbols the postpunctuation symbols 093 */ 094 void setPostpunctuationSymbols(String symbols); 095 096 /** 097 * Determines if the current token should start a new sentence. 098 * 099 * @return true if a new sentence should be started 100 */ 101 boolean isBreak(); 102}