public class TokenToWords extends Object implements UtteranceProcessor
It translates the following code from flite:
lang/usenglish/us_text.c
Modifier and Type | Field and Description |
---|---|
private static Pattern |
alphabetPattern |
private CART |
cart |
private static Pattern |
commaIntPattern |
private static Pattern |
digits2DashPattern |
private static Pattern |
digitsPattern |
private static Pattern |
digitsSlashDigitsPattern |
private static Pattern |
dottedAbbrevPattern |
private static Pattern |
doublePattern |
private static Pattern |
drStPattern |
private static Pattern |
fourDigitsPattern |
private static Pattern |
hasVowelPattern |
private static Pattern |
illionPattern |
private static String |
KING_NAMES |
private static String |
KING_TITLES |
private static String[] |
kingNames |
private static Hashtable |
kingSectionLikeHash
Here we use a hashtable for constant time matching, instead of using
if (A.equals(B) || A.equals(C) || ...) to match Strings
|
private static String[] |
kingTitles |
private static Pattern |
numberTimePattern |
private static Pattern |
numessPattern |
private static Pattern |
ordinalPattern |
private static String[] |
postrophes |
private PronounceableFSM |
prefixFSM |
private static Pattern |
romanNumbersPattern |
private static String |
RX_HAS_VOWEL
Regular expression for something that has a vowel
|
private static String |
SECTION_TYPES |
private static String[] |
sectionTypes |
private static Pattern |
sevenPhoneNumberPattern |
private PronounceableFSM |
suffixFSM |
private static Pattern |
threeDigitsPattern |
private Item |
tokenItem |
private static Pattern |
usMoneyPattern |
private static String[][] |
usStates |
private static Hashtable |
usStatesHash |
private WordRelation |
wordRelation |
Constructor and Description |
---|
TokenToWords(CART usNumbersCART,
PronounceableFSM prefixFSM,
PronounceableFSM suffixFSM)
Constructs a default TokenToWords processor.
|
Modifier and Type | Method and Description |
---|---|
private void |
dashToWords(String tokenVal)
Convert the given dashed string (e.g. "aaa-bbb") into (word) Items in the WordRelation.
|
private void |
digitsDashToWords(String tokenVal)
Convert the given digit token with dashes (e.g. 999-999-999)
into (word) Items in the WordRelation.
|
private void |
digitsSlashDigitsToWords(String tokenVal)
Convert the given digits/digits string into word (Items) in the
WordRelation.
|
private void |
digitsToWords(String tokenVal)
Convert the given digit token into (word) Items in the WordRelation.
|
private void |
drStToWords(String drStString)
Converts the given string containing "St" and "Dr" to (word) Items
in the WordRelation.
|
Item |
getTokenItem()
Returns the currently processing token Item.
|
private static boolean |
inKingSectionLikeHash(String key,
String value)
Returns true if the given key is in the kingSectionLikeHash
Hashtable, and the value is the same as the given value.
|
private static boolean |
inStringArray(String value,
String[] stringArray)
Returns true if the given string is in the given string array.
|
private static boolean |
isLetter(char ch)
Returns true if the given character is a letter (a-z or A-Z).
|
private static boolean |
isLowercaseLetter(char ch)
Returns true if the given character is a lowercase letter (a-z).
|
boolean |
isPronounceable(String word)
Returns true if the given word is pronounceable.
|
private boolean |
isStateName(String tokenVal)
Returns true if the given token is the name of a US state.
|
private static boolean |
isTextSplitable(String text,
int index)
Determines if the character at the given position of the given
input text is splittable.
|
private static boolean |
isUppercaseLetter(char ch)
Returns true if the given character is an uppercase letter (A-Z).
|
static boolean |
kingLike(Item tokenItem)
Returns true if the given token item contains a token that is
in a king-like context, e.g., "King" or "Louis".
|
private static boolean |
matches(Pattern pattern,
String input)
Determines if the given input matches the given Pattern.
|
private boolean |
matchesPartPhoneNumber(String tokenVal)
Returns true if the given token matches part of a phone number
|
private void |
notJustAlphasToWords(String tokenVal)
Convert the given string (which does not only consist of alphabet)
into (word) Items in the WordRelation.
|
private void |
postropheToWords(String tokenVal)
Convert the given apostrophed word into (word) Items in the Word
Relation.
|
void |
processUtterance(Utterance utterance)
Processes the utterance.
|
private void |
romanToWords(String romanString)
Converts the given Roman numeral string into (word) Items in the
WordRelation.
|
static boolean |
sectionLike(Item tokenItem)
Returns true if the given token item contains a token that is
in a section-like context, e.g., "chapter" or "act".
|
private void |
tokenToWords(String tokenVal)
Converts the given Token into (word) Items in the WordRelation.
|
String |
toString()
Converts this object to its String representation
|
private void |
usMoneyToWords(String tokenVal)
Converts US money string into (word) Items in the WordRelation.
|
private static final String RX_HAS_VOWEL
private static final Pattern alphabetPattern
private static final Pattern commaIntPattern
private static final Pattern digits2DashPattern
private static final Pattern digitsPattern
private static final Pattern digitsSlashDigitsPattern
private static final Pattern dottedAbbrevPattern
private static final Pattern doublePattern
private static final Pattern drStPattern
private static final Pattern fourDigitsPattern
private static final Pattern hasVowelPattern
private static final Pattern illionPattern
private static final Pattern numberTimePattern
private static final Pattern numessPattern
private static final Pattern ordinalPattern
private static final Pattern romanNumbersPattern
private static final Pattern sevenPhoneNumberPattern
private static final Pattern threeDigitsPattern
private static final Pattern usMoneyPattern
private static final String[] kingTitles
private static final String[] sectionTypes
private static Hashtable kingSectionLikeHash
private static final String KING_NAMES
private static final String KING_TITLES
private static final String SECTION_TYPES
private static final String[] postrophes
private PronounceableFSM prefixFSM
private PronounceableFSM suffixFSM
private static Hashtable usStatesHash
private WordRelation wordRelation
public TokenToWords(CART usNumbersCART, PronounceableFSM prefixFSM, PronounceableFSM suffixFSM)
usNumbersCART
- the cart to use to classify numbers
public Item getTokenItem()
public void processUtterance(Utterance utterance) throws ProcessException
processUtterance
in interface UtteranceProcessor
utterance
- the utterance containing the tokens
ProcessException
- if an IOException is thrown during the
processing of the utterance
private boolean matchesPartPhoneNumber(String tokenVal)
tokenItem
- the token
tokenVal
- the string value of the token
private static boolean inStringArray(String value, String[] stringArray)
value
- the string to check
stringArray
- the array to check
private void tokenToWords(String tokenVal)
tokenVal
- the String value of the token, which may or may not be
the same as the one called "name" in flite
private void digitsDashToWords(String tokenVal)
tokenVal
- the digit string
private void digitsToWords(String tokenVal)
tokenVal
- the digit string
private void romanToWords(String romanString)
romanString
- the roman numeral string
private static boolean inKingSectionLikeHash(String key, String value)
key
- key to look for in the hashtable
value
- the value to match
public static boolean kingLike(Item tokenItem)
tokenItem
- the token item to check
public static boolean sectionLike(Item tokenItem)
tokenItem
- the token item to check
private void drStToWords(String drStString)
drStString
- the string with "St" and "Dr"
private void usMoneyToWords(String tokenVal)
tokenVal
- the US money string
private void postropheToWords(String tokenVal)
tokenVal
- the apostrophed word string
private void digitsSlashDigitsToWords(String tokenVal)
tokenVal
- the digits/digits string
private void dashToWords(String tokenVal)
tokenVal
- the dashed string
private void notJustAlphasToWords(String tokenVal)
tokenVal
- the string
public boolean isPronounceable(String word)
word
- the word to test
private boolean isStateName(String tokenVal)
tokenVal
- the token string
private static boolean matches(Pattern pattern, String input)
pattern
- the pattern to match
input
- the string to test
Returns:
true if the input string matches the given Pattern; false otherwise
private static boolean isTextSplitable(String text, int index)
Conditions considered when deciding whether the character is splittable:
1) the character and the following character are not letters in the English alphabet (A-Z and a-z)
2) the character and the following character are not digits (0-9)
text
- the text containing the character of interest
index
- the index of the character of interest
private static boolean isLetter(char ch)
ch
- the character to test
private static boolean isUppercaseLetter(char ch)
ch
- the character to test
private static boolean isLowercaseLetter(char ch)
ch
- the character to test

WebARTS Library Licensed Under the GNU - General Public License. Other Libraries licensed under their respective Open Source Licenses.