001/** 002 * Portions Copyright 2001-2003 Sun Microsystems, Inc. 003 * Portions Copyright 1999-2001 Language Technologies Institute, 004 * Carnegie Mellon University. 005 * All Rights Reserved. Use is subject to license terms. 006 * 007 * See the file "license.terms" for information on usage and 008 * redistribution of this file, and for a DISCLAIMER OF ALL 009 * WARRANTIES. 010 */ 011package com.sun.speech.freetts.en.us; 012 013import com.sun.speech.freetts.FeatureSet; 014import com.sun.speech.freetts.Item; 015import com.sun.speech.freetts.util.Utilities; 016 017/** 018 * Expands Strings containing digits characters into 019 * a list of words representing those digits. 020 * 021 * It translates the following code from flite: 022 * <code>lang/usEnglish/us_expand.c</code> 023 */ 024public class NumberExpander { 025 026 private static final String[] digit2num = { 027 "zero", 028 "one", 029 "two", 030 "three", 031 "four", 032 "five", 033 "six", 034 "seven", 035 "eight", 036 "nine" }; 037 038 private static final String[] digit2teen = { 039 "ten", /* shouldn't get called */ 040 "eleven", 041 "twelve", 042 "thirteen", 043 "fourteen", 044 "fifteen", 045 "sixteen", 046 "seventeen", 047 "eighteen", 048 "nineteen" }; 049 050 private static final String[] digit2enty = { 051 "zero", /* shouldn't get called */ 052 "ten", 053 "twenty", 054 "thirty", 055 "forty", 056 "fifty", 057 "sixty", 058 "seventy", 059 "eighty", 060 "ninety" }; 061 062 private static final String[] ord2num = { 063 "zeroth", 064 "first", 065 "second", 066 "third", 067 "fourth", 068 "fifth", 069 "sixth", 070 "seventh", 071 "eighth", 072 "ninth" }; 073 074 private static final String[] ord2teen = { 075 "tenth", /* shouldn't get called */ 076 "eleventh", 077 "twelfth", 078 "thirteenth", 079 "fourteenth", 080 "fifteenth", 081 "sixteenth", 082 "seventeenth", 083 "eighteenth", 084 "nineteenth" }; 085 086 private static final String[] ord2enty = { 087 "zeroth", /* shouldn't get called */ 088 "tenth", 089 "twentieth", 090 "thirtieth", 091 "fortieth", 092 "fiftieth", 093 "sixtieth", 094 "seventieth", 095 "eightieth", 096 "ninetieth" }; 097 098 099 /** 100 * Unconstructable 101 */ 102 private NumberExpander() { 103 } 104 105 106 /** 107 * Expands a digit string into a list of English words of those digits. 108 * For example, "1234" expands to "one two three four" 109 * 110 * @param numberString the digit string to expand. 111 * @param wordRelation words are added to this Relation 112 */ 113 public static void expandNumber(String numberString, 114 WordRelation wordRelation) { 115 int numDigits = numberString.length(); 116 117 if (numDigits == 0) { 118 // wordRelation = null; 119 } else if (numDigits == 1) { 120 expandDigits(numberString, wordRelation); 121 } else if (numDigits == 2) { 122 expand2DigitNumber(numberString, wordRelation); 123 } else if (numDigits == 3) { 124 expand3DigitNumber(numberString, wordRelation); 125 } else if (numDigits < 7) { 126 expandBelow7DigitNumber(numberString, wordRelation); 127 } else if (numDigits < 10) { 128 expandBelow10DigitNumber(numberString, wordRelation); 129 } else if (numDigits < 13) { 130 expandBelow13DigitNumber(numberString, wordRelation); 131 } else { 132 expandDigits(numberString, wordRelation); 133 } 134 } 135 136 137 /** 138 * Expands a two-digit string into a list of English words. 139 * 140 * @param numberString the string which is the number to expand 141 * @param wordRelation words are added to this Relation 142 */ 143 private static void expand2DigitNumber(String numberString, 144 WordRelation wordRelation) { 145 if (numberString.charAt(0) == '0') { 146 // numberString is "0X" 147 if (numberString.charAt(1) == '0') { 148 // numberString is "00", do nothing 149 } else { 150 // numberString is "01", "02" ... 151 String number = digit2num[numberString.charAt(1)-'0']; 152 wordRelation.addWord(number); 153 } 154 } else if (numberString.charAt(1) == '0') { 155 // numberString is "10", "20", ... 156 String number = digit2enty[numberString.charAt(0)-'0']; 157 wordRelation.addWord(number); 158 } else if (numberString.charAt(0) == '1') { 159 // numberString is "11", "12", ..., "19" 160 String number = digit2teen[numberString.charAt(1)-'0']; 161 wordRelation.addWord(number); 162 } else { 163 // numberString is "2X", "3X", ... 164 String enty = digit2enty[numberString.charAt(0)-'0']; 165 wordRelation.addWord(enty); 166 expandDigits(numberString.substring(1,numberString.length()), 167 wordRelation); 168 } 169 } 170 171 172 /** 173 * Expands a three-digit string into a list of English words. 174 * 175 * @param numberString the string which is the number to expand 176 * @param wordRelation words are added to this Relation 177 */ 178 private static void expand3DigitNumber(String numberString, 179 WordRelation wordRelation) { 180 if (numberString.charAt(0) == '0') { 181 expandNumberAt(numberString, 1, wordRelation); 182 } else { 183 String hundredDigit = digit2num[numberString.charAt(0)-'0']; 184 wordRelation.addWord(hundredDigit); 185 wordRelation.addWord("hundred"); 186 expandNumberAt(numberString, 1, wordRelation); 187 } 188 } 189 190 191 /** 192 * Expands a string that is a 4 to 6 digits number into a list 193 * of English words. For example, "333000" into "three hundred 194 * and thirty-three thousand". 195 * 196 * @param numberString the string which is the number to expand 197 * @param wordRelation words are added to this Relation 198 */ 199 private static void expandBelow7DigitNumber(String numberString, 200 WordRelation wordRelation) { 201 expandLargeNumber(numberString, "thousand", 3, wordRelation); 202 } 203 204 205 /** 206 * Expands a string that is a 7 to 9 digits number into a list 207 * of English words. For example, "19000000" into nineteen million. 208 * 209 * @param numberString the string which is the number to expand 210 * @param wordRelation words are added to this Relation 211 */ 212 private static void expandBelow10DigitNumber(String numberString, 213 WordRelation wordRelation) { 214 expandLargeNumber(numberString, "million", 6, wordRelation); 215 } 216 217 218 /** 219 * Expands a string that is a 10 to 12 digits number into a list 220 * of English words. For example, "27000000000" into twenty-seven 221 * billion. 222 * 223 * @param numberString the string which is the number to expand 224 * @param wordRelation words are added to this Relation 225 */ 226 private static void expandBelow13DigitNumber(String numberString, 227 WordRelation wordRelation) { 228 expandLargeNumber(numberString, "billion", 9, wordRelation); 229 } 230 231 232 /** 233 * Expands a string that is a number longer than 3 digits into a list 234 * of English words. For example, "1000" into one thousand. 235 * 236 * @param numberString the string which is the number to expand 237 * @param order either "thousand", "million", or "billion" 238 * @param numberZeroes the number of zeroes, depending on the order, so 239 * its either 3, 6, or 9 240 * @param wordRelation words are added to this Relation 241 */ 242 private static void expandLargeNumber(String numberString, 243 String order, 244 int numberZeroes, 245 WordRelation wordRelation) { 246 int numberDigits = numberString.length(); 247 248 // parse out the prefix, e.g., "113" in "113,000" 249 int i = numberDigits - numberZeroes; 250 String part = numberString.substring(0, i); 251 252 // get how many thousands/millions/billions 253 Item oldTail = wordRelation.getTail(); 254 255 expandNumber(part, wordRelation); 256 257 if (wordRelation.getTail() == oldTail) { 258 expandNumberAt(numberString, i, wordRelation); 259 } else { 260 wordRelation.addWord(order); 261 expandNumberAt(numberString, i, wordRelation); 262 } 263 } 264 265 266 /** 267 * Returns the number string list of the given string starting at 268 * the given index. E.g., expandNumberAt("1100", 1) gives "one hundred" 269 * 270 * @param numberString the string which is the number to expand 271 * @param startIndex the starting position 272 * @param wordRelation words are added to this Relation 273 */ 274 private static void expandNumberAt(String numberString, 275 int startIndex, 276 WordRelation wordRelation) { 277 expandNumber(numberString.substring(startIndex,numberString.length()), 278 wordRelation); 279 } 280 281 282 /** 283 * Expands given token to list of words pronouncing it as digits 284 * 285 * @param numberString the string which is the number to expand 286 * @param wordRelation words are added to this Relation 287 */ 288 public static void expandDigits(String numberString, 289 WordRelation wordRelation) { 290 int numberDigits = numberString.length(); 291 for (int i = 0; i < numberDigits; i++) { 292 char digit = numberString.charAt(i); 293 if (isDigit(digit)) { 294 wordRelation.addWord(digit2num[numberString.charAt(i)-'0']); 295 } else { 296 wordRelation.addWord("umpty"); 297 } 298 } 299 } 300 301 302 /** 303 * Expands the digit string of an ordinal number. 304 * 305 * @param rawNumberString the string which is the number to expand 306 * @param wordRelation words are added to this Relation 307 */ 308 public static void expandOrdinal(String rawNumberString, 309 WordRelation wordRelation) { 310 // remove all ','s from the raw number string 311 String numberString = Utilities.deleteChar(rawNumberString, ','); 312 313 expandNumber(numberString, wordRelation); 314 315 // get the last in the list of number strings 316 Item lastItem = wordRelation.getTail(); 317 318 if (lastItem != null) { 319 320 FeatureSet featureSet = lastItem.getFeatures(); 321 String lastNumber = featureSet.getString("name"); 322 String ordinal = findMatchInArray(lastNumber, digit2num, ord2num); 323 324 if (ordinal == null) { 325 ordinal = findMatchInArray(lastNumber, digit2teen, ord2teen); 326 } 327 if (ordinal == null) { 328 ordinal = findMatchInArray(lastNumber, digit2enty, ord2enty); 329 } 330 331 if (lastNumber.equals("hundred")) { 332 ordinal = "hundredth"; 333 } else if (lastNumber.equals("thousand")) { 334 ordinal = "thousandth"; 335 } else if (lastNumber.equals("billion")) { 336 ordinal = "billionth"; 337 } 338 339 // if there was an ordinal, set the last element of the list 340 // to that ordinal; otherwise, don't do anything 341 if (ordinal != null) { 342 wordRelation.setLastWord(ordinal); 343 } 344 } 345 } 346 347 348 /** 349 * Finds a match of the given string in the given array, 350 * and returns the element at the same index in the returnInArray 351 * 352 * @param strToMatch the string to match 353 * @param matchInArray the source array 354 * @param returnInArray the return array 355 * 356 * @return an element in returnInArray, or <code>null</code> 357 * if a match is not found 358 */ 359 private static String findMatchInArray(String strToMatch, 360 String[] matchInArray, 361 String[] returnInArray) { 362 for (int i = 0; i < matchInArray.length; i++) { 363 if (strToMatch.equals(matchInArray[i])) { 364 if (i < returnInArray.length) { 365 return returnInArray[i]; 366 } else { 367 return null; 368 } 369 } 370 } 371 return null; 372 } 373 374 375 /** 376 * Expands the given number string as pairs as in years or IDs 377 * 378 * @param numberString the string which is the number to expand 379 * @param wordRelation words are added to this Relation 380 */ 381 public static void expandID(String numberString, WordRelation wordRelation) { 382 383 int numberDigits = numberString.length(); 384 385 if ((numberDigits == 4) && 386 (numberString.charAt(2) == '0') && 387 (numberString.charAt(3) == '0')) { 388 if (numberString.charAt(1) == '0') { // e.g. 2000, 3000 389 expandNumber(numberString, wordRelation); 390 } else { 391 expandNumber(numberString.substring(0,2), wordRelation); 392 wordRelation.addWord("hundred"); 393 } 394 } else if ((numberDigits == 2) && (numberString.charAt(0) == '0')) { 395 wordRelation.addWord("oh"); 396 expandDigits(numberString.substring(1,2), wordRelation); 397 } else if ((numberDigits == 4 && 398 numberString.charAt(1) == '0') || 399 numberDigits < 3) { 400 expandNumber(numberString, wordRelation); 401 } else if (numberDigits % 2 == 1) { 402 String firstDigit = digit2num[numberString.charAt(0)-'0']; 403 wordRelation.addWord(firstDigit); 404 expandID(numberString.substring(1,numberDigits), wordRelation); 405 } else { 406 expandNumber(numberString.substring(0,2), wordRelation); 407 expandID(numberString.substring(2,numberDigits), wordRelation); 408 } 409 } 410 411 412 /** 413 * Expands the given number string as a real number. 414 * 415 * @param numberString the string which is the real number to expand 416 * @param wordRelation words are added to this Relation 417 */ 418 public static void expandReal(String numberString, WordRelation wordRelation) { 419 420 int stringLength = numberString.length(); 421 int position; 422 423 if (numberString.charAt(0) == '-') { 424 // negative real numbers 425 wordRelation.addWord("minus"); 426 expandReal(numberString.substring(1, stringLength), wordRelation); 427 } else if (numberString.charAt(0) == '+') { 428 // prefixed with a '+' 429 wordRelation.addWord("plus"); 430 expandReal(numberString.substring(1, stringLength), wordRelation); 431 } else if ((position = numberString.indexOf('e')) != -1 || 432 (position = numberString.indexOf('E')) != -1) { 433 // numbers with 'E' or 'e' 434 expandReal(numberString.substring(0, position), wordRelation); 435 wordRelation.addWord("e"); 436 expandReal(numberString.substring(position + 1), wordRelation); 437 } else if ((position = numberString.indexOf('.')) != -1) { 438 // numbers with '.' 439 String beforeDot = numberString.substring(0, position); 440 if (beforeDot.length() > 0) { 441 expandReal(beforeDot, wordRelation); 442 } 443 wordRelation.addWord("point"); 444 String afterDot = numberString.substring(position + 1); 445 if (afterDot.length() > 0) { 446 expandDigits(afterDot, wordRelation); 447 } 448 } else { 449 // everything else 450 expandNumber(numberString, wordRelation); 451 } 452 } 453 454 455 /** 456 * Expands the given string of letters as a list of single char symbols. 457 * 458 * @param letters the string of letters to expand 459 * @param wordRelation words are added to this Relation 460 */ 461 public static void expandLetters(String letters, 462 WordRelation wordRelation) { 463 letters = letters.toLowerCase(); 464 char c; 465 466 for (int i = 0; i < letters.length(); i++) { 467 // if this is a number 468 c = letters.charAt(i); 469 470 if (isDigit(c)) { 471 wordRelation.addWord(digit2num[c-'0']); 472 } else if (letters.equals("a")) { 473 wordRelation.addWord("_a"); 474 } else { 475 wordRelation.addWord(String.valueOf(c)); 476 } 477 } 478 } 479 480 481 /** 482 * Returns the integer value of the given string of Roman numerals. 483 * 484 * @param roman the string of Roman numbers 485 * 486 * @return the integer value 487 */ 488 public static int expandRoman(String roman) { 489 int value = 0; 490 491 for (int p = 0; p < roman.length(); p++) { 492 char c = roman.charAt(p); 493 if (c == 'X') { 494 value += 10; 495 } else if (c == 'V') { 496 value += 5; 497 } else if (c == 'I') { 498 if (p+1 < roman.length()) { 499 char p1 = roman.charAt(p+1); 500 if (p1 == 'V') { 501 value += 4; 502 p++; 503 } else if (p1 == 'X') { 504 value += 9; 505 p++; 506 } else { 507 value += 1; 508 } 509 } else { 510 value += 1; 511 } 512 } 513 } 514 return value; 515 } 516 517 518 /** 519 * Returns true if the given character is a digit (0-9 only). 520 * 521 * @param ch the character to test 522 * 523 * @return true or false 524 */ 525 public static boolean isDigit(char ch) { 526 return ('0' <= ch && ch <= '9'); 527 } 528}