001/* 002 * $Id: ArabicLigaturizer.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.pdf; 045 046/** 047 * Shape arabic characters. This code was inspired by an LGPL'ed C library: 048 * Pango ( see http://www.pango.com/ ). Note that the code of this class is 049 * the original work of Paulo Soares. 050 * 051 * @author Paulo Soares 052 */ 053public class ArabicLigaturizer { 054 055 static boolean isVowel(char s) { 056 return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670); 057 } 058 059 static char charshape(char s, int which) 060 /* which 0=isolated 1=final 2=initial 3=medial */ 061 { 062 int l, r, m; 063 if ((s >= 0x0621) && (s <= 0x06D3)) { 064 l = 0; 065 r = chartable.length - 1; 066 while (l <= r) { 067 m = (l + r) / 2; 068 if (s == chartable[m][0]) { 069 return chartable[m][which + 1]; 070 } 071 else if (s < chartable[m][0]) { 072 r = m - 1; 073 } 074 else { 075 l = m + 1; 076 } 077 } 078 } 079 else if (s >= 0xfef5 && s <= 0xfefb) 080 return (char)(s + which); 081 return s; 082 } 083 084 static int shapecount(char s) { 085 int l, r, m; 086 if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) { 087 l = 0; 088 r = chartable.length - 1; 089 while (l <= r) { 090 m = (l + r) / 2; 091 if (s == chartable[m][0]) { 092 return chartable[m].length - 1; 093 } 094 else if (s < chartable[m][0]) { 095 r = m - 1; 096 } 097 else { 098 l = m + 1; 099 } 100 } 101 } 102 else if (s == ZWJ) { 103 return 4; 104 } 105 return 1; 106 } 107 108 static int ligature(char newchar, charstruct oldchar) { 109 /* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */ 110 int retval = 0; 111 112 if (oldchar.basechar == 0) 113 return 0; 114 if (isVowel(newchar)) { 115 retval = 1; 116 if ((oldchar.vowel != 0) && (newchar != SHADDA)) { 117 retval = 2; /* we eliminate the old vowel .. */ 118 } 119 switch (newchar) { 120 case SHADDA: 121 if (oldchar.mark1 == 0) { 122 oldchar.mark1 = SHADDA; 123 } 124 else { 125 return 0; /* no ligature possible */ 126 } 127 break; 128 case HAMZABELOW: 129 switch (oldchar.basechar) { 130 case ALEF: 131 oldchar.basechar = ALEFHAMZABELOW; 132 retval = 2; 133 break; 134 case LAM_ALEF: 135 oldchar.basechar = LAM_ALEFHAMZABELOW; 136 retval = 2; 137 break; 138 default: 139 oldchar.mark1 = HAMZABELOW; 140 break; 141 } 142 break; 143 case HAMZAABOVE: 144 switch (oldchar.basechar) { 145 case ALEF: 146 oldchar.basechar = ALEFHAMZA; 147 retval = 2; 148 break; 149 case LAM_ALEF: 150 oldchar.basechar = LAM_ALEFHAMZA; 151 retval = 2; 152 break; 153 case WAW: 154 oldchar.basechar = WAWHAMZA; 155 retval = 2; 156 break; 157 case YEH: 158 case ALEFMAKSURA: 159 case FARSIYEH: 160 oldchar.basechar = YEHHAMZA; 161 retval = 2; 162 break; 163 default: /* whatever sense this may make .. */ 164 oldchar.mark1 = HAMZAABOVE; 165 break; 166 } 167 break; 168 case MADDA: 169 switch (oldchar.basechar) { 170 case ALEF: 171 oldchar.basechar = ALEFMADDA; 172 retval = 2; 173 break; 174 } 175 break; 176 default: 177 oldchar.vowel = newchar; 178 break; 179 } 180 if (retval == 1) { 181 oldchar.lignum++; 182 } 183 return retval; 184 } 185 if (oldchar.vowel != 0) { /* if we already joined a vowel, we can't join a Hamza */ 186 return 0; 187 } 188 189 switch (oldchar.basechar) { 190 case LAM: 191 switch (newchar) { 192 case ALEF: 193 oldchar.basechar = LAM_ALEF; 194 oldchar.numshapes = 2; 195 retval = 3; 196 break; 197 case ALEFHAMZA: 198 oldchar.basechar = LAM_ALEFHAMZA; 199 oldchar.numshapes = 2; 200 retval = 3; 201 break; 202 case ALEFHAMZABELOW: 203 oldchar.basechar = LAM_ALEFHAMZABELOW; 204 oldchar.numshapes = 2; 205 retval = 3; 206 break; 207 case ALEFMADDA: 208 oldchar.basechar = LAM_ALEFMADDA; 209 oldchar.numshapes = 2; 210 retval = 3; 211 break; 212 } 213 break; 214 case 0: 215 oldchar.basechar = newchar; 216 oldchar.numshapes = shapecount(newchar); 217 retval = 1; 218 break; 219 } 220 return retval; 221 } 222 223 static void copycstostring(StringBuffer string, charstruct s, int level) { 224 /* s is a shaped charstruct; i is the index into the string */ 225 if (s.basechar == 0) 226 return; 227 228 string.append(s.basechar); 229 s.lignum--; 230 if (s.mark1 != 0) { 231 if ((level & ar_novowel) == 0) { 232 string.append(s.mark1); 233 s.lignum--; 234 } 235 else { 236 s.lignum--; 237 } 238 } 239 if (s.vowel != 0) { 240 if ((level & ar_novowel) == 0) { 241 string.append(s.vowel); 242 s.lignum--; 243 } 244 else { /* vowel elimination */ 245 s.lignum--; 246 } 247 } 248// while (s.lignum > 0) { /* NULL-insertion for Langbox-font */ 249// string[i] = 0; 250// i++; 251// (s.lignum)--; 252// } 253// return i; 254 } 255 256 // return len 257 static void doublelig(StringBuffer string, int level) 258 /* Ok. We have presentation ligatures in our font. */ 259 { 260 int len; 261 int olen = len = string.length(); 262 int j = 0, si = 1; 263 char lapresult; 264 265 while (si < olen) { 266 lapresult = 0; 267 if ((level & ar_composedtashkeel) != 0) { 268 switch (string.charAt(j)) { 269 case SHADDA: 270 switch (string.charAt(si)) { 271 case KASRA: 272 lapresult = 0xFC62; 273 break; 274 case FATHA: 275 lapresult = 0xFC60; 276 break; 277 case DAMMA: 278 lapresult = 0xFC61; 279 break; 280 case 0x064C: 281 lapresult = 0xFC5E; 282 break; 283 case 0x064D: 284 lapresult = 0xFC5F; 285 break; 286 } 287 break; 288 case KASRA: 289 if (string.charAt(si) == SHADDA) 290 lapresult = 0xFC62; 291 break; 292 case FATHA: 293 if (string.charAt(si) == SHADDA) 294 lapresult = 0xFC60; 295 break; 296 case DAMMA: 297 if (string.charAt(si) == SHADDA) 298 lapresult = 0xFC61; 299 break; 300 } 301 } 302 303 if ((level & ar_lig) != 0) { 304 switch (string.charAt(j)) { 305 case 0xFEDF: /* LAM initial */ 306 switch (string.charAt(si)) { 307 case 0xFE9E: 308 lapresult = 0xFC3F; 309 break; /* JEEM final */ 310 case 0xFEA0: 311 lapresult = 0xFCC9; 312 break; /* JEEM medial */ 313 case 0xFEA2: 314 lapresult = 0xFC40; 315 break; /* HAH final */ 316 case 0xFEA4: 317 lapresult = 0xFCCA; 318 break; /* HAH medial */ 319 case 0xFEA6: 320 lapresult = 0xFC41; 321 break; /* KHAH final */ 322 case 0xFEA8: 323 lapresult = 0xFCCB; 324 break; /* KHAH medial */ 325 case 0xFEE2: 326 lapresult = 0xFC42; 327 break; /* MEEM final */ 328 case 0xFEE4: 329 lapresult = 0xFCCC; 330 break; /* MEEM medial */ 331 } 332 break; 333 case 0xFE97: /* TEH inital */ 334 switch (string.charAt(si)) { 335 case 0xFEA0: 336 lapresult = 0xFCA1; 337 break; /* JEEM medial */ 338 case 0xFEA4: 339 lapresult = 0xFCA2; 340 break; /* HAH medial */ 341 case 0xFEA8: 342 lapresult = 0xFCA3; 343 break; /* KHAH medial */ 344 } 345 break; 346 case 0xFE91: /* BEH inital */ 347 switch (string.charAt(si)) { 348 case 0xFEA0: 349 lapresult = 0xFC9C; 350 break; /* JEEM medial */ 351 case 0xFEA4: 352 lapresult = 0xFC9D; 353 break; /* HAH medial */ 354 case 0xFEA8: 355 lapresult = 0xFC9E; 356 break; /* KHAH medial */ 357 } 358 break; 359 case 0xFEE7: /* NOON inital */ 360 switch (string.charAt(si)) { 361 case 0xFEA0: 362 lapresult = 0xFCD2; 363 break; /* JEEM initial */ 364 case 0xFEA4: 365 lapresult = 0xFCD3; 366 break; /* HAH medial */ 367 case 0xFEA8: 368 lapresult = 0xFCD4; 369 break; /* KHAH medial */ 370 } 371 break; 372 373 case 0xFEE8: /* NOON medial */ 374 switch (string.charAt(si)) { 375 case 0xFEAE: 376 lapresult = 0xFC8A; 377 break; /* REH final */ 378 case 0xFEB0: 379 lapresult = 0xFC8B; 380 break; /* ZAIN final */ 381 } 382 break; 383 case 0xFEE3: /* MEEM initial */ 384 switch (string.charAt(si)) { 385 case 0xFEA0: 386 lapresult = 0xFCCE; 387 break; /* JEEM medial */ 388 case 0xFEA4: 389 lapresult = 0xFCCF; 390 break; /* HAH medial */ 391 case 0xFEA8: 392 lapresult = 0xFCD0; 393 break; /* KHAH medial */ 394 case 0xFEE4: 395 lapresult = 0xFCD1; 396 break; /* MEEM medial */ 397 } 398 break; 399 400 case 0xFED3: /* FEH initial */ 401 switch (string.charAt(si)) { 402 case 0xFEF2: 403 lapresult = 0xFC32; 404 break; /* YEH final */ 405 } 406 break; 407 408 default: 409 break; 410 } /* end switch string[si] */ 411 } 412 if (lapresult != 0) { 413 string.setCharAt(j, lapresult); 414 len--; 415 si++; /* jump over one character */ 416 /* we'll have to change this, too. */ 417 } 418 else { 419 j++; 420 string.setCharAt(j, string.charAt(si)); 421 si++; 422 } 423 } 424 string.setLength(len); 425 } 426 427 static boolean connects_to_left(charstruct a) { 428 return a.numshapes > 2; 429 } 430 431 static void shape(char text[], StringBuffer string, int level) { 432 /* string is assumed to be empty and big enough. 433 * text is the original text. 434 * This routine does the basic arabic reshaping. 435 * *len the number of non-null characters. 436 * 437 * Note: We have to unshape each character first! 438 */ 439 int join; 440 int which; 441 char nextletter; 442 443 int p = 0; /* initialize for output */ 444 charstruct oldchar = new charstruct(); 445 charstruct curchar = new charstruct(); 446 while (p < text.length) { 447 nextletter = text[p++]; 448 //nextletter = unshape (nextletter); 449 450 join = ligature(nextletter, curchar); 451 if (join == 0) { /* shape curchar */ 452 int nc = shapecount(nextletter); 453 //(*len)++; 454 if (nc == 1) { 455 which = 0; /* final or isolated */ 456 } 457 else { 458 which = 2; /* medial or initial */ 459 } 460 if (connects_to_left(oldchar)) { 461 which++; 462 } 463 464 which = which % (curchar.numshapes); 465 curchar.basechar = charshape(curchar.basechar, which); 466 467 /* get rid of oldchar */ 468 copycstostring(string, oldchar, level); 469 oldchar = curchar; /* new values in oldchar */ 470 471 /* init new curchar */ 472 curchar = new charstruct(); 473 curchar.basechar = nextletter; 474 curchar.numshapes = nc; 475 curchar.lignum++; 476 // (*len) += unligature (&curchar, level); 477 } 478 else if (join == 1) { 479 } 480 // else 481 // { 482 // (*len) += unligature (&curchar, level); 483 // } 484 // p = g_utf8_next_char (p); 485 } 486 487 /* Handle last char */ 488 if (connects_to_left(oldchar)) 489 which = 1; 490 else 491 which = 0; 492 which = which % (curchar.numshapes); 493 curchar.basechar = charshape(curchar.basechar, which); 494 495 /* get rid of oldchar */ 496 copycstostring(string, oldchar, level); 497 copycstostring(string, curchar, level); 498 } 499 500 static int arabic_shape(char src[], int srcoffset, int srclength, char dest[], int destoffset, int destlength, int level) { 501 char str[] = new char[srclength]; 502 for (int k = srclength + srcoffset - 1; k >= srcoffset; --k) 503 str[k - srcoffset] = src[k]; 504 StringBuffer string = new StringBuffer(srclength); 505 shape(str, string, level); 506 if ((level & (ar_composedtashkeel | ar_lig)) != 0) 507 doublelig(string, level); 508// string.reverse(); 509 System.arraycopy(string.toString().toCharArray(), 0, dest, destoffset, string.length()); 510 return string.length(); 511 } 512 513 static void processNumbers(char text[], int offset, int length, int options) { 514 int limit = offset + length; 515 if ((options & DIGITS_MASK) != 0) { 516 char digitBase = '\u0030'; // European digits 517 switch (options & DIGIT_TYPE_MASK) { 518 case DIGIT_TYPE_AN: 519 digitBase = '\u0660'; // Arabic-Indic digits 520 break; 521 522 case DIGIT_TYPE_AN_EXTENDED: 523 digitBase = '\u06f0'; // Eastern Arabic-Indic digits (Persian and Urdu) 524 break; 525 526 default: 527 break; 528 } 529 530 switch (options & DIGITS_MASK) { 531 case DIGITS_EN2AN: { 532 int digitDelta = digitBase - '\u0030'; 533 for (int i = offset; i < limit; ++i) { 534 char ch = text[i]; 535 if (ch <= '\u0039' && ch >= '\u0030') { 536 text[i] += digitDelta; 537 } 538 } 539 } 540 break; 541 542 case DIGITS_AN2EN: { 543 char digitTop = (char)(digitBase + 9); 544 int digitDelta = '\u0030' - digitBase; 545 for (int i = offset; i < limit; ++i) { 546 char ch = text[i]; 547 if (ch <= digitTop && ch >= digitBase) { 548 text[i] += digitDelta; 549 } 550 } 551 } 552 break; 553 554 case DIGITS_EN2AN_INIT_LR: 555 shapeToArabicDigitsWithContext(text, 0, length, digitBase, false); 556 break; 557 558 case DIGITS_EN2AN_INIT_AL: 559 shapeToArabicDigitsWithContext(text, 0, length, digitBase, true); 560 break; 561 562 default: 563 break; 564 } 565 } 566 } 567 568 static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase, boolean lastStrongWasAL) { 569 digitBase -= '0'; // move common adjustment out of loop 570 571 int limit = start + length; 572 for(int i = start; i < limit; ++i) { 573 char ch = dest[i]; 574 switch (BidiOrder.getDirection(ch)) { 575 case BidiOrder.L: 576 case BidiOrder.R: 577 lastStrongWasAL = false; 578 break; 579 case BidiOrder.AL: 580 lastStrongWasAL = true; 581 break; 582 case BidiOrder.EN: 583 if (lastStrongWasAL && ch <= '\u0039') { 584 dest[i] = (char)(ch + digitBase); 585 } 586 break; 587 default: 588 break; 589 } 590 } 591 } 592 593 private static final char ALEF = 0x0627; 594 private static final char ALEFHAMZA = 0x0623; 595 private static final char ALEFHAMZABELOW = 0x0625; 596 private static final char ALEFMADDA = 0x0622; 597 private static final char LAM = 0x0644; 598 private static final char HAMZA = 0x0621; 599 private static final char TATWEEL = 0x0640; 600 private static final char ZWJ = 0x200D; 601 602 private static final char HAMZAABOVE = 0x0654; 603 private static final char HAMZABELOW = 0x0655; 604 605 private static final char WAWHAMZA = 0x0624; 606 private static final char YEHHAMZA = 0x0626; 607 private static final char WAW = 0x0648; 608 private static final char ALEFMAKSURA = 0x0649; 609 private static final char YEH = 0x064A; 610 private static final char FARSIYEH = 0x06CC; 611 612 private static final char SHADDA = 0x0651; 613 private static final char KASRA = 0x0650; 614 private static final char FATHA = 0x064E; 615 private static final char DAMMA = 0x064F; 616 private static final char MADDA = 0x0653; 617 618 private static final char LAM_ALEF = 0xFEFB; 619 private static final char LAM_ALEFHAMZA = 0xFEF7; 620 private static final char LAM_ALEFHAMZABELOW = 0xFEF9; 621 private static final char LAM_ALEFMADDA = 0xFEF5; 622 623 private static final char chartable[][] = { 624 {0x0621, 0xFE80}, /* HAMZA */ 625 {0x0622, 0xFE81, 0xFE82}, /* ALEF WITH MADDA ABOVE */ 626 {0x0623, 0xFE83, 0xFE84}, /* ALEF WITH HAMZA ABOVE */ 627 {0x0624, 0xFE85, 0xFE86}, /* WAW WITH HAMZA ABOVE */ 628 {0x0625, 0xFE87, 0xFE88}, /* ALEF WITH HAMZA BELOW */ 629 {0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, /* YEH WITH HAMZA ABOVE */ 630 {0x0627, 0xFE8D, 0xFE8E}, /* ALEF */ 631 {0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92}, /* BEH */ 632 {0x0629, 0xFE93, 0xFE94}, /* TEH MARBUTA */ 633 {0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98}, /* TEH */ 634 {0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, /* THEH */ 635 {0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, /* JEEM */ 636 {0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, /* HAH */ 637 {0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, /* KHAH */ 638 {0x062F, 0xFEA9, 0xFEAA}, /* DAL */ 639 {0x0630, 0xFEAB, 0xFEAC}, /* THAL */ 640 {0x0631, 0xFEAD, 0xFEAE}, /* REH */ 641 {0x0632, 0xFEAF, 0xFEB0}, /* ZAIN */ 642 {0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, /* SEEN */ 643 {0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, /* SHEEN */ 644 {0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, /* SAD */ 645 {0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, /* DAD */ 646 {0x0637, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, /* TAH */ 647 {0x0638, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, /* ZAH */ 648 {0x0639, 0xFEC9, 0xFECA, 0xFECB, 0xFECC}, /* AIN */ 649 {0x063A, 0xFECD, 0xFECE, 0xFECF, 0xFED0}, /* GHAIN */ 650 {0x0640, 0x0640, 0x0640, 0x0640, 0x0640}, /* TATWEEL */ 651 {0x0641, 0xFED1, 0xFED2, 0xFED3, 0xFED4}, /* FEH */ 652 {0x0642, 0xFED5, 0xFED6, 0xFED7, 0xFED8}, /* QAF */ 653 {0x0643, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, /* KAF */ 654 {0x0644, 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, /* LAM */ 655 {0x0645, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, /* MEEM */ 656 {0x0646, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, /* NOON */ 657 {0x0647, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, /* HEH */ 658 {0x0648, 0xFEED, 0xFEEE}, /* WAW */ 659 {0x0649, 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}, /* ALEF MAKSURA */ 660 {0x064A, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, /* YEH */ 661 {0x0671, 0xFB50, 0xFB51}, /* ALEF WASLA */ 662 {0x0679, 0xFB66, 0xFB67, 0xFB68, 0xFB69}, /* TTEH */ 663 {0x067A, 0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, /* TTEHEH */ 664 {0x067B, 0xFB52, 0xFB53, 0xFB54, 0xFB55}, /* BEEH */ 665 {0x067E, 0xFB56, 0xFB57, 0xFB58, 0xFB59}, /* PEH */ 666 {0x067F, 0xFB62, 0xFB63, 0xFB64, 0xFB65}, /* TEHEH */ 667 {0x0680, 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, /* BEHEH */ 668 {0x0683, 0xFB76, 0xFB77, 0xFB78, 0xFB79}, /* NYEH */ 669 {0x0684, 0xFB72, 0xFB73, 0xFB74, 0xFB75}, /* DYEH */ 670 {0x0686, 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, /* TCHEH */ 671 {0x0687, 0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, /* TCHEHEH */ 672 {0x0688, 0xFB88, 0xFB89}, /* DDAL */ 673 {0x068C, 0xFB84, 0xFB85}, /* DAHAL */ 674 {0x068D, 0xFB82, 0xFB83}, /* DDAHAL */ 675 {0x068E, 0xFB86, 0xFB87}, /* DUL */ 676 {0x0691, 0xFB8C, 0xFB8D}, /* RREH */ 677 {0x0698, 0xFB8A, 0xFB8B}, /* JEH */ 678 {0x06A4, 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, /* VEH */ 679 {0x06A6, 0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, /* PEHEH */ 680 {0x06A9, 0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, /* KEHEH */ 681 {0x06AD, 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, /* NG */ 682 {0x06AF, 0xFB92, 0xFB93, 0xFB94, 0xFB95}, /* GAF */ 683 {0x06B1, 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, /* NGOEH */ 684 {0x06B3, 0xFB96, 0xFB97, 0xFB98, 0xFB99}, /* GUEH */ 685 {0x06BA, 0xFB9E, 0xFB9F}, /* NOON GHUNNA */ 686 {0x06BB, 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, /* RNOON */ 687 {0x06BE, 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, /* HEH DOACHASHMEE */ 688 {0x06C0, 0xFBA4, 0xFBA5}, /* HEH WITH YEH ABOVE */ 689 {0x06C1, 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, /* HEH GOAL */ 690 {0x06C5, 0xFBE0, 0xFBE1}, /* KIRGHIZ OE */ 691 {0x06C6, 0xFBD9, 0xFBDA}, /* OE */ 692 {0x06C7, 0xFBD7, 0xFBD8}, /* U */ 693 {0x06C8, 0xFBDB, 0xFBDC}, /* YU */ 694 {0x06C9, 0xFBE2, 0xFBE3}, /* KIRGHIZ YU */ 695 {0x06CB, 0xFBDE, 0xFBDF}, /* VE */ 696 {0x06CC, 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, /* FARSI YEH */ 697 {0x06D0, 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, /* E */ 698 {0x06D2, 0xFBAE, 0xFBAF}, /* YEH BARREE */ 699 {0x06D3, 0xFBB0, 0xFBB1} /* YEH BARREE WITH HAMZA ABOVE */ 700 }; 701 702 public static final int ar_nothing = 0x0; 703 public static final int ar_novowel = 0x1; 704 public static final int ar_composedtashkeel = 0x4; 705 public static final int ar_lig = 0x8; 706 /** 707 * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits. 708 */ 709 public static final int DIGITS_EN2AN = 0x20; 710 711 /** 712 * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039). 713 */ 714 public static final int DIGITS_AN2EN = 0x40; 715 716 /** 717 * Digit shaping option: 718 * Replace European digits (U+0030...U+0039) by Arabic-Indic digits 719 * if the most recent strongly directional character 720 * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). 721 * The initial state at the start of the text is assumed to be not an Arabic, 722 * letter, so European digits at the start of the text will not change. 723 * Compare to DIGITS_ALEN2AN_INIT_AL. 724 */ 725 public static final int DIGITS_EN2AN_INIT_LR = 0x60; 726 727 /** 728 * Digit shaping option: 729 * Replace European digits (U+0030...U+0039) by Arabic-Indic digits 730 * if the most recent strongly directional character 731 * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). 732 * The initial state at the start of the text is assumed to be an Arabic, 733 * letter, so European digits at the start of the text will change. 734 * Compare to DIGITS_ALEN2AN_INT_LR. 735 */ 736 public static final int DIGITS_EN2AN_INIT_AL = 0x80; 737 738 /** Not a valid option value. */ 739 private static final int DIGITS_RESERVED = 0xa0; 740 741 /** 742 * Bit mask for digit shaping options. 743 */ 744 public static final int DIGITS_MASK = 0xe0; 745 746 /** 747 * Digit type option: Use Arabic-Indic digits (U+0660...U+0669). 748 */ 749 public static final int DIGIT_TYPE_AN = 0; 750 751 /** 752 * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). 753 */ 754 public static final int DIGIT_TYPE_AN_EXTENDED = 0x100; 755 756 /** 757 * Bit mask for digit type options. 758 */ 759 public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00? 760 761 static class charstruct { 762 char basechar; 763 char mark1; /* has to be initialized to zero */ 764 char vowel; 765 int lignum; /* is a ligature with lignum aditional characters */ 766 int numshapes = 1; 767 }; 768 769 770}