001/*
002 * $Id: ArabicLigaturizer.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf;
045
046/**
047 * Shape arabic characters. This code was inspired by an LGPL'ed C library:
048 * Pango ( see http://www.pango.com/ ). Note that the code of this class is
049 * the original work of Paulo Soares.
050 *
051 * @author Paulo Soares
052 */
053public class ArabicLigaturizer {
054    
055    static boolean isVowel(char s) {
056        return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670);
057    }
058
059    static char charshape(char s, int which)
060    /* which 0=isolated 1=final 2=initial 3=medial */
061    {
062        int l, r, m;
063        if ((s >= 0x0621) && (s <= 0x06D3)) {
064            l = 0;
065            r = chartable.length - 1;
066            while (l <= r) {
067                m = (l + r) / 2;
068                if (s == chartable[m][0]) {
069                    return chartable[m][which + 1];
070                }
071                else if (s < chartable[m][0]) {
072                    r = m - 1;
073                }
074                else {
075                    l = m + 1;
076                }
077            }
078        }
079        else if (s >= 0xfef5 && s <= 0xfefb)
080            return (char)(s + which);
081        return s;
082    }
083
084    static int shapecount(char s) {
085        int l, r, m;
086        if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) {
087            l = 0;
088            r = chartable.length - 1;
089            while (l <= r) {
090                m = (l + r) / 2;
091                if (s == chartable[m][0]) {
092                    return chartable[m].length - 1;
093                }
094                else if (s < chartable[m][0]) {
095                    r = m - 1;
096                }
097                else {
098                    l = m + 1;
099                }
100            }
101        }
102        else if (s == ZWJ) {
103            return 4;
104        }
105        return 1;
106    }
107    
108    static int ligature(char newchar, charstruct oldchar) {
109    /* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */
110        int retval = 0;
111        
112        if (oldchar.basechar == 0)
113            return 0;
114        if (isVowel(newchar)) {
115            retval = 1;
116            if ((oldchar.vowel != 0) && (newchar != SHADDA)) {
117                retval = 2;           /* we eliminate the old vowel .. */
118            }
119            switch (newchar) {
120                case SHADDA:
121                    if (oldchar.mark1 == 0) {
122                        oldchar.mark1 = SHADDA;
123                    }
124                    else {
125                        return 0;         /* no ligature possible */
126                    }
127                    break;
128                case HAMZABELOW:
129                    switch (oldchar.basechar) {
130                        case ALEF:
131                            oldchar.basechar = ALEFHAMZABELOW;
132                            retval = 2;
133                            break;
134                        case LAM_ALEF:
135                            oldchar.basechar = LAM_ALEFHAMZABELOW;
136                            retval = 2;
137                            break;
138                        default:
139                            oldchar.mark1 = HAMZABELOW;
140                            break;
141                    }
142                    break;
143                case HAMZAABOVE:
144                    switch (oldchar.basechar) {
145                        case ALEF:
146                            oldchar.basechar = ALEFHAMZA;
147                            retval = 2;
148                            break;
149                        case LAM_ALEF:
150                            oldchar.basechar = LAM_ALEFHAMZA;
151                            retval = 2;
152                            break;
153                        case WAW:
154                            oldchar.basechar = WAWHAMZA;
155                            retval = 2;
156                            break;
157                        case YEH:
158                        case ALEFMAKSURA:
159                        case FARSIYEH:
160                            oldchar.basechar = YEHHAMZA;
161                            retval = 2;
162                            break;
163                        default:           /* whatever sense this may make .. */
164                            oldchar.mark1 = HAMZAABOVE;
165                            break;
166                    }
167                    break;
168                case MADDA:
169                    switch (oldchar.basechar) {
170                        case ALEF:
171                            oldchar.basechar = ALEFMADDA;
172                            retval = 2;
173                            break;
174                    }
175                    break;
176                default:
177                    oldchar.vowel = newchar;
178                    break;
179            }
180            if (retval == 1) {
181                oldchar.lignum++;
182            }
183            return retval;
184        }
185        if (oldchar.vowel != 0) {  /* if we already joined a vowel, we can't join a Hamza */
186            return 0;
187        }
188        
189        switch (oldchar.basechar) {
190            case LAM:
191                switch (newchar) {
192                    case ALEF:
193                        oldchar.basechar = LAM_ALEF;
194                        oldchar.numshapes = 2;
195                        retval = 3;
196                        break;
197                    case ALEFHAMZA:
198                        oldchar.basechar = LAM_ALEFHAMZA;
199                        oldchar.numshapes = 2;
200                        retval = 3;
201                        break;
202                    case ALEFHAMZABELOW:
203                        oldchar.basechar = LAM_ALEFHAMZABELOW;
204                        oldchar.numshapes = 2;
205                        retval = 3;
206                        break;
207                    case ALEFMADDA:
208                        oldchar.basechar = LAM_ALEFMADDA;
209                        oldchar.numshapes = 2;
210                        retval = 3;
211                        break;
212                }
213                break;
214            case 0:
215                oldchar.basechar = newchar;
216                oldchar.numshapes = shapecount(newchar);
217                retval = 1;
218                break;
219        }
220        return retval;
221    }
222    
223    static void copycstostring(StringBuffer string, charstruct s, int level) {
224    /* s is a shaped charstruct; i is the index into the string */
225        if (s.basechar == 0)
226            return;
227        
228        string.append(s.basechar);
229        s.lignum--;
230        if (s.mark1 != 0) {
231            if ((level & ar_novowel) == 0) {
232                string.append(s.mark1);
233                s.lignum--;
234            }
235            else {
236                s.lignum--;
237            }
238        }
239        if (s.vowel != 0) {
240            if ((level & ar_novowel) == 0) {
241                string.append(s.vowel);
242                s.lignum--;
243            }
244            else {                       /* vowel elimination */
245                s.lignum--;
246            }
247        }
248//        while (s.lignum > 0) {                           /* NULL-insertion for Langbox-font */
249//            string[i] = 0;
250//            i++;
251//            (s.lignum)--;
252//        }
253//        return i;
254    }
255
256    // return len
257    static void doublelig(StringBuffer string, int level)
258    /* Ok. We have presentation ligatures in our font. */
259    {
260        int len;
261        int olen = len = string.length();
262        int j = 0, si = 1;
263        char lapresult;
264        
265        while (si < olen) {
266            lapresult = 0;
267            if ((level & ar_composedtashkeel) != 0) {
268                switch (string.charAt(j)) {
269                    case SHADDA:
270                        switch (string.charAt(si)) {
271                            case KASRA:
272                                lapresult = 0xFC62;
273                                break;
274                            case FATHA:
275                                lapresult = 0xFC60;
276                                break;
277                            case DAMMA:
278                                lapresult = 0xFC61;
279                                break;
280                            case 0x064C:
281                                lapresult = 0xFC5E;
282                                break;
283                            case 0x064D:
284                                lapresult = 0xFC5F;
285                                break;
286                        }
287                        break;
288                    case KASRA:
289                        if (string.charAt(si) == SHADDA)
290                            lapresult = 0xFC62;
291                        break;
292                    case FATHA:
293                        if (string.charAt(si) == SHADDA)
294                            lapresult = 0xFC60;
295                        break;
296                    case DAMMA:
297                        if (string.charAt(si) == SHADDA)
298                            lapresult = 0xFC61;
299                        break;
300                }
301            }
302            
303            if ((level & ar_lig) != 0) {
304                switch (string.charAt(j)) {
305                    case 0xFEDF:       /* LAM initial */
306                        switch (string.charAt(si)) {
307                            case 0xFE9E:
308                                lapresult = 0xFC3F;
309                                break;        /* JEEM final */
310                            case 0xFEA0:
311                                lapresult = 0xFCC9;
312                                break;        /* JEEM medial */
313                            case 0xFEA2:
314                                lapresult = 0xFC40;
315                                break;        /* HAH final */
316                            case 0xFEA4:
317                                lapresult = 0xFCCA;
318                                break;        /* HAH medial */
319                            case 0xFEA6:
320                                lapresult = 0xFC41;
321                                break;        /* KHAH final */
322                            case 0xFEA8:
323                                lapresult = 0xFCCB;
324                                break;        /* KHAH medial */
325                            case 0xFEE2:
326                                lapresult = 0xFC42;
327                                break;        /* MEEM final */
328                            case 0xFEE4:
329                                lapresult = 0xFCCC;
330                                break;        /* MEEM medial */
331                        }
332                        break;
333                    case 0xFE97:       /* TEH inital */
334                        switch (string.charAt(si)) {
335                            case 0xFEA0:
336                                lapresult = 0xFCA1;
337                                break;        /* JEEM medial */
338                            case 0xFEA4:
339                                lapresult = 0xFCA2;
340                                break;        /* HAH medial */
341                            case 0xFEA8:
342                                lapresult = 0xFCA3;
343                                break;        /* KHAH medial */
344                        }
345                        break;
346                    case 0xFE91:       /* BEH inital */
347                        switch (string.charAt(si)) {
348                            case 0xFEA0:
349                                lapresult = 0xFC9C;
350                                break;        /* JEEM medial */
351                            case 0xFEA4:
352                                lapresult = 0xFC9D;
353                                break;        /* HAH medial */
354                            case 0xFEA8:
355                                lapresult = 0xFC9E;
356                                break;        /* KHAH medial */
357                        }
358                        break;
359                    case 0xFEE7:       /* NOON inital */
360                        switch (string.charAt(si)) {
361                            case 0xFEA0:
362                                lapresult = 0xFCD2;
363                                break;        /* JEEM initial */
364                            case 0xFEA4:
365                                lapresult = 0xFCD3;
366                                break;        /* HAH medial */
367                            case 0xFEA8:
368                                lapresult = 0xFCD4;
369                                break;        /* KHAH medial */
370                        }
371                        break;
372                        
373                    case 0xFEE8:       /* NOON medial */
374                        switch (string.charAt(si)) {
375                            case 0xFEAE:
376                                lapresult = 0xFC8A;
377                                break;        /* REH final  */
378                            case 0xFEB0:
379                                lapresult = 0xFC8B;
380                                break;        /* ZAIN final */
381                        }
382                        break;
383                    case 0xFEE3:       /* MEEM initial */
384                        switch (string.charAt(si)) {
385                            case 0xFEA0:
386                                lapresult = 0xFCCE;
387                                break;        /* JEEM medial */
388                            case 0xFEA4:
389                                lapresult = 0xFCCF;
390                                break;        /* HAH medial */
391                            case 0xFEA8:
392                                lapresult = 0xFCD0;
393                                break;        /* KHAH medial */
394                            case 0xFEE4:
395                                lapresult = 0xFCD1;
396                                break;        /* MEEM medial */
397                        }
398                        break;
399                        
400                    case 0xFED3:       /* FEH initial */
401                        switch (string.charAt(si)) {
402                            case 0xFEF2:
403                                lapresult = 0xFC32;
404                                break;        /* YEH final */
405                        }
406                        break;
407                        
408                    default:
409                        break;
410                }                   /* end switch string[si] */
411            }
412            if (lapresult != 0) {
413                string.setCharAt(j, lapresult);
414                len--;
415                si++;                 /* jump over one character */
416                /* we'll have to change this, too. */
417            }
418            else {
419                j++;
420                string.setCharAt(j, string.charAt(si));
421                si++;
422            }
423        }
424        string.setLength(len);
425    }
426
427    static boolean connects_to_left(charstruct a) {
428        return a.numshapes > 2;
429    }
430    
431    static void shape(char text[], StringBuffer string, int level) {
432  /* string is assumed to be empty and big enough.
433   * text is the original text.
434   * This routine does the basic arabic reshaping.
435   * *len the number of non-null characters.
436   *
437   * Note: We have to unshape each character first!
438   */
439        int join;
440        int which;
441        char nextletter;
442        
443        int p = 0;                     /* initialize for output */
444        charstruct oldchar = new charstruct();
445        charstruct curchar = new charstruct();
446        while (p < text.length) {
447            nextletter = text[p++];
448            //nextletter = unshape (nextletter);
449            
450            join = ligature(nextletter, curchar);
451            if (join == 0) {                       /* shape curchar */
452                int nc = shapecount(nextletter);
453                //(*len)++;
454                if (nc == 1) {
455                    which = 0;        /* final or isolated */
456                }
457                else {
458                    which = 2;        /* medial or initial */
459                }
460                if (connects_to_left(oldchar)) {
461                    which++;
462                }
463                
464                which = which % (curchar.numshapes);
465                curchar.basechar = charshape(curchar.basechar, which);
466                
467                /* get rid of oldchar */
468                copycstostring(string, oldchar, level);
469                oldchar = curchar;    /* new values in oldchar */
470                
471                /* init new curchar */
472                curchar = new charstruct();
473                curchar.basechar = nextletter;
474                curchar.numshapes = nc;
475                curchar.lignum++;
476                //          (*len) += unligature (&curchar, level);
477            }
478            else if (join == 1) {
479            }
480            //      else
481            //        {
482            //          (*len) += unligature (&curchar, level);
483            //        }
484            //      p = g_utf8_next_char (p);
485        }
486        
487        /* Handle last char */
488        if (connects_to_left(oldchar))
489            which = 1;
490        else
491            which = 0;
492        which = which % (curchar.numshapes);
493        curchar.basechar = charshape(curchar.basechar, which);
494        
495        /* get rid of oldchar */
496        copycstostring(string, oldchar, level);
497        copycstostring(string, curchar, level);
498    }
499
500    static int arabic_shape(char src[], int srcoffset, int srclength, char dest[], int destoffset, int destlength, int level) {
501        char str[] = new char[srclength];
502        for (int k = srclength + srcoffset - 1; k >= srcoffset; --k)
503            str[k - srcoffset] = src[k];
504        StringBuffer string = new StringBuffer(srclength);
505        shape(str, string, level);
506        if ((level & (ar_composedtashkeel | ar_lig)) != 0)
507            doublelig(string, level);
508//        string.reverse();
509        System.arraycopy(string.toString().toCharArray(), 0, dest, destoffset, string.length());
510        return string.length();
511    }
512
513    static void processNumbers(char text[], int offset, int length, int options) {
514        int limit = offset + length;
515        if ((options & DIGITS_MASK) != 0) {
516            char digitBase = '\u0030'; // European digits
517            switch (options & DIGIT_TYPE_MASK) {
518                case DIGIT_TYPE_AN:
519                    digitBase = '\u0660';  // Arabic-Indic digits
520                    break;
521                    
522                case DIGIT_TYPE_AN_EXTENDED:
523                    digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)
524                    break;
525                    
526                default:
527                    break;
528            }
529            
530            switch (options & DIGITS_MASK) {
531                case DIGITS_EN2AN: {
532                    int digitDelta = digitBase - '\u0030';
533                    for (int i = offset; i < limit; ++i) {
534                        char ch = text[i];
535                        if (ch <= '\u0039' && ch >= '\u0030') {
536                            text[i] += digitDelta;
537                        }
538                    }
539                }
540                break;
541                
542                case DIGITS_AN2EN: {
543                    char digitTop = (char)(digitBase + 9);
544                    int digitDelta = '\u0030' - digitBase;
545                    for (int i = offset; i < limit; ++i) {
546                        char ch = text[i];
547                        if (ch <= digitTop && ch >= digitBase) {
548                            text[i] += digitDelta;
549                        }
550                    }
551                }
552                break;
553                
554                case DIGITS_EN2AN_INIT_LR:
555                    shapeToArabicDigitsWithContext(text, 0, length, digitBase, false);
556                    break;
557                    
558                case DIGITS_EN2AN_INIT_AL:
559                    shapeToArabicDigitsWithContext(text, 0, length, digitBase, true);
560                    break;
561                    
562                default:
563                    break;
564            }
565        }
566    }
567    
568    static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase,  boolean lastStrongWasAL) {
569        digitBase -= '0'; // move common adjustment out of loop
570 
571        int limit = start + length;
572        for(int i = start; i < limit; ++i) {
573            char ch = dest[i];
574            switch (BidiOrder.getDirection(ch)) {
575            case BidiOrder.L:
576            case BidiOrder.R:
577                lastStrongWasAL = false;
578                break;
579            case BidiOrder.AL:
580                lastStrongWasAL = true;
581                break;
582            case BidiOrder.EN:
583                if (lastStrongWasAL && ch <= '\u0039') {
584                    dest[i] = (char)(ch + digitBase);
585                }
586                break;
587            default:
588                break;
589            }
590        }
591    }
592
593    private static final char ALEF = 0x0627;
594    private static final char ALEFHAMZA = 0x0623;
595    private static final char ALEFHAMZABELOW = 0x0625;
596    private static final char ALEFMADDA = 0x0622;
597    private static final char LAM = 0x0644;
598    private static final char HAMZA = 0x0621;
599    private static final char TATWEEL = 0x0640;
600    private static final char ZWJ = 0x200D;
601
602    private static final char HAMZAABOVE = 0x0654;
603    private static final char HAMZABELOW = 0x0655;
604
605    private static final char WAWHAMZA = 0x0624;
606    private static final char YEHHAMZA = 0x0626;
607    private static final char WAW = 0x0648;
608    private static final char ALEFMAKSURA = 0x0649;
609    private static final char YEH = 0x064A;
610    private static final char FARSIYEH = 0x06CC;
611
612    private static final char SHADDA = 0x0651;
613    private static final char KASRA = 0x0650;
614    private static final char FATHA = 0x064E;
615    private static final char DAMMA = 0x064F;
616    private static final char MADDA = 0x0653;
617
618    private static final char LAM_ALEF = 0xFEFB;
619    private static final char LAM_ALEFHAMZA = 0xFEF7;
620    private static final char LAM_ALEFHAMZABELOW = 0xFEF9;
621    private static final char LAM_ALEFMADDA = 0xFEF5;
622
623    private static final char chartable[][] = {
624        {0x0621, 0xFE80}, /* HAMZA */
625        {0x0622, 0xFE81, 0xFE82}, /* ALEF WITH MADDA ABOVE */
626        {0x0623, 0xFE83, 0xFE84}, /* ALEF WITH HAMZA ABOVE */
627        {0x0624, 0xFE85, 0xFE86}, /* WAW WITH HAMZA ABOVE */
628        {0x0625, 0xFE87, 0xFE88}, /* ALEF WITH HAMZA BELOW */
629        {0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, /* YEH WITH HAMZA ABOVE */
630        {0x0627, 0xFE8D, 0xFE8E}, /* ALEF */
631        {0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92}, /* BEH */
632        {0x0629, 0xFE93, 0xFE94}, /* TEH MARBUTA */
633        {0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98}, /* TEH */
634        {0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, /* THEH */
635        {0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, /* JEEM */
636        {0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, /* HAH */
637        {0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, /* KHAH */
638        {0x062F, 0xFEA9, 0xFEAA}, /* DAL */
639        {0x0630, 0xFEAB, 0xFEAC}, /* THAL */
640        {0x0631, 0xFEAD, 0xFEAE}, /* REH */
641        {0x0632, 0xFEAF, 0xFEB0}, /* ZAIN */
642        {0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, /* SEEN */
643        {0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, /* SHEEN */
644        {0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, /* SAD */
645        {0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, /* DAD */
646        {0x0637, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, /* TAH */
647        {0x0638, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, /* ZAH */
648        {0x0639, 0xFEC9, 0xFECA, 0xFECB, 0xFECC}, /* AIN */
649        {0x063A, 0xFECD, 0xFECE, 0xFECF, 0xFED0}, /* GHAIN */
650        {0x0640, 0x0640, 0x0640, 0x0640, 0x0640}, /* TATWEEL */
651        {0x0641, 0xFED1, 0xFED2, 0xFED3, 0xFED4}, /* FEH */
652        {0x0642, 0xFED5, 0xFED6, 0xFED7, 0xFED8}, /* QAF */
653        {0x0643, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, /* KAF */
654        {0x0644, 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, /* LAM */
655        {0x0645, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, /* MEEM */
656        {0x0646, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, /* NOON */
657        {0x0647, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, /* HEH */
658        {0x0648, 0xFEED, 0xFEEE}, /* WAW */
659        {0x0649, 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}, /* ALEF MAKSURA */
660        {0x064A, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, /* YEH */
661        {0x0671, 0xFB50, 0xFB51}, /* ALEF WASLA */
662        {0x0679, 0xFB66, 0xFB67, 0xFB68, 0xFB69}, /* TTEH */
663        {0x067A, 0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, /* TTEHEH */
664        {0x067B, 0xFB52, 0xFB53, 0xFB54, 0xFB55}, /* BEEH */
665        {0x067E, 0xFB56, 0xFB57, 0xFB58, 0xFB59}, /* PEH */
666        {0x067F, 0xFB62, 0xFB63, 0xFB64, 0xFB65}, /* TEHEH */
667        {0x0680, 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, /* BEHEH */
668        {0x0683, 0xFB76, 0xFB77, 0xFB78, 0xFB79}, /* NYEH */
669        {0x0684, 0xFB72, 0xFB73, 0xFB74, 0xFB75}, /* DYEH */
670        {0x0686, 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, /* TCHEH */
671        {0x0687, 0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, /* TCHEHEH */
672        {0x0688, 0xFB88, 0xFB89}, /* DDAL */
673        {0x068C, 0xFB84, 0xFB85}, /* DAHAL */
674        {0x068D, 0xFB82, 0xFB83}, /* DDAHAL */
675        {0x068E, 0xFB86, 0xFB87}, /* DUL */
676        {0x0691, 0xFB8C, 0xFB8D}, /* RREH */
677        {0x0698, 0xFB8A, 0xFB8B}, /* JEH */
678        {0x06A4, 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, /* VEH */
679        {0x06A6, 0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, /* PEHEH */
680        {0x06A9, 0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, /* KEHEH */
681        {0x06AD, 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, /* NG */
682        {0x06AF, 0xFB92, 0xFB93, 0xFB94, 0xFB95}, /* GAF */
683        {0x06B1, 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, /* NGOEH */
684        {0x06B3, 0xFB96, 0xFB97, 0xFB98, 0xFB99}, /* GUEH */
685        {0x06BA, 0xFB9E, 0xFB9F}, /* NOON GHUNNA */
686        {0x06BB, 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, /* RNOON */
687        {0x06BE, 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, /* HEH DOACHASHMEE */
688        {0x06C0, 0xFBA4, 0xFBA5}, /* HEH WITH YEH ABOVE */
689        {0x06C1, 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, /* HEH GOAL */
690        {0x06C5, 0xFBE0, 0xFBE1}, /* KIRGHIZ OE */
691        {0x06C6, 0xFBD9, 0xFBDA}, /* OE */
692        {0x06C7, 0xFBD7, 0xFBD8}, /* U */
693        {0x06C8, 0xFBDB, 0xFBDC}, /* YU */
694        {0x06C9, 0xFBE2, 0xFBE3}, /* KIRGHIZ YU */
695        {0x06CB, 0xFBDE, 0xFBDF}, /* VE */
696        {0x06CC, 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, /* FARSI YEH */
697        {0x06D0, 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, /* E */
698        {0x06D2, 0xFBAE, 0xFBAF}, /* YEH BARREE */
699        {0x06D3, 0xFBB0, 0xFBB1} /* YEH BARREE WITH HAMZA ABOVE */
700        };
701
702        public static final int ar_nothing  = 0x0;
703        public static final int ar_novowel = 0x1;
704        public static final int ar_composedtashkeel = 0x4;
705        public static final int ar_lig = 0x8;
706        /**
707         * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
708         */
709        public static final int DIGITS_EN2AN = 0x20;
710        
711        /**
712         * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
713         */
714        public static final int DIGITS_AN2EN = 0x40;
715        
716        /**
717         * Digit shaping option:
718         * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
719         * if the most recent strongly directional character
720         * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
721         * The initial state at the start of the text is assumed to be not an Arabic,
722         * letter, so European digits at the start of the text will not change.
723         * Compare to DIGITS_ALEN2AN_INIT_AL.
724         */
725        public static final int DIGITS_EN2AN_INIT_LR = 0x60;
726        
727        /**
728         * Digit shaping option:
729         * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
730         * if the most recent strongly directional character
731         * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
732         * The initial state at the start of the text is assumed to be an Arabic,
733         * letter, so European digits at the start of the text will change.
734         * Compare to DIGITS_ALEN2AN_INT_LR.
735         */
736        public static final int DIGITS_EN2AN_INIT_AL = 0x80;
737        
738        /** Not a valid option value. */
739        private static final int DIGITS_RESERVED = 0xa0;
740        
741        /**
742         * Bit mask for digit shaping options.
743         */
744        public static final int DIGITS_MASK = 0xe0;
745        
746        /**
747         * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
748         */
749        public static final int DIGIT_TYPE_AN = 0;
750        
751        /**
752         * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
753         */
754        public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
755
756        /**
757         * Bit mask for digit type options.
758         */
759        public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
760
761        static class charstruct {
762            char basechar;
763            char mark1;               /* has to be initialized to zero */
764            char vowel;
765            int lignum;           /* is a ligature with lignum aditional characters */
766            int numshapes = 1;
767        };
768
769
770}