001/*
002 * $Id: BidiOrder.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044
045/*
046 * (C) Copyright IBM Corp. 1999, All Rights Reserved
047 *
048 * version 1.1
049 */
050
051/*
052 * As stated in the Javadoc comments below, materials from Unicode.org
053 * are used in this class. The following license applies to these materials:
054 * http://www.unicode.org/copyright.html#Exhibit1
055 * 
056 * EXHIBIT 1
057 * UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
058 * 
059 * Unicode Data Files include all data files under the directories
060 * http://www.unicode.org/Public/, http://www.unicode.org/reports/,
061 * and http://www.unicode.org/cldr/data/ .
062 * Unicode Software includes any source code published in the Unicode Standard
063 * or under the directories http://www.unicode.org/Public/, http://www.unicode.org/reports/,
064 * and http://www.unicode.org/cldr/data/.
065 * 
066 * NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING,
067 * INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"),
068 * AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY,
069 * ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
070 * DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
071 * 
072 * COPYRIGHT AND PERMISSION NOTICE
073 * Copyright (C) 1991-2007 Unicode, Inc. All rights reserved. Distributed under
074 * the Terms of Use in http://www.unicode.org/copyright.html.
075 * 
076 * Permission is hereby granted, free of charge, to any person obtaining a copy
077 * of the Unicode data files and any associated documentation (the "Data Files")
078 * or Unicode software and any associated documentation (the "Software") to deal
079 * in the Data Files or Software without restriction, including without limitation
080 * the rights to use, copy, modify, merge, publish, distribute, and/or sell copies
081 * of the Data Files or Software, and to permit persons to whom the Data Files
082 * or Software are furnished to do so, provided that (a) the above copyright
083 * notice(s) and this permission notice appear with all copies of the Data Files
084 * or Software, (b) both the above copyright notice(s) and this permission notice
085 * appear in associated documentation, and (c) there is clear notice in each
086 * modified Data File or in the Software as well as in the documentation associated
087 * with the Data File(s) or Software that the data or software has been modified.
088 * 
089 * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
090 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
091 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
092 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
093 * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
094 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
095 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
096 * CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.
097 * 
098 * Except as contained in this notice, the name of a copyright holder shall not
099 * be used in advertising or otherwise to promote the sale, use or other dealings
100 * in these Data Files or Software without prior written authorization of the
101 * copyright holder.
102 */
103package com.itextpdf.text.pdf;
104
105/**
106 * Reference implementation of the Unicode 3.0 Bidi algorithm.
107 *
108 * <p>
109 * This implementation is not optimized for performance.  It is intended
110 * as a reference implementation that closely follows the specification
111 * of the Bidirectional Algorithm in The Unicode Standard version 3.0.
112 * <p>
113 * <b>Input:</b><br>
114 * There are two levels of input to the algorithm, since clients may prefer
115 * to supply some information from out-of-band sources rather than relying on
116 * the default behavior.
117 * <ol>
118 * <li>unicode type array
119 * <li>unicode type array, with externally supplied base line direction
120 * </ol>
121 * <p><b>Output:</b><br>
122 * Output is separated into several stages as well, to better enable clients
123 * to evaluate various aspects of implementation conformance.
124 * <ol>
125 * <li>levels array over entire paragraph
126 * <li>reordering array over entire paragraph
127 * <li>levels array over line
128 * <li>reordering array over line
129 * </ol>
130 * Note that for conformance, algorithms are only required to generate correct
131 * reordering and character directionality (odd or even levels) over a line.
132 * Generating identical level arrays over a line is not required.  Bidi
133 * explicit format codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned
134 * arbitrary levels and positions as long as the other text matches.
135 * <p>
136 * As the algorithm is defined to operate on a single paragraph at a time,
137 * this implementation is written to handle single paragraphs.  Thus
138 * rule P1 is presumed by this implementation-- the data provided to the
139 * implementation is assumed to be a single paragraph, and either contains no
140 * 'B' codes, or a single 'B' code at the end of the input.  'B' is allowed
141 * as input to illustrate how the algorithm assigns it a level.
142 * <p>
143 * Also note that rules L3 and L4 depend on the rendering engine that uses
144 * the result of the bidi algorithm.  This implementation assumes that the
145 * rendering engine expects combining marks in visual order (e.g. to the
146 * left of their base character in RTL runs) and that it adjust the glyphs
147 * used to render mirrored characters that are in RTL runs so that they
148 * render appropriately.
149 *
150 * @author Doug Felt
151 */
152
153import com.itextpdf.text.error_messages.MessageLocalization;
154
155public final class BidiOrder {
156    private byte[] initialTypes;
157    private byte[] embeddings; // generated from processing format codes
158    private byte paragraphEmbeddingLevel = -1; // undefined
159    
160    private int textLength; // for convenience
161    private byte[] resultTypes; // for paragraph, not lines
162    private byte[] resultLevels; // for paragraph, not lines
163    
164    // The bidi types
165    
166    /** Left-to-right*/
167    public static final byte L = 0;
168    
169    /** Left-to-Right Embedding */
170    public static final byte LRE = 1;
171    
172    /** Left-to-Right Override */
173    public static final byte LRO = 2;
174    
175    /** Right-to-Left */
176    public static final byte R = 3;
177    
178    /** Right-to-Left Arabic */
179    public static final byte AL = 4;
180    
181    /** Right-to-Left Embedding */
182    public static final byte RLE = 5;
183    
184    /** Right-to-Left Override */
185    public static final byte RLO = 6;
186    
187    /** Pop Directional Format */
188    public static final byte PDF = 7;
189    
190    /** European Number */
191    public static final byte EN = 8;
192    
193    /** European Number Separator */
194    public static final byte ES = 9;
195    
196    /** European Number Terminator */
197    public static final byte ET = 10;
198    
199    /** Arabic Number */
200    public static final byte AN = 11;
201    
202    /** Common Number Separator */
203    public static final byte CS = 12;
204    
205    /** Non-Spacing Mark */
206    public static final byte NSM = 13;
207    
208    /** Boundary Neutral */
209    public static final byte BN = 14;
210    
211    /** Paragraph Separator */
212    public static final byte B = 15;
213    
214    /** Segment Separator */
215    public static final byte S = 16;
216    
217    /** Whitespace */
218    public static final byte WS = 17;
219    
220    /** Other Neutrals */
221    public static final byte ON = 18;
222    
223    /** Minimum bidi type value. */
224    public static final byte TYPE_MIN = 0;
225    
226    /** Maximum bidi type value. */
227    public static final byte TYPE_MAX = 18;
228    
229    //
230    // Input
231    //
232    
233    /**
234     * Initialize using an array of direction types.  Types range from TYPE_MIN to TYPE_MAX inclusive
235     * and represent the direction codes of the characters in the text.
236     *
237     * @param types the types array
238     */
239    public BidiOrder(byte[] types) {
240        validateTypes(types);
241        
242        this.initialTypes = (byte[])types.clone(); // client type array remains unchanged
243        
244        runAlgorithm();
245    }
246    
247    /**
248     * Initialize using an array of direction types and an externally supplied paragraph embedding level.
249     * The embedding level may be -1, 0, or 1.  -1 means to apply the default algorithm (rules P2 and P3),
250     * 0 is for LTR paragraphs, and 1 is for RTL paragraphs.
251     *
252     * @param types the types array
253     * @param paragraphEmbeddingLevel the externally supplied paragraph embedding level.
254     */
255    public BidiOrder(byte[] types, byte paragraphEmbeddingLevel) {
256        validateTypes(types);
257        validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
258        
259        this.initialTypes = (byte[])types.clone(); // client type array remains unchanged
260        this.paragraphEmbeddingLevel = paragraphEmbeddingLevel;
261        
262        runAlgorithm();
263    }
264    
265    public BidiOrder(char text[], int offset, int length, byte paragraphEmbeddingLevel) {
266        initialTypes = new byte[length];
267        for (int k = 0; k < length; ++k) {
268            initialTypes[k] = rtypes[text[offset + k]];
269        }
270        validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
271        
272        this.paragraphEmbeddingLevel = paragraphEmbeddingLevel;
273        
274        runAlgorithm();
275    }
276    
277    public final static byte getDirection(char c) {
278        return rtypes[c];
279    }
280    
281    /**
282     * The algorithm.
283     * Does not include line-based processing (Rules L1, L2).
284     * These are applied later in the line-based phase of the algorithm.
285     */
286    private void runAlgorithm() {
287        textLength = initialTypes.length;
288        
289        // Initialize output types.
290        // Result types initialized to input types.
291        resultTypes = (byte[])initialTypes.clone();
292        
293        
294        // 1) determining the paragraph level
295        // Rule P1 is the requirement for entering this algorithm.
296        // Rules P2, P3.
297        // If no externally supplied paragraph embedding level, use default.
298        if (paragraphEmbeddingLevel == -1) {
299            determineParagraphEmbeddingLevel();
300        }
301        
302        // Initialize result levels to paragraph embedding level.
303        resultLevels = new byte[textLength];
304        setLevels(0, textLength, paragraphEmbeddingLevel);
305        
306        // 2) Explicit levels and directions
307        // Rules X1-X8.
308        determineExplicitEmbeddingLevels();
309        
310        // Rule X9.
311        textLength = removeExplicitCodes();
312        
313        // Rule X10.
314        // Run remainder of algorithm one level run at a time
315        byte prevLevel = paragraphEmbeddingLevel;
316        int start = 0;
317        while (start < textLength) {
318            byte level = resultLevels[start];
319            byte prevType = typeForLevel(Math.max(prevLevel, level));
320            
321            int limit = start + 1;
322            while (limit < textLength && resultLevels[limit] == level) {
323                ++limit;
324            }
325            
326            byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel;
327            byte succType = typeForLevel(Math.max(succLevel, level));
328            
329            // 3) resolving weak types
330            // Rules W1-W7.
331            resolveWeakTypes(start, limit, level, prevType, succType);
332            
333            // 4) resolving neutral types
334            // Rules N1-N3.
335            resolveNeutralTypes(start, limit, level, prevType, succType);
336            
337            // 5) resolving implicit embedding levels
338            // Rules I1, I2.
339            resolveImplicitLevels(start, limit, level, prevType, succType);
340            
341            prevLevel = level;
342            start = limit;
343        }
344        
345        // Reinsert explicit codes and assign appropriate levels to 'hide' them.
346        // This is for convenience, so the resulting level array maps 1-1
347        // with the initial array.
348        // See the implementation suggestions section of TR#9 for guidelines on
349        // how to implement the algorithm without removing and reinserting the codes.
350        textLength = reinsertExplicitCodes(textLength);
351    }
352    
353    /**
354     * 1) determining the paragraph level.
355     * <p>
356     * Rules P2, P3.
357     * <p>
358     * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1.
359     */
360    private void determineParagraphEmbeddingLevel() {
361        byte strongType = -1; // unknown
362        
363        // Rule P2.
364        for (int i = 0; i < textLength; ++i) {
365            byte t = resultTypes[i];
366            if (t == L || t == AL || t == R) {
367                strongType = t;
368                break;
369            }
370        }
371        
372        // Rule P3.
373        if (strongType == -1) { // none found
374            // default embedding level when no strong types found is 0.
375            paragraphEmbeddingLevel = 0;
376        } else if (strongType == L) {
377            paragraphEmbeddingLevel = 0;
378        } else { // AL, R
379            paragraphEmbeddingLevel = 1;
380        }
381    }
382    
383    /**
384     * Process embedding format codes.
385     * <p>
386     * Calls processEmbeddings to generate an embedding array from the explicit format codes.  The
387     * embedding overrides in the array are then applied to the result types, and the result levels are
388     * initialized.
389     * @see #processEmbeddings
390     */
391    private void determineExplicitEmbeddingLevels() {
392        embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel);
393        
394        for (int i = 0; i < textLength; ++i) {
395            byte level = embeddings[i];
396            if ((level & 0x80) != 0) {
397                level &= 0x7f;
398                resultTypes[i] = typeForLevel(level);
399            }
400            resultLevels[i] = level;
401        }
402    }
403    
404    /**
405     * Rules X9.
406     * Remove explicit codes so that they may be ignored during the remainder
407     * of the main portion of the algorithm.  The length of the resulting text
408     * is returned.
409     * @return the length of the data excluding explicit codes and BN.
410     */
411    private int removeExplicitCodes() {
412        int w = 0;
413        for (int i = 0; i < textLength; ++i) {
414            byte t = initialTypes[i];
415            if (!(t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN)) {
416                embeddings[w] = embeddings[i];
417                resultTypes[w] = resultTypes[i];
418                resultLevels[w] = resultLevels[i];
419                w++;
420            }
421        }
422        return w; // new textLength while explicit levels are removed
423    }
424    
425    /**
426     * Reinsert levels information for explicit codes.
427     * This is for ease of relating the level information
428     * to the original input data.  Note that the levels
429     * assigned to these codes are arbitrary, they're
430     * chosen so as to avoid breaking level runs.
431     * @param textLength the length of the data after compression
432     * @return the length of the data (original length of
433     * types array supplied to constructor)
434     */
435    private int reinsertExplicitCodes(int textLength) {
436        for (int i = initialTypes.length; --i >= 0;) {
437            byte t = initialTypes[i];
438            if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) {
439                embeddings[i] = 0;
440                resultTypes[i] = t;
441                resultLevels[i] = -1;
442            } else {
443                --textLength;
444                embeddings[i] = embeddings[textLength];
445                resultTypes[i] = resultTypes[textLength];
446                resultLevels[i] = resultLevels[textLength];
447            }
448        }
449        
450        // now propagate forward the levels information (could have
451        // propagated backward, the main thing is not to introduce a level
452        // break where one doesn't already exist).
453        
454        if (resultLevels[0] == -1) {
455            resultLevels[0] = paragraphEmbeddingLevel;
456        }
457        for (int i = 1; i < initialTypes.length; ++i) {
458            if (resultLevels[i] == -1) {
459                resultLevels[i] = resultLevels[i-1];
460            }
461        }
462        
463        // Embedding information is for informational purposes only
464        // so need not be adjusted.
465        
466        return initialTypes.length;
467    }
468    
469    /**
470     * 2) determining explicit levels
471     * Rules X1 - X8
472     *
473     * The interaction of these rules makes handling them a bit complex.
474     * This examines resultTypes but does not modify it.  It returns embedding and
475     * override information in the result array.  The low 7 bits are the level, the high
476     * bit is set if the level is an override, and clear if it is an embedding.
477     */
478    private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) {
479        final int EXPLICIT_LEVEL_LIMIT = 62;
480        
481        int textLength = resultTypes.length;
482        byte[] embeddings = new byte[textLength];
483        
484        // This stack will store the embedding levels and override status in a single byte
485        // as described above.
486        byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT];
487        int stackCounter = 0;
488        
489        // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid.  But
490        // an RLE at level 60 is valid, since the new level 61 is valid.  The current wording
491        // of the rules requires that the RLE remain valid even if a previous LRE is invalid.
492        // This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs
493        // will not try to pop the stack.
494        int overflowAlmostCounter = 0;
495        
496        // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will
497        // not try to pop the stack.
498        int overflowCounter = 0;
499        
500        // Rule X1.
501        
502        // Keep the level separate from the value (level | override status flag) for ease of access.
503        byte currentEmbeddingLevel = paragraphEmbeddingLevel;
504        byte currentEmbeddingValue = paragraphEmbeddingLevel;
505        
506        // Loop through types, handling all remaining rules
507        for (int i = 0; i < textLength; ++i) {
508            
509            embeddings[i] = currentEmbeddingValue;
510            
511            byte t = resultTypes[i];
512            
513            // Rules X2, X3, X4, X5
514            switch (t) {
515                case RLE:
516                case LRE:
517                case RLO:
518                case LRO:
519                    // Only need to compute new level if current level is valid
520                    if (overflowCounter == 0) {
521                        byte newLevel;
522                        if (t == RLE || t == RLO) {
523                            newLevel = (byte)((currentEmbeddingLevel + 1) | 1); // least greater odd
524                        } else { // t == LRE || t == LRO
525                            newLevel = (byte)((currentEmbeddingLevel + 2) & ~1); // least greater even
526                        }
527                        
528                        // If the new level is valid, push old embedding level and override status
529                        // No check for valid stack counter, since the level check suffices.
530                        if (newLevel < EXPLICIT_LEVEL_LIMIT) {
531                            embeddingValueStack[stackCounter] = currentEmbeddingValue;
532                            stackCounter++;
533                            
534                            currentEmbeddingLevel = newLevel;
535                            if (t == LRO || t == RLO) { // override
536                                currentEmbeddingValue = (byte)(newLevel | 0x80);
537                            } else {
538                                currentEmbeddingValue = newLevel;
539                            }
540                            
541                            // Adjust level of format mark (for expositional purposes only, this gets
542                            // removed later).
543                            embeddings[i] = currentEmbeddingValue;
544                            break;
545                        }
546                        
547                        // Otherwise new level is invalid, but a valid level can still be achieved if this
548                        // level is 60 and we encounter an RLE or RLO further on.  So record that we
549                        // 'almost' overflowed.
550                        if (currentEmbeddingLevel == 60) {
551                            overflowAlmostCounter++;
552                            break;
553                        }
554                    }
555                    
556                    // Otherwise old or new level is invalid.
557                    overflowCounter++;
558                    break;
559                    
560                case PDF:
561                    // The only case where this did not actually overflow but may have almost overflowed
562                    // is when there was an RLE or RLO on level 60, which would result in level 61.  So we
563                    // only test the almost overflow condition in that case.
564                    //
565                    // Also note that there may be a PDF without any pushes at all.
566                    
567                    if (overflowCounter > 0) {
568                        --overflowCounter;
569                    } else if (overflowAlmostCounter > 0 && currentEmbeddingLevel != 61) {
570                        --overflowAlmostCounter;
571                    } else if (stackCounter > 0) {
572                        --stackCounter;
573                        currentEmbeddingValue = embeddingValueStack[stackCounter];
574                        currentEmbeddingLevel = (byte)(currentEmbeddingValue & 0x7f);
575                    }
576                    break;
577                    
578                case B:
579                    // Rule X8.
580                    
581                    // These values are reset for clarity, in this implementation B can only
582                    // occur as the last code in the array.
583                    stackCounter = 0;
584                    overflowCounter = 0;
585                    overflowAlmostCounter = 0;
586                    currentEmbeddingLevel = paragraphEmbeddingLevel;
587                    currentEmbeddingValue = paragraphEmbeddingLevel;
588                    
589                    embeddings[i] = paragraphEmbeddingLevel;
590                    break;
591                    
592                default:
593                    break;
594            }
595        }
596        
597        return embeddings;
598    }
599    
600    
601    /**
602     * 3) resolving weak types
603     * Rules W1-W7.
604     *
605     * Note that some weak types (EN, AN) remain after this processing is complete.
606     */
607    private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) {
608        
609        // Rule W1.
610        // Changes all NSMs.
611        byte preceedingCharacterType = sor;
612        for (int i = start; i < limit; ++i) {
613            byte t = resultTypes[i];
614            if (t == NSM) {
615                resultTypes[i] = preceedingCharacterType;
616            } else {
617                preceedingCharacterType = t;
618            }
619        }
620        
621        // Rule W2.
622        // EN does not change at the start of the run, because sor != AL.
623        for (int i = start; i < limit; ++i) {
624            if (resultTypes[i] == EN) {
625                for (int j = i - 1; j >= start; --j) {
626                    byte t = resultTypes[j];
627                    if (t == L || t == R || t == AL) {
628                        if (t == AL) {
629                            resultTypes[i] = AN;
630                        }
631                        break;
632                    }
633                }
634            }
635        }
636        
637        // Rule W3.
638        for (int i = start; i < limit; ++i) {
639            if (resultTypes[i] == AL) {
640                resultTypes[i] = R;
641            }
642        }
643        
644        // Rule W4.
645        // Since there must be values on both sides for this rule to have an
646        // effect, the scan skips the first and last value.
647        //
648        // Although the scan proceeds left to right, and changes the type values
649        // in a way that would appear to affect the computations later in the scan,
650        // there is actually no problem.  A change in the current value can only
651        // affect the value to its immediate right, and only affect it if it is
652        // ES or CS.  But the current value can only change if the value to its
653        // right is not ES or CS.  Thus either the current value will not change,
654        // or its change will have no effect on the remainder of the analysis.
655        
656        for (int i = start + 1; i < limit - 1; ++i) {
657            if (resultTypes[i] == ES || resultTypes[i] == CS) {
658                byte prevSepType = resultTypes[i-1];
659                byte succSepType = resultTypes[i+1];
660                if (prevSepType == EN && succSepType == EN) {
661                    resultTypes[i] = EN;
662                } else if (resultTypes[i] == CS && prevSepType == AN && succSepType == AN) {
663                    resultTypes[i] = AN;
664                }
665            }
666        }
667        
668        // Rule W5.
669        for (int i = start; i < limit; ++i) {
670            if (resultTypes[i] == ET) {
671                // locate end of sequence
672                int runstart = i;
673                int runlimit = findRunLimit(runstart, limit, new byte[] { ET });
674                
675                // check values at ends of sequence
676                byte t = runstart == start ? sor : resultTypes[runstart - 1];
677                
678                if (t != EN) {
679                    t = runlimit == limit ? eor : resultTypes[runlimit];
680                }
681                
682                if (t == EN) {
683                    setTypes(runstart, runlimit, EN);
684                }
685                
686                // continue at end of sequence
687                i = runlimit;
688            }
689        }
690        
691        // Rule W6.
692        for (int i = start; i < limit; ++i) {
693            byte t = resultTypes[i];
694            if (t == ES || t == ET || t == CS) {
695                resultTypes[i] = ON;
696            }
697        }
698        
699        // Rule W7.
700        for (int i = start; i < limit; ++i) {
701            if (resultTypes[i] == EN) {
702                // set default if we reach start of run
703                byte prevStrongType = sor;
704                for (int j = i - 1; j >= start; --j) {
705                    byte t = resultTypes[j];
706                    if (t == L || t == R) { // AL's have been removed
707                        prevStrongType = t;
708                        break;
709                    }
710                }
711                if (prevStrongType == L) {
712                    resultTypes[i] = L;
713                }
714            }
715        }
716    }
717    
718    /**
719     * 6) resolving neutral types
720     * Rules N1-N2.
721     */
722    private void resolveNeutralTypes(int start, int limit, byte level, byte sor, byte eor) {
723        
724        for (int i = start; i < limit; ++i) {
725            byte t = resultTypes[i];
726            if (t == WS || t == ON || t == B || t == S) {
727                // find bounds of run of neutrals
728                int runstart = i;
729                int runlimit = findRunLimit(runstart, limit, new byte[] {B, S, WS, ON});
730                
731                // determine effective types at ends of run
732                byte leadingType;
733                byte trailingType;
734                
735                if (runstart == start) {
736                    leadingType = sor;
737                } else {
738                    leadingType = resultTypes[runstart - 1];
739                    if (leadingType == L || leadingType == R) {
740                        // found the strong type
741                    } else if (leadingType == AN) {
742                        leadingType = R;
743                    } else if (leadingType == EN) {
744                        // Since EN's with previous strong L types have been changed
745                        // to L in W7, the leadingType must be R.
746                        leadingType = R;
747                    }
748                }
749                
750                if (runlimit == limit) {
751                    trailingType = eor;
752                } else {
753                    trailingType = resultTypes[runlimit];
754                    if (trailingType == L || trailingType == R) {
755                        // found the strong type
756                    } else if (trailingType == AN) {
757                        trailingType = R;
758                    } else if (trailingType == EN) {
759                        trailingType = R;
760                    }
761                }
762                
763                byte resolvedType;
764                if (leadingType == trailingType) {
765                    // Rule N1.
766                    resolvedType = leadingType;
767                } else {
768                    // Rule N2.
769                    // Notice the embedding level of the run is used, not
770                    // the paragraph embedding level.
771                    resolvedType = typeForLevel(level);
772                }
773                
774                setTypes(runstart, runlimit, resolvedType);
775                
776                // skip over run of (former) neutrals
777                i = runlimit;
778            }
779        }
780    }
781    
782    /**
783     * 7) resolving implicit embedding levels
784     * Rules I1, I2.
785     */
786    private void resolveImplicitLevels(int start, int limit, byte level, byte sor, byte eor) {
787        if ((level & 1) == 0) { // even level
788            for (int i = start; i < limit; ++i) {
789                byte t = resultTypes[i];
790                // Rule I1.
791                if (t == L ) {
792                    // no change
793                } else if (t == R) {
794                    resultLevels[i] += 1;
795                } else { // t == AN || t == EN
796                    resultLevels[i] += 2;
797                }
798            }
799        } else { // odd level
800            for (int i = start; i < limit; ++i) {
801                byte t = resultTypes[i];
802                // Rule I2.
803                if (t == R) {
804                    // no change
805                } else { // t == L || t == AN || t == EN
806                    resultLevels[i] += 1;
807                }
808            }
809        }
810    }
811    
812    //
813    // Output
814    //
815    
816    public byte[] getLevels() {
817        return getLevels(new int[]{textLength});
818    }
819    
820    /**
821     * Return levels array breaking lines at offsets in linebreaks. <br>
822     * Rule L1.
823     * <p>
824     * The returned levels array contains the resolved level for each
825     * bidi code passed to the constructor.
826     * <p>
827     * The linebreaks array must include at least one value.
828     * The values must be in strictly increasing order (no duplicates)
829     * between 1 and the length of the text, inclusive.  The last value
830     * must be the length of the text.
831     *
832     * @param linebreaks the offsets at which to break the paragraph
833     * @return the resolved levels of the text
834     */
835    public byte[] getLevels(int[] linebreaks) {
836        
837        // Note that since the previous processing has removed all
838        // P, S, and WS values from resultTypes, the values referred to
839        // in these rules are the initial types, before any processing
840        // has been applied (including processing of overrides).
841        //
842        // This example implementation has reinserted explicit format codes
843        // and BN, in order that the levels array correspond to the
844        // initial text.  Their final placement is not normative.
845        // These codes are treated like WS in this implementation,
846        // so they don't interrupt sequences of WS.
847        
848        validateLineBreaks(linebreaks, textLength);
849        
850        byte[] result = (byte[])resultLevels.clone(); // will be returned to caller
851        
852        // don't worry about linebreaks since if there is a break within
853        // a series of WS values preceding S, the linebreak itself
854        // causes the reset.
855        for (int i = 0; i < result.length; ++i) {
856            byte t = initialTypes[i];
857            if (t == B || t == S) {
858                // Rule L1, clauses one and two.
859                result[i] = paragraphEmbeddingLevel;
860                
861                // Rule L1, clause three.
862                for (int j = i - 1; j >= 0; --j) {
863                    if (isWhitespace(initialTypes[j])) { // including format codes
864                        result[j] = paragraphEmbeddingLevel;
865                    } else {
866                        break;
867                    }
868                }
869            }
870        }
871        
872        // Rule L1, clause four.
873        int start = 0;
874        for (int i = 0; i < linebreaks.length; ++i) {
875            int limit = linebreaks[i];
876            for (int j = limit - 1; j >= start; --j) {
877                if (isWhitespace(initialTypes[j])) { // including format codes
878                    result[j] = paragraphEmbeddingLevel;
879                } else {
880                    break;
881                }
882            }
883            
884            start = limit;
885        }
886        
887        return result;
888    }
889    
890    /**
891     * Return reordering array breaking lines at offsets in linebreaks.
892     * <p>
893     * The reordering array maps from a visual index to a logical index.
894     * Lines are concatenated from left to right.  So for example, the
895     * fifth character from the left on the third line is
896     * <pre> getReordering(linebreaks)[linebreaks[1] + 4]</pre>
897     * (linebreaks[1] is the position after the last character of the
898     * second line, which is also the index of the first character on the
899     * third line, and adding four gets the fifth character from the left).
900     * <p>
901     * The linebreaks array must include at least one value.
902     * The values must be in strictly increasing order (no duplicates)
903     * between 1 and the length of the text, inclusive.  The last value
904     * must be the length of the text.
905     *
906     * @param linebreaks the offsets at which to break the paragraph.
907     */
908    public int[] getReordering(int[] linebreaks) {
909        validateLineBreaks(linebreaks, textLength);
910        
911        byte[] levels = getLevels(linebreaks);
912        
913        return computeMultilineReordering(levels, linebreaks);
914    }
915    
916    /**
917     * Return multiline reordering array for a given level array.
918     * Reordering does not occur across a line break.
919     */
920    private static int[] computeMultilineReordering(byte[] levels, int[] linebreaks) {
921        int[] result = new int[levels.length];
922        
923        int start = 0;
924        for (int i = 0; i < linebreaks.length; ++i) {
925            int limit = linebreaks[i];
926            
927            byte[] templevels = new byte[limit - start];
928            System.arraycopy(levels, start, templevels, 0, templevels.length);
929            
930            int[] temporder = computeReordering(templevels);
931            for (int j = 0; j < temporder.length; ++j) {
932                result[start + j] = temporder[j] + start;
933            }
934            
935            start = limit;
936        }
937        
938        return result;
939    }
940    
941    /**
942     * Return reordering array for a given level array.  This reorders a single line.
943     * The reordering is a visual to logical map.  For example,
944     * the leftmost char is string.charAt(order[0]).
945     * Rule L2.
946     */
947    private static int[] computeReordering(byte[] levels) {
948        int lineLength = levels.length;
949        
950        int[] result = new int[lineLength];
951        
952        // initialize order
953        for (int i = 0; i < lineLength; ++i) {
954            result[i] = i;
955        }
956        
957        // locate highest level found on line.
958        // Note the rules say text, but no reordering across line bounds is performed,
959        // so this is sufficient.
960        byte highestLevel = 0;
961        byte lowestOddLevel = 63;
962        for (int i = 0; i < lineLength; ++i) {
963            byte level = levels[i];
964            if (level > highestLevel) {
965                highestLevel = level;
966            }
967            if (((level & 1) != 0) && level < lowestOddLevel) {
968                lowestOddLevel = level;
969            }
970        }
971        
972        for (int level = highestLevel; level >= lowestOddLevel; --level) {
973            for (int i = 0; i < lineLength; ++i) {
974                if (levels[i] >= level) {
975                    // find range of text at or above this level
976                    int start = i;
977                    int limit = i + 1;
978                    while (limit < lineLength && levels[limit] >= level) {
979                        ++limit;
980                    }
981                    
982                    // reverse run
983                    for (int j = start, k = limit - 1; j < k; ++j, --k) {
984                        int temp = result[j];
985                        result[j] = result[k];
986                        result[k] = temp;
987                    }
988                    
989                    // skip to end of level run
990                    i = limit;
991                }
992            }
993        }
994        
995        return result;
996    }
997    
998    /**
999     * Return the base level of the paragraph.
1000     */
1001    public byte getBaseLevel() {
1002        return paragraphEmbeddingLevel;
1003    }
1004    
1005    // --- internal utilities -------------------------------------------------
1006    
1007    /**
1008     * Return true if the type is considered a whitespace type for the line break rules.
1009     */
1010    private static boolean isWhitespace(byte biditype) {
1011        switch (biditype) {
1012            case LRE:
1013            case RLE:
1014            case LRO:
1015            case RLO:
1016            case PDF:
1017            case BN:
1018            case WS:
1019                return true;
1020            default:
1021                return false;
1022        }
1023    }
1024    
1025    /**
1026     * Return the strong type (L or R) corresponding to the level.
1027     */
1028    private static byte typeForLevel(int level) {
1029        return ((level & 0x1) == 0) ? L : R;
1030    }
1031    
1032    /**
1033     * Return the limit of the run starting at index that includes only resultTypes in validSet.
1034     * This checks the value at index, and will return index if that value is not in validSet.
1035     */
1036    private int findRunLimit(int index, int limit, byte[] validSet) {
1037        --index;
1038        loop:
1039            while (++index < limit) {
1040                byte t = resultTypes[index];
1041                for (int i = 0; i < validSet.length; ++i) {
1042                    if (t == validSet[i]) {
1043                        continue loop;
1044                    }
1045                }
1046                // didn't find a match in validSet
1047                return index;
1048            }
1049            return limit;
1050    }
1051    
1052    /**
1053     * Return the start of the run including index that includes only resultTypes in validSet.
1054     * This assumes the value at index is valid, and does not check it.
1055     */
1056    private int findRunStart(int index, byte[] validSet) {
1057        loop:
1058            while (--index >= 0) {
1059                byte t = resultTypes[index];
1060                for (int i = 0; i < validSet.length; ++i) {
1061                    if (t == validSet[i]) {
1062                        continue loop;
1063                    }
1064                }
1065                return index + 1;
1066            }
1067            return 0;
1068    }
1069    
1070    /**
1071     * Set resultTypes from start up to (but not including) limit to newType.
1072     */
1073    private void setTypes(int start, int limit, byte newType) {
1074        for (int i = start; i < limit; ++i) {
1075            resultTypes[i] = newType;
1076        }
1077    }
1078    
1079    /**
1080     * Set resultLevels from start up to (but not including) limit to newLevel.
1081     */
1082    private void setLevels(int start, int limit, byte newLevel) {
1083        for (int i = start; i < limit; ++i) {
1084            resultLevels[i] = newLevel;
1085        }
1086    }
1087    
1088    // --- input validation ---------------------------------------------------
1089    
1090    /**
1091     * Throw exception if type array is invalid.
1092     */
1093    private static void validateTypes(byte[] types) {
1094        if (types == null) {
1095            throw new IllegalArgumentException(MessageLocalization.getComposedMessage("types.is.null"));
1096        }
1097        for (int i = 0; i < types.length; ++i) {
1098            if (types[i] < TYPE_MIN || types[i] > TYPE_MAX) {
1099                throw new IllegalArgumentException(MessageLocalization.getComposedMessage("illegal.type.value.at.1.2", String.valueOf(i), String.valueOf(types[i])));
1100            }
1101        }
1102        for (int i = 0; i < types.length - 1; ++i) {
1103            if (types[i] == B) {
1104                throw new IllegalArgumentException(MessageLocalization.getComposedMessage("b.type.before.end.of.paragraph.at.index.1", i));
1105            }
1106        }
1107    }
1108    
1109    /**
1110     * Throw exception if paragraph embedding level is invalid. Special allowance for -1 so that
1111     * default processing can still be performed when using this API.
1112     */
1113    private static void validateParagraphEmbeddingLevel(byte paragraphEmbeddingLevel) {
1114        if (paragraphEmbeddingLevel != -1 &&
1115        paragraphEmbeddingLevel != 0 &&
1116        paragraphEmbeddingLevel != 1) {
1117            throw new IllegalArgumentException(MessageLocalization.getComposedMessage("illegal.paragraph.embedding.level.1", paragraphEmbeddingLevel));
1118        }
1119    }
1120    
1121    /**
1122     * Throw exception if line breaks array is invalid.
1123     */
1124    private static void validateLineBreaks(int[] linebreaks, int textLength) {
1125        int prev = 0;
1126        for (int i = 0; i < linebreaks.length; ++i) {
1127            int next = linebreaks[i];
1128            if (next <= prev) {
1129                throw new IllegalArgumentException(MessageLocalization.getComposedMessage("bad.linebreak.1.at.index.2", String.valueOf(next), String.valueOf(i)));
1130            }
1131            prev = next;
1132        }
1133        if (prev != textLength) {
1134            throw new IllegalArgumentException(MessageLocalization.getComposedMessage("last.linebreak.must.be.at.1", textLength));
1135        }
1136    }
1137    
1138    private static final byte rtypes[] = new byte[0x10000];
1139    
1140    private static char baseTypes[] = {
1141        0, 8, (char)BN, 9, 9, (char)S, 10, 10, (char)B, 11, 11, (char)S, 12, 12, (char)WS, 13, 13, (char)B,
1142        14, 27, (char)BN, 28, 30, (char)B, 31, 31, (char)S, 32, 32, (char)WS, 33, 34, (char)ON, 35, 37, (char)ET,
1143        38, 42, (char)ON, 43, 43, (char)ET, 44, 44, (char)CS, 45, 45, (char)ET, 46, 46, (char)CS, 47, 47, (char)ES,
1144        48, 57, (char)EN, 58, 58, (char)CS, 59, 64, (char)ON, 65, 90, (char)L, 91, 96, (char)ON, 97, 122, (char)L,
1145        123, 126, (char)ON, 127, 132, (char)BN, 133, 133, (char)B, 134, 159, (char)BN, 160, 160, (char)CS,
1146        161, 161, (char)ON, 162, 165, (char)ET, 166, 169, (char)ON, 170, 170, (char)L, 171, 175, (char)ON,
1147        176, 177, (char)ET, 178, 179, (char)EN, 180, 180, (char)ON, 181, 181, (char)L, 182, 184, (char)ON,
1148        185, 185, (char)EN, 186, 186, (char)L, 187, 191, (char)ON, 192, 214, (char)L, 215, 215, (char)ON,
1149        216, 246, (char)L, 247, 247, (char)ON, 248, 696, (char)L, 697, 698, (char)ON, 699, 705, (char)L,
1150        706, 719, (char)ON, 720, 721, (char)L, 722, 735, (char)ON, 736, 740, (char)L, 741, 749, (char)ON,
1151        750, 750, (char)L, 751, 767, (char)ON, 768, 855, (char)NSM, 856, 860, (char)L, 861, 879, (char)NSM,
1152        880, 883, (char)L, 884, 885, (char)ON, 886, 893, (char)L, 894, 894, (char)ON, 895, 899, (char)L,
1153        900, 901, (char)ON, 902, 902, (char)L, 903, 903, (char)ON, 904, 1013, (char)L, 1014, 1014, (char)ON,
1154        1015, 1154, (char)L, 1155, 1158, (char)NSM, 1159, 1159, (char)L, 1160, 1161, (char)NSM,
1155        1162, 1417, (char)L, 1418, 1418, (char)ON, 1419, 1424, (char)L, 1425, 1441, (char)NSM,
1156        1442, 1442, (char)L, 1443, 1465, (char)NSM, 1466, 1466, (char)L, 1467, 1469, (char)NSM,
1157        1470, 1470, (char)R, 1471, 1471, (char)NSM, 1472, 1472, (char)R, 1473, 1474, (char)NSM,
1158        1475, 1475, (char)R, 1476, 1476, (char)NSM, 1477, 1487, (char)L, 1488, 1514, (char)R,
1159        1515, 1519, (char)L, 1520, 1524, (char)R, 1525, 1535, (char)L, 1536, 1539, (char)AL,
1160        1540, 1547, (char)L, 1548, 1548, (char)CS, 1549, 1549, (char)AL, 1550, 1551, (char)ON,
1161        1552, 1557, (char)NSM, 1558, 1562, (char)L, 1563, 1563, (char)AL, 1564, 1566, (char)L,
1162        1567, 1567, (char)AL, 1568, 1568, (char)L, 1569, 1594, (char)AL, 1595, 1599, (char)L,
1163        1600, 1610, (char)AL, 1611, 1624, (char)NSM, 1625, 1631, (char)L, 1632, 1641, (char)AN,
1164        1642, 1642, (char)ET, 1643, 1644, (char)AN, 1645, 1647, (char)AL, 1648, 1648, (char)NSM,
1165        1649, 1749, (char)AL, 1750, 1756, (char)NSM, 1757, 1757, (char)AL, 1758, 1764, (char)NSM,
1166        1765, 1766, (char)AL, 1767, 1768, (char)NSM, 1769, 1769, (char)ON, 1770, 1773, (char)NSM,
1167        1774, 1775, (char)AL, 1776, 1785, (char)EN, 1786, 1805, (char)AL, 1806, 1806, (char)L,
1168        1807, 1807, (char)BN, 1808, 1808, (char)AL, 1809, 1809, (char)NSM, 1810, 1839, (char)AL,
1169        1840, 1866, (char)NSM, 1867, 1868, (char)L, 1869, 1871, (char)AL, 1872, 1919, (char)L,
1170        1920, 1957, (char)AL, 1958, 1968, (char)NSM, 1969, 1969, (char)AL, 1970, 2304, (char)L,
1171        2305, 2306, (char)NSM, 2307, 2363, (char)L, 2364, 2364, (char)NSM, 2365, 2368, (char)L,
1172        2369, 2376, (char)NSM, 2377, 2380, (char)L, 2381, 2381, (char)NSM, 2382, 2384, (char)L,
1173        2385, 2388, (char)NSM, 2389, 2401, (char)L, 2402, 2403, (char)NSM, 2404, 2432, (char)L,
1174        2433, 2433, (char)NSM, 2434, 2491, (char)L, 2492, 2492, (char)NSM, 2493, 2496, (char)L,
1175        2497, 2500, (char)NSM, 2501, 2508, (char)L, 2509, 2509, (char)NSM, 2510, 2529, (char)L,
1176        2530, 2531, (char)NSM, 2532, 2545, (char)L, 2546, 2547, (char)ET, 2548, 2560, (char)L,
1177        2561, 2562, (char)NSM, 2563, 2619, (char)L, 2620, 2620, (char)NSM, 2621, 2624, (char)L,
1178        2625, 2626, (char)NSM, 2627, 2630, (char)L, 2631, 2632, (char)NSM, 2633, 2634, (char)L,
1179        2635, 2637, (char)NSM, 2638, 2671, (char)L, 2672, 2673, (char)NSM, 2674, 2688, (char)L,
1180        2689, 2690, (char)NSM, 2691, 2747, (char)L, 2748, 2748, (char)NSM, 2749, 2752, (char)L,
1181        2753, 2757, (char)NSM, 2758, 2758, (char)L, 2759, 2760, (char)NSM, 2761, 2764, (char)L,
1182        2765, 2765, (char)NSM, 2766, 2785, (char)L, 2786, 2787, (char)NSM, 2788, 2800, (char)L,
1183        2801, 2801, (char)ET, 2802, 2816, (char)L, 2817, 2817, (char)NSM, 2818, 2875, (char)L,
1184        2876, 2876, (char)NSM, 2877, 2878, (char)L, 2879, 2879, (char)NSM, 2880, 2880, (char)L,
1185        2881, 2883, (char)NSM, 2884, 2892, (char)L, 2893, 2893, (char)NSM, 2894, 2901, (char)L,
1186        2902, 2902, (char)NSM, 2903, 2945, (char)L, 2946, 2946, (char)NSM, 2947, 3007, (char)L,
1187        3008, 3008, (char)NSM, 3009, 3020, (char)L, 3021, 3021, (char)NSM, 3022, 3058, (char)L,
1188        3059, 3064, (char)ON, 3065, 3065, (char)ET, 3066, 3066, (char)ON, 3067, 3133, (char)L,
1189        3134, 3136, (char)NSM, 3137, 3141, (char)L, 3142, 3144, (char)NSM, 3145, 3145, (char)L,
1190        3146, 3149, (char)NSM, 3150, 3156, (char)L, 3157, 3158, (char)NSM, 3159, 3259, (char)L,
1191        3260, 3260, (char)NSM, 3261, 3275, (char)L, 3276, 3277, (char)NSM, 3278, 3392, (char)L,
1192        3393, 3395, (char)NSM, 3396, 3404, (char)L, 3405, 3405, (char)NSM, 3406, 3529, (char)L,
1193        3530, 3530, (char)NSM, 3531, 3537, (char)L, 3538, 3540, (char)NSM, 3541, 3541, (char)L,
1194        3542, 3542, (char)NSM, 3543, 3632, (char)L, 3633, 3633, (char)NSM, 3634, 3635, (char)L,
1195        3636, 3642, (char)NSM, 3643, 3646, (char)L, 3647, 3647, (char)ET, 3648, 3654, (char)L,
1196        3655, 3662, (char)NSM, 3663, 3760, (char)L, 3761, 3761, (char)NSM, 3762, 3763, (char)L,
1197        3764, 3769, (char)NSM, 3770, 3770, (char)L, 3771, 3772, (char)NSM, 3773, 3783, (char)L,
1198        3784, 3789, (char)NSM, 3790, 3863, (char)L, 3864, 3865, (char)NSM, 3866, 3892, (char)L,
1199        3893, 3893, (char)NSM, 3894, 3894, (char)L, 3895, 3895, (char)NSM, 3896, 3896, (char)L,
1200        3897, 3897, (char)NSM, 3898, 3901, (char)ON, 3902, 3952, (char)L, 3953, 3966, (char)NSM,
1201        3967, 3967, (char)L, 3968, 3972, (char)NSM, 3973, 3973, (char)L, 3974, 3975, (char)NSM,
1202        3976, 3983, (char)L, 3984, 3991, (char)NSM, 3992, 3992, (char)L, 3993, 4028, (char)NSM,
1203        4029, 4037, (char)L, 4038, 4038, (char)NSM, 4039, 4140, (char)L, 4141, 4144, (char)NSM,
1204        4145, 4145, (char)L, 4146, 4146, (char)NSM, 4147, 4149, (char)L, 4150, 4151, (char)NSM,
1205        4152, 4152, (char)L, 4153, 4153, (char)NSM, 4154, 4183, (char)L, 4184, 4185, (char)NSM,
1206        4186, 5759, (char)L, 5760, 5760, (char)WS, 5761, 5786, (char)L, 5787, 5788, (char)ON,
1207        5789, 5905, (char)L, 5906, 5908, (char)NSM, 5909, 5937, (char)L, 5938, 5940, (char)NSM,
1208        5941, 5969, (char)L, 5970, 5971, (char)NSM, 5972, 6001, (char)L, 6002, 6003, (char)NSM,
1209        6004, 6070, (char)L, 6071, 6077, (char)NSM, 6078, 6085, (char)L, 6086, 6086, (char)NSM,
1210        6087, 6088, (char)L, 6089, 6099, (char)NSM, 6100, 6106, (char)L, 6107, 6107, (char)ET,
1211        6108, 6108, (char)L, 6109, 6109, (char)NSM, 6110, 6127, (char)L, 6128, 6137, (char)ON,
1212        6138, 6143, (char)L, 6144, 6154, (char)ON, 6155, 6157, (char)NSM, 6158, 6158, (char)WS,
1213        6159, 6312, (char)L, 6313, 6313, (char)NSM, 6314, 6431, (char)L, 6432, 6434, (char)NSM,
1214        6435, 6438, (char)L, 6439, 6443, (char)NSM, 6444, 6449, (char)L, 6450, 6450, (char)NSM,
1215        6451, 6456, (char)L, 6457, 6459, (char)NSM, 6460, 6463, (char)L, 6464, 6464, (char)ON,
1216        6465, 6467, (char)L, 6468, 6469, (char)ON, 6470, 6623, (char)L, 6624, 6655, (char)ON,
1217        6656, 8124, (char)L, 8125, 8125, (char)ON, 8126, 8126, (char)L, 8127, 8129, (char)ON,
1218        8130, 8140, (char)L, 8141, 8143, (char)ON, 8144, 8156, (char)L, 8157, 8159, (char)ON,
1219        8160, 8172, (char)L, 8173, 8175, (char)ON, 8176, 8188, (char)L, 8189, 8190, (char)ON,
1220        8191, 8191, (char)L, 8192, 8202, (char)WS, 8203, 8205, (char)BN, 8206, 8206, (char)L,
1221        8207, 8207, (char)R, 8208, 8231, (char)ON, 8232, 8232, (char)WS, 8233, 8233, (char)B,
1222        8234, 8234, (char)LRE, 8235, 8235, (char)RLE, 8236, 8236, (char)PDF, 8237, 8237, (char)LRO,
1223        8238, 8238, (char)RLO, 8239, 8239, (char)WS, 8240, 8244, (char)ET, 8245, 8276, (char)ON,
1224        8277, 8278, (char)L, 8279, 8279, (char)ON, 8280, 8286, (char)L, 8287, 8287, (char)WS,
1225        8288, 8291, (char)BN, 8292, 8297, (char)L, 8298, 8303, (char)BN, 8304, 8304, (char)EN,
1226        8305, 8307, (char)L, 8308, 8313, (char)EN, 8314, 8315, (char)ET, 8316, 8318, (char)ON,
1227        8319, 8319, (char)L, 8320, 8329, (char)EN, 8330, 8331, (char)ET, 8332, 8334, (char)ON,
1228        8335, 8351, (char)L, 8352, 8369, (char)ET, 8370, 8399, (char)L, 8400, 8426, (char)NSM,
1229        8427, 8447, (char)L, 8448, 8449, (char)ON, 8450, 8450, (char)L, 8451, 8454, (char)ON,
1230        8455, 8455, (char)L, 8456, 8457, (char)ON, 8458, 8467, (char)L, 8468, 8468, (char)ON,
1231        8469, 8469, (char)L, 8470, 8472, (char)ON, 8473, 8477, (char)L, 8478, 8483, (char)ON,
1232        8484, 8484, (char)L, 8485, 8485, (char)ON, 8486, 8486, (char)L, 8487, 8487, (char)ON,
1233        8488, 8488, (char)L, 8489, 8489, (char)ON, 8490, 8493, (char)L, 8494, 8494, (char)ET,
1234        8495, 8497, (char)L, 8498, 8498, (char)ON, 8499, 8505, (char)L, 8506, 8507, (char)ON,
1235        8508, 8511, (char)L, 8512, 8516, (char)ON, 8517, 8521, (char)L, 8522, 8523, (char)ON,
1236        8524, 8530, (char)L, 8531, 8543, (char)ON, 8544, 8591, (char)L, 8592, 8721, (char)ON,
1237        8722, 8723, (char)ET, 8724, 9013, (char)ON, 9014, 9082, (char)L, 9083, 9108, (char)ON,
1238        9109, 9109, (char)L, 9110, 9168, (char)ON, 9169, 9215, (char)L, 9216, 9254, (char)ON,
1239        9255, 9279, (char)L, 9280, 9290, (char)ON, 9291, 9311, (char)L, 9312, 9371, (char)EN,
1240        9372, 9449, (char)L, 9450, 9450, (char)EN, 9451, 9751, (char)ON, 9752, 9752, (char)L,
1241        9753, 9853, (char)ON, 9854, 9855, (char)L, 9856, 9873, (char)ON, 9874, 9887, (char)L,
1242        9888, 9889, (char)ON, 9890, 9984, (char)L, 9985, 9988, (char)ON, 9989, 9989, (char)L,
1243        9990, 9993, (char)ON, 9994, 9995, (char)L, 9996, 10023, (char)ON, 10024, 10024, (char)L,
1244        10025, 10059, (char)ON, 10060, 10060, (char)L, 10061, 10061, (char)ON, 10062, 10062, (char)L,
1245        10063, 10066, (char)ON, 10067, 10069, (char)L, 10070, 10070, (char)ON, 10071, 10071, (char)L,
1246        10072, 10078, (char)ON, 10079, 10080, (char)L, 10081, 10132, (char)ON, 10133, 10135, (char)L,
1247        10136, 10159, (char)ON, 10160, 10160, (char)L, 10161, 10174, (char)ON, 10175, 10191, (char)L,
1248        10192, 10219, (char)ON, 10220, 10223, (char)L, 10224, 11021, (char)ON, 11022, 11903, (char)L,
1249        11904, 11929, (char)ON, 11930, 11930, (char)L, 11931, 12019, (char)ON, 12020, 12031, (char)L,
1250        12032, 12245, (char)ON, 12246, 12271, (char)L, 12272, 12283, (char)ON, 12284, 12287, (char)L,
1251        12288, 12288, (char)WS, 12289, 12292, (char)ON, 12293, 12295, (char)L, 12296, 12320, (char)ON,
1252        12321, 12329, (char)L, 12330, 12335, (char)NSM, 12336, 12336, (char)ON, 12337, 12341, (char)L,
1253        12342, 12343, (char)ON, 12344, 12348, (char)L, 12349, 12351, (char)ON, 12352, 12440, (char)L,
1254        12441, 12442, (char)NSM, 12443, 12444, (char)ON, 12445, 12447, (char)L, 12448, 12448, (char)ON,
1255        12449, 12538, (char)L, 12539, 12539, (char)ON, 12540, 12828, (char)L, 12829, 12830, (char)ON,
1256        12831, 12879, (char)L, 12880, 12895, (char)ON, 12896, 12923, (char)L, 12924, 12925, (char)ON,
1257        12926, 12976, (char)L, 12977, 12991, (char)ON, 12992, 13003, (char)L, 13004, 13007, (char)ON,
1258        13008, 13174, (char)L, 13175, 13178, (char)ON, 13179, 13277, (char)L, 13278, 13279, (char)ON,
1259        13280, 13310, (char)L, 13311, 13311, (char)ON, 13312, 19903, (char)L, 19904, 19967, (char)ON,
1260        19968, 42127, (char)L, 42128, 42182, (char)ON, 42183, 64284, (char)L, 64285, 64285, (char)R,
1261        64286, 64286, (char)NSM, 64287, 64296, (char)R, 64297, 64297, (char)ET, 64298, 64310, (char)R,
1262        64311, 64311, (char)L, 64312, 64316, (char)R, 64317, 64317, (char)L, 64318, 64318, (char)R,
1263        64319, 64319, (char)L, 64320, 64321, (char)R, 64322, 64322, (char)L, 64323, 64324, (char)R,
1264        64325, 64325, (char)L, 64326, 64335, (char)R, 64336, 64433, (char)AL, 64434, 64466, (char)L,
1265        64467, 64829, (char)AL, 64830, 64831, (char)ON, 64832, 64847, (char)L, 64848, 64911, (char)AL,
1266        64912, 64913, (char)L, 64914, 64967, (char)AL, 64968, 65007, (char)L, 65008, 65020, (char)AL,
1267        65021, 65021, (char)ON, 65022, 65023, (char)L, 65024, 65039, (char)NSM, 65040, 65055, (char)L,
1268        65056, 65059, (char)NSM, 65060, 65071, (char)L, 65072, 65103, (char)ON, 65104, 65104, (char)CS,
1269        65105, 65105, (char)ON, 65106, 65106, (char)CS, 65107, 65107, (char)L, 65108, 65108, (char)ON,
1270        65109, 65109, (char)CS, 65110, 65118, (char)ON, 65119, 65119, (char)ET, 65120, 65121, (char)ON,
1271        65122, 65123, (char)ET, 65124, 65126, (char)ON, 65127, 65127, (char)L, 65128, 65128, (char)ON,
1272        65129, 65130, (char)ET, 65131, 65131, (char)ON, 65132, 65135, (char)L, 65136, 65140, (char)AL,
1273        65141, 65141, (char)L, 65142, 65276, (char)AL, 65277, 65278, (char)L, 65279, 65279, (char)BN,
1274        65280, 65280, (char)L, 65281, 65282, (char)ON, 65283, 65285, (char)ET, 65286, 65290, (char)ON,
1275        65291, 65291, (char)ET, 65292, 65292, (char)CS, 65293, 65293, (char)ET, 65294, 65294, (char)CS,
1276        65295, 65295, (char)ES, 65296, 65305, (char)EN, 65306, 65306, (char)CS, 65307, 65312, (char)ON,
1277        65313, 65338, (char)L, 65339, 65344, (char)ON, 65345, 65370, (char)L, 65371, 65381, (char)ON,
1278        65382, 65503, (char)L, 65504, 65505, (char)ET, 65506, 65508, (char)ON, 65509, 65510, (char)ET,
1279        65511, 65511, (char)L, 65512, 65518, (char)ON, 65519, 65528, (char)L, 65529, 65531, (char)BN,
1280        65532, 65533, (char)ON, 65534, 65535, (char)L};
1281        
1282    static {
1283        for (int k = 0; k < baseTypes.length; ++k) {
1284            int start = baseTypes[k];
1285            int end = baseTypes[++k];
1286            byte b = (byte)baseTypes[++k];
1287            while (start <= end)
1288                rtypes[start++] = b;
1289        }
1290    }        
1291}