001/* 002 * $Id: BidiOrder.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044 045/* 046 * (C) Copyright IBM Corp. 1999, All Rights Reserved 047 * 048 * version 1.1 049 */ 050 051/* 052 * As stated in the Javadoc comments below, materials from Unicode.org 053 * are used in this class. The following license applies to these materials: 054 * http://www.unicode.org/copyright.html#Exhibit1 055 * 056 * EXHIBIT 1 057 * UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE 058 * 059 * Unicode Data Files include all data files under the directories 060 * http://www.unicode.org/Public/, http://www.unicode.org/reports/, 061 * and http://www.unicode.org/cldr/data/ . 062 * Unicode Software includes any source code published in the Unicode Standard 063 * or under the directories http://www.unicode.org/Public/, http://www.unicode.org/reports/, 064 * and http://www.unicode.org/cldr/data/. 065 * 066 * NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, 067 * INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"), 068 * AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, 069 * ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT 070 * DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. 071 * 072 * COPYRIGHT AND PERMISSION NOTICE 073 * Copyright (C) 1991-2007 Unicode, Inc. All rights reserved. Distributed under 074 * the Terms of Use in http://www.unicode.org/copyright.html. 075 * 076 * Permission is hereby granted, free of charge, to any person obtaining a copy 077 * of the Unicode data files and any associated documentation (the "Data Files") 078 * or Unicode software and any associated documentation (the "Software") to deal 079 * in the Data Files or Software without restriction, including without limitation 080 * the rights to use, copy, modify, merge, publish, distribute, and/or sell copies 081 * of the Data Files or Software, and to permit persons to whom the Data Files 082 * or Software are furnished to do so, provided that (a) the above copyright 083 * notice(s) and this permission notice appear with all copies of the Data Files 084 * or Software, (b) both the above copyright notice(s) and this permission notice 085 * appear in associated documentation, and (c) there is clear notice in each 086 * modified Data File or in the Software as well as in the documentation associated 087 * with the Data File(s) or Software that the data or software has been modified. 088 * 089 * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 090 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 091 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 092 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE 093 * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY 094 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 095 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 096 * CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. 097 * 098 * Except as contained in this notice, the name of a copyright holder shall not 099 * be used in advertising or otherwise to promote the sale, use or other dealings 100 * in these Data Files or Software without prior written authorization of the 101 * copyright holder. 102 */ 103package com.itextpdf.text.pdf; 104 105/** 106 * Reference implementation of the Unicode 3.0 Bidi algorithm. 107 * 108 * <p> 109 * This implementation is not optimized for performance. It is intended 110 * as a reference implementation that closely follows the specification 111 * of the Bidirectional Algorithm in The Unicode Standard version 3.0. 112 * <p> 113 * <b>Input:</b><br> 114 * There are two levels of input to the algorithm, since clients may prefer 115 * to supply some information from out-of-band sources rather than relying on 116 * the default behavior. 117 * <ol> 118 * <li>unicode type array 119 * <li>unicode type array, with externally supplied base line direction 120 * </ol> 121 * <p><b>Output:</b><br> 122 * Output is separated into several stages as well, to better enable clients 123 * to evaluate various aspects of implementation conformance. 124 * <ol> 125 * <li>levels array over entire paragraph 126 * <li>reordering array over entire paragraph 127 * <li>levels array over line 128 * <li>reordering array over line 129 * </ol> 130 * Note that for conformance, algorithms are only required to generate correct 131 * reordering and character directionality (odd or even levels) over a line. 132 * Generating identical level arrays over a line is not required. Bidi 133 * explicit format codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned 134 * arbitrary levels and positions as long as the other text matches. 135 * <p> 136 * As the algorithm is defined to operate on a single paragraph at a time, 137 * this implementation is written to handle single paragraphs. Thus 138 * rule P1 is presumed by this implementation-- the data provided to the 139 * implementation is assumed to be a single paragraph, and either contains no 140 * 'B' codes, or a single 'B' code at the end of the input. 'B' is allowed 141 * as input to illustrate how the algorithm assigns it a level. 142 * <p> 143 * Also note that rules L3 and L4 depend on the rendering engine that uses 144 * the result of the bidi algorithm. This implementation assumes that the 145 * rendering engine expects combining marks in visual order (e.g. to the 146 * left of their base character in RTL runs) and that it adjust the glyphs 147 * used to render mirrored characters that are in RTL runs so that they 148 * render appropriately. 149 * 150 * @author Doug Felt 151 */ 152 153import com.itextpdf.text.error_messages.MessageLocalization; 154 155public final class BidiOrder { 156 private byte[] initialTypes; 157 private byte[] embeddings; // generated from processing format codes 158 private byte paragraphEmbeddingLevel = -1; // undefined 159 160 private int textLength; // for convenience 161 private byte[] resultTypes; // for paragraph, not lines 162 private byte[] resultLevels; // for paragraph, not lines 163 164 // The bidi types 165 166 /** Left-to-right*/ 167 public static final byte L = 0; 168 169 /** Left-to-Right Embedding */ 170 public static final byte LRE = 1; 171 172 /** Left-to-Right Override */ 173 public static final byte LRO = 2; 174 175 /** Right-to-Left */ 176 public static final byte R = 3; 177 178 /** Right-to-Left Arabic */ 179 public static final byte AL = 4; 180 181 /** Right-to-Left Embedding */ 182 public static final byte RLE = 5; 183 184 /** Right-to-Left Override */ 185 public static final byte RLO = 6; 186 187 /** Pop Directional Format */ 188 public static final byte PDF = 7; 189 190 /** European Number */ 191 public static final byte EN = 8; 192 193 /** European Number Separator */ 194 public static final byte ES = 9; 195 196 /** European Number Terminator */ 197 public static final byte ET = 10; 198 199 /** Arabic Number */ 200 public static final byte AN = 11; 201 202 /** Common Number Separator */ 203 public static final byte CS = 12; 204 205 /** Non-Spacing Mark */ 206 public static final byte NSM = 13; 207 208 /** Boundary Neutral */ 209 public static final byte BN = 14; 210 211 /** Paragraph Separator */ 212 public static final byte B = 15; 213 214 /** Segment Separator */ 215 public static final byte S = 16; 216 217 /** Whitespace */ 218 public static final byte WS = 17; 219 220 /** Other Neutrals */ 221 public static final byte ON = 18; 222 223 /** Minimum bidi type value. */ 224 public static final byte TYPE_MIN = 0; 225 226 /** Maximum bidi type value. */ 227 public static final byte TYPE_MAX = 18; 228 229 // 230 // Input 231 // 232 233 /** 234 * Initialize using an array of direction types. Types range from TYPE_MIN to TYPE_MAX inclusive 235 * and represent the direction codes of the characters in the text. 236 * 237 * @param types the types array 238 */ 239 public BidiOrder(byte[] types) { 240 validateTypes(types); 241 242 this.initialTypes = (byte[])types.clone(); // client type array remains unchanged 243 244 runAlgorithm(); 245 } 246 247 /** 248 * Initialize using an array of direction types and an externally supplied paragraph embedding level. 249 * The embedding level may be -1, 0, or 1. -1 means to apply the default algorithm (rules P2 and P3), 250 * 0 is for LTR paragraphs, and 1 is for RTL paragraphs. 251 * 252 * @param types the types array 253 * @param paragraphEmbeddingLevel the externally supplied paragraph embedding level. 254 */ 255 public BidiOrder(byte[] types, byte paragraphEmbeddingLevel) { 256 validateTypes(types); 257 validateParagraphEmbeddingLevel(paragraphEmbeddingLevel); 258 259 this.initialTypes = (byte[])types.clone(); // client type array remains unchanged 260 this.paragraphEmbeddingLevel = paragraphEmbeddingLevel; 261 262 runAlgorithm(); 263 } 264 265 public BidiOrder(char text[], int offset, int length, byte paragraphEmbeddingLevel) { 266 initialTypes = new byte[length]; 267 for (int k = 0; k < length; ++k) { 268 initialTypes[k] = rtypes[text[offset + k]]; 269 } 270 validateParagraphEmbeddingLevel(paragraphEmbeddingLevel); 271 272 this.paragraphEmbeddingLevel = paragraphEmbeddingLevel; 273 274 runAlgorithm(); 275 } 276 277 public final static byte getDirection(char c) { 278 return rtypes[c]; 279 } 280 281 /** 282 * The algorithm. 283 * Does not include line-based processing (Rules L1, L2). 284 * These are applied later in the line-based phase of the algorithm. 285 */ 286 private void runAlgorithm() { 287 textLength = initialTypes.length; 288 289 // Initialize output types. 290 // Result types initialized to input types. 291 resultTypes = (byte[])initialTypes.clone(); 292 293 294 // 1) determining the paragraph level 295 // Rule P1 is the requirement for entering this algorithm. 296 // Rules P2, P3. 297 // If no externally supplied paragraph embedding level, use default. 298 if (paragraphEmbeddingLevel == -1) { 299 determineParagraphEmbeddingLevel(); 300 } 301 302 // Initialize result levels to paragraph embedding level. 303 resultLevels = new byte[textLength]; 304 setLevels(0, textLength, paragraphEmbeddingLevel); 305 306 // 2) Explicit levels and directions 307 // Rules X1-X8. 308 determineExplicitEmbeddingLevels(); 309 310 // Rule X9. 311 textLength = removeExplicitCodes(); 312 313 // Rule X10. 314 // Run remainder of algorithm one level run at a time 315 byte prevLevel = paragraphEmbeddingLevel; 316 int start = 0; 317 while (start < textLength) { 318 byte level = resultLevels[start]; 319 byte prevType = typeForLevel(Math.max(prevLevel, level)); 320 321 int limit = start + 1; 322 while (limit < textLength && resultLevels[limit] == level) { 323 ++limit; 324 } 325 326 byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel; 327 byte succType = typeForLevel(Math.max(succLevel, level)); 328 329 // 3) resolving weak types 330 // Rules W1-W7. 331 resolveWeakTypes(start, limit, level, prevType, succType); 332 333 // 4) resolving neutral types 334 // Rules N1-N3. 335 resolveNeutralTypes(start, limit, level, prevType, succType); 336 337 // 5) resolving implicit embedding levels 338 // Rules I1, I2. 339 resolveImplicitLevels(start, limit, level, prevType, succType); 340 341 prevLevel = level; 342 start = limit; 343 } 344 345 // Reinsert explicit codes and assign appropriate levels to 'hide' them. 346 // This is for convenience, so the resulting level array maps 1-1 347 // with the initial array. 348 // See the implementation suggestions section of TR#9 for guidelines on 349 // how to implement the algorithm without removing and reinserting the codes. 350 textLength = reinsertExplicitCodes(textLength); 351 } 352 353 /** 354 * 1) determining the paragraph level. 355 * <p> 356 * Rules P2, P3. 357 * <p> 358 * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1. 359 */ 360 private void determineParagraphEmbeddingLevel() { 361 byte strongType = -1; // unknown 362 363 // Rule P2. 364 for (int i = 0; i < textLength; ++i) { 365 byte t = resultTypes[i]; 366 if (t == L || t == AL || t == R) { 367 strongType = t; 368 break; 369 } 370 } 371 372 // Rule P3. 373 if (strongType == -1) { // none found 374 // default embedding level when no strong types found is 0. 375 paragraphEmbeddingLevel = 0; 376 } else if (strongType == L) { 377 paragraphEmbeddingLevel = 0; 378 } else { // AL, R 379 paragraphEmbeddingLevel = 1; 380 } 381 } 382 383 /** 384 * Process embedding format codes. 385 * <p> 386 * Calls processEmbeddings to generate an embedding array from the explicit format codes. The 387 * embedding overrides in the array are then applied to the result types, and the result levels are 388 * initialized. 389 * @see #processEmbeddings 390 */ 391 private void determineExplicitEmbeddingLevels() { 392 embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel); 393 394 for (int i = 0; i < textLength; ++i) { 395 byte level = embeddings[i]; 396 if ((level & 0x80) != 0) { 397 level &= 0x7f; 398 resultTypes[i] = typeForLevel(level); 399 } 400 resultLevels[i] = level; 401 } 402 } 403 404 /** 405 * Rules X9. 406 * Remove explicit codes so that they may be ignored during the remainder 407 * of the main portion of the algorithm. The length of the resulting text 408 * is returned. 409 * @return the length of the data excluding explicit codes and BN. 410 */ 411 private int removeExplicitCodes() { 412 int w = 0; 413 for (int i = 0; i < textLength; ++i) { 414 byte t = initialTypes[i]; 415 if (!(t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN)) { 416 embeddings[w] = embeddings[i]; 417 resultTypes[w] = resultTypes[i]; 418 resultLevels[w] = resultLevels[i]; 419 w++; 420 } 421 } 422 return w; // new textLength while explicit levels are removed 423 } 424 425 /** 426 * Reinsert levels information for explicit codes. 427 * This is for ease of relating the level information 428 * to the original input data. Note that the levels 429 * assigned to these codes are arbitrary, they're 430 * chosen so as to avoid breaking level runs. 431 * @param textLength the length of the data after compression 432 * @return the length of the data (original length of 433 * types array supplied to constructor) 434 */ 435 private int reinsertExplicitCodes(int textLength) { 436 for (int i = initialTypes.length; --i >= 0;) { 437 byte t = initialTypes[i]; 438 if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) { 439 embeddings[i] = 0; 440 resultTypes[i] = t; 441 resultLevels[i] = -1; 442 } else { 443 --textLength; 444 embeddings[i] = embeddings[textLength]; 445 resultTypes[i] = resultTypes[textLength]; 446 resultLevels[i] = resultLevels[textLength]; 447 } 448 } 449 450 // now propagate forward the levels information (could have 451 // propagated backward, the main thing is not to introduce a level 452 // break where one doesn't already exist). 453 454 if (resultLevels[0] == -1) { 455 resultLevels[0] = paragraphEmbeddingLevel; 456 } 457 for (int i = 1; i < initialTypes.length; ++i) { 458 if (resultLevels[i] == -1) { 459 resultLevels[i] = resultLevels[i-1]; 460 } 461 } 462 463 // Embedding information is for informational purposes only 464 // so need not be adjusted. 465 466 return initialTypes.length; 467 } 468 469 /** 470 * 2) determining explicit levels 471 * Rules X1 - X8 472 * 473 * The interaction of these rules makes handling them a bit complex. 474 * This examines resultTypes but does not modify it. It returns embedding and 475 * override information in the result array. The low 7 bits are the level, the high 476 * bit is set if the level is an override, and clear if it is an embedding. 477 */ 478 private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) { 479 final int EXPLICIT_LEVEL_LIMIT = 62; 480 481 int textLength = resultTypes.length; 482 byte[] embeddings = new byte[textLength]; 483 484 // This stack will store the embedding levels and override status in a single byte 485 // as described above. 486 byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT]; 487 int stackCounter = 0; 488 489 // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid. But 490 // an RLE at level 60 is valid, since the new level 61 is valid. The current wording 491 // of the rules requires that the RLE remain valid even if a previous LRE is invalid. 492 // This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs 493 // will not try to pop the stack. 494 int overflowAlmostCounter = 0; 495 496 // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will 497 // not try to pop the stack. 498 int overflowCounter = 0; 499 500 // Rule X1. 501 502 // Keep the level separate from the value (level | override status flag) for ease of access. 503 byte currentEmbeddingLevel = paragraphEmbeddingLevel; 504 byte currentEmbeddingValue = paragraphEmbeddingLevel; 505 506 // Loop through types, handling all remaining rules 507 for (int i = 0; i < textLength; ++i) { 508 509 embeddings[i] = currentEmbeddingValue; 510 511 byte t = resultTypes[i]; 512 513 // Rules X2, X3, X4, X5 514 switch (t) { 515 case RLE: 516 case LRE: 517 case RLO: 518 case LRO: 519 // Only need to compute new level if current level is valid 520 if (overflowCounter == 0) { 521 byte newLevel; 522 if (t == RLE || t == RLO) { 523 newLevel = (byte)((currentEmbeddingLevel + 1) | 1); // least greater odd 524 } else { // t == LRE || t == LRO 525 newLevel = (byte)((currentEmbeddingLevel + 2) & ~1); // least greater even 526 } 527 528 // If the new level is valid, push old embedding level and override status 529 // No check for valid stack counter, since the level check suffices. 530 if (newLevel < EXPLICIT_LEVEL_LIMIT) { 531 embeddingValueStack[stackCounter] = currentEmbeddingValue; 532 stackCounter++; 533 534 currentEmbeddingLevel = newLevel; 535 if (t == LRO || t == RLO) { // override 536 currentEmbeddingValue = (byte)(newLevel | 0x80); 537 } else { 538 currentEmbeddingValue = newLevel; 539 } 540 541 // Adjust level of format mark (for expositional purposes only, this gets 542 // removed later). 543 embeddings[i] = currentEmbeddingValue; 544 break; 545 } 546 547 // Otherwise new level is invalid, but a valid level can still be achieved if this 548 // level is 60 and we encounter an RLE or RLO further on. So record that we 549 // 'almost' overflowed. 550 if (currentEmbeddingLevel == 60) { 551 overflowAlmostCounter++; 552 break; 553 } 554 } 555 556 // Otherwise old or new level is invalid. 557 overflowCounter++; 558 break; 559 560 case PDF: 561 // The only case where this did not actually overflow but may have almost overflowed 562 // is when there was an RLE or RLO on level 60, which would result in level 61. So we 563 // only test the almost overflow condition in that case. 564 // 565 // Also note that there may be a PDF without any pushes at all. 566 567 if (overflowCounter > 0) { 568 --overflowCounter; 569 } else if (overflowAlmostCounter > 0 && currentEmbeddingLevel != 61) { 570 --overflowAlmostCounter; 571 } else if (stackCounter > 0) { 572 --stackCounter; 573 currentEmbeddingValue = embeddingValueStack[stackCounter]; 574 currentEmbeddingLevel = (byte)(currentEmbeddingValue & 0x7f); 575 } 576 break; 577 578 case B: 579 // Rule X8. 580 581 // These values are reset for clarity, in this implementation B can only 582 // occur as the last code in the array. 583 stackCounter = 0; 584 overflowCounter = 0; 585 overflowAlmostCounter = 0; 586 currentEmbeddingLevel = paragraphEmbeddingLevel; 587 currentEmbeddingValue = paragraphEmbeddingLevel; 588 589 embeddings[i] = paragraphEmbeddingLevel; 590 break; 591 592 default: 593 break; 594 } 595 } 596 597 return embeddings; 598 } 599 600 601 /** 602 * 3) resolving weak types 603 * Rules W1-W7. 604 * 605 * Note that some weak types (EN, AN) remain after this processing is complete. 606 */ 607 private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) { 608 609 // Rule W1. 610 // Changes all NSMs. 611 byte preceedingCharacterType = sor; 612 for (int i = start; i < limit; ++i) { 613 byte t = resultTypes[i]; 614 if (t == NSM) { 615 resultTypes[i] = preceedingCharacterType; 616 } else { 617 preceedingCharacterType = t; 618 } 619 } 620 621 // Rule W2. 622 // EN does not change at the start of the run, because sor != AL. 623 for (int i = start; i < limit; ++i) { 624 if (resultTypes[i] == EN) { 625 for (int j = i - 1; j >= start; --j) { 626 byte t = resultTypes[j]; 627 if (t == L || t == R || t == AL) { 628 if (t == AL) { 629 resultTypes[i] = AN; 630 } 631 break; 632 } 633 } 634 } 635 } 636 637 // Rule W3. 638 for (int i = start; i < limit; ++i) { 639 if (resultTypes[i] == AL) { 640 resultTypes[i] = R; 641 } 642 } 643 644 // Rule W4. 645 // Since there must be values on both sides for this rule to have an 646 // effect, the scan skips the first and last value. 647 // 648 // Although the scan proceeds left to right, and changes the type values 649 // in a way that would appear to affect the computations later in the scan, 650 // there is actually no problem. A change in the current value can only 651 // affect the value to its immediate right, and only affect it if it is 652 // ES or CS. But the current value can only change if the value to its 653 // right is not ES or CS. Thus either the current value will not change, 654 // or its change will have no effect on the remainder of the analysis. 655 656 for (int i = start + 1; i < limit - 1; ++i) { 657 if (resultTypes[i] == ES || resultTypes[i] == CS) { 658 byte prevSepType = resultTypes[i-1]; 659 byte succSepType = resultTypes[i+1]; 660 if (prevSepType == EN && succSepType == EN) { 661 resultTypes[i] = EN; 662 } else if (resultTypes[i] == CS && prevSepType == AN && succSepType == AN) { 663 resultTypes[i] = AN; 664 } 665 } 666 } 667 668 // Rule W5. 669 for (int i = start; i < limit; ++i) { 670 if (resultTypes[i] == ET) { 671 // locate end of sequence 672 int runstart = i; 673 int runlimit = findRunLimit(runstart, limit, new byte[] { ET }); 674 675 // check values at ends of sequence 676 byte t = runstart == start ? sor : resultTypes[runstart - 1]; 677 678 if (t != EN) { 679 t = runlimit == limit ? eor : resultTypes[runlimit]; 680 } 681 682 if (t == EN) { 683 setTypes(runstart, runlimit, EN); 684 } 685 686 // continue at end of sequence 687 i = runlimit; 688 } 689 } 690 691 // Rule W6. 692 for (int i = start; i < limit; ++i) { 693 byte t = resultTypes[i]; 694 if (t == ES || t == ET || t == CS) { 695 resultTypes[i] = ON; 696 } 697 } 698 699 // Rule W7. 700 for (int i = start; i < limit; ++i) { 701 if (resultTypes[i] == EN) { 702 // set default if we reach start of run 703 byte prevStrongType = sor; 704 for (int j = i - 1; j >= start; --j) { 705 byte t = resultTypes[j]; 706 if (t == L || t == R) { // AL's have been removed 707 prevStrongType = t; 708 break; 709 } 710 } 711 if (prevStrongType == L) { 712 resultTypes[i] = L; 713 } 714 } 715 } 716 } 717 718 /** 719 * 6) resolving neutral types 720 * Rules N1-N2. 721 */ 722 private void resolveNeutralTypes(int start, int limit, byte level, byte sor, byte eor) { 723 724 for (int i = start; i < limit; ++i) { 725 byte t = resultTypes[i]; 726 if (t == WS || t == ON || t == B || t == S) { 727 // find bounds of run of neutrals 728 int runstart = i; 729 int runlimit = findRunLimit(runstart, limit, new byte[] {B, S, WS, ON}); 730 731 // determine effective types at ends of run 732 byte leadingType; 733 byte trailingType; 734 735 if (runstart == start) { 736 leadingType = sor; 737 } else { 738 leadingType = resultTypes[runstart - 1]; 739 if (leadingType == L || leadingType == R) { 740 // found the strong type 741 } else if (leadingType == AN) { 742 leadingType = R; 743 } else if (leadingType == EN) { 744 // Since EN's with previous strong L types have been changed 745 // to L in W7, the leadingType must be R. 746 leadingType = R; 747 } 748 } 749 750 if (runlimit == limit) { 751 trailingType = eor; 752 } else { 753 trailingType = resultTypes[runlimit]; 754 if (trailingType == L || trailingType == R) { 755 // found the strong type 756 } else if (trailingType == AN) { 757 trailingType = R; 758 } else if (trailingType == EN) { 759 trailingType = R; 760 } 761 } 762 763 byte resolvedType; 764 if (leadingType == trailingType) { 765 // Rule N1. 766 resolvedType = leadingType; 767 } else { 768 // Rule N2. 769 // Notice the embedding level of the run is used, not 770 // the paragraph embedding level. 771 resolvedType = typeForLevel(level); 772 } 773 774 setTypes(runstart, runlimit, resolvedType); 775 776 // skip over run of (former) neutrals 777 i = runlimit; 778 } 779 } 780 } 781 782 /** 783 * 7) resolving implicit embedding levels 784 * Rules I1, I2. 785 */ 786 private void resolveImplicitLevels(int start, int limit, byte level, byte sor, byte eor) { 787 if ((level & 1) == 0) { // even level 788 for (int i = start; i < limit; ++i) { 789 byte t = resultTypes[i]; 790 // Rule I1. 791 if (t == L ) { 792 // no change 793 } else if (t == R) { 794 resultLevels[i] += 1; 795 } else { // t == AN || t == EN 796 resultLevels[i] += 2; 797 } 798 } 799 } else { // odd level 800 for (int i = start; i < limit; ++i) { 801 byte t = resultTypes[i]; 802 // Rule I2. 803 if (t == R) { 804 // no change 805 } else { // t == L || t == AN || t == EN 806 resultLevels[i] += 1; 807 } 808 } 809 } 810 } 811 812 // 813 // Output 814 // 815 816 public byte[] getLevels() { 817 return getLevels(new int[]{textLength}); 818 } 819 820 /** 821 * Return levels array breaking lines at offsets in linebreaks. <br> 822 * Rule L1. 823 * <p> 824 * The returned levels array contains the resolved level for each 825 * bidi code passed to the constructor. 826 * <p> 827 * The linebreaks array must include at least one value. 828 * The values must be in strictly increasing order (no duplicates) 829 * between 1 and the length of the text, inclusive. The last value 830 * must be the length of the text. 831 * 832 * @param linebreaks the offsets at which to break the paragraph 833 * @return the resolved levels of the text 834 */ 835 public byte[] getLevels(int[] linebreaks) { 836 837 // Note that since the previous processing has removed all 838 // P, S, and WS values from resultTypes, the values referred to 839 // in these rules are the initial types, before any processing 840 // has been applied (including processing of overrides). 841 // 842 // This example implementation has reinserted explicit format codes 843 // and BN, in order that the levels array correspond to the 844 // initial text. Their final placement is not normative. 845 // These codes are treated like WS in this implementation, 846 // so they don't interrupt sequences of WS. 847 848 validateLineBreaks(linebreaks, textLength); 849 850 byte[] result = (byte[])resultLevels.clone(); // will be returned to caller 851 852 // don't worry about linebreaks since if there is a break within 853 // a series of WS values preceding S, the linebreak itself 854 // causes the reset. 855 for (int i = 0; i < result.length; ++i) { 856 byte t = initialTypes[i]; 857 if (t == B || t == S) { 858 // Rule L1, clauses one and two. 859 result[i] = paragraphEmbeddingLevel; 860 861 // Rule L1, clause three. 862 for (int j = i - 1; j >= 0; --j) { 863 if (isWhitespace(initialTypes[j])) { // including format codes 864 result[j] = paragraphEmbeddingLevel; 865 } else { 866 break; 867 } 868 } 869 } 870 } 871 872 // Rule L1, clause four. 873 int start = 0; 874 for (int i = 0; i < linebreaks.length; ++i) { 875 int limit = linebreaks[i]; 876 for (int j = limit - 1; j >= start; --j) { 877 if (isWhitespace(initialTypes[j])) { // including format codes 878 result[j] = paragraphEmbeddingLevel; 879 } else { 880 break; 881 } 882 } 883 884 start = limit; 885 } 886 887 return result; 888 } 889 890 /** 891 * Return reordering array breaking lines at offsets in linebreaks. 892 * <p> 893 * The reordering array maps from a visual index to a logical index. 894 * Lines are concatenated from left to right. So for example, the 895 * fifth character from the left on the third line is 896 * <pre> getReordering(linebreaks)[linebreaks[1] + 4]</pre> 897 * (linebreaks[1] is the position after the last character of the 898 * second line, which is also the index of the first character on the 899 * third line, and adding four gets the fifth character from the left). 900 * <p> 901 * The linebreaks array must include at least one value. 902 * The values must be in strictly increasing order (no duplicates) 903 * between 1 and the length of the text, inclusive. The last value 904 * must be the length of the text. 905 * 906 * @param linebreaks the offsets at which to break the paragraph. 907 */ 908 public int[] getReordering(int[] linebreaks) { 909 validateLineBreaks(linebreaks, textLength); 910 911 byte[] levels = getLevels(linebreaks); 912 913 return computeMultilineReordering(levels, linebreaks); 914 } 915 916 /** 917 * Return multiline reordering array for a given level array. 918 * Reordering does not occur across a line break. 919 */ 920 private static int[] computeMultilineReordering(byte[] levels, int[] linebreaks) { 921 int[] result = new int[levels.length]; 922 923 int start = 0; 924 for (int i = 0; i < linebreaks.length; ++i) { 925 int limit = linebreaks[i]; 926 927 byte[] templevels = new byte[limit - start]; 928 System.arraycopy(levels, start, templevels, 0, templevels.length); 929 930 int[] temporder = computeReordering(templevels); 931 for (int j = 0; j < temporder.length; ++j) { 932 result[start + j] = temporder[j] + start; 933 } 934 935 start = limit; 936 } 937 938 return result; 939 } 940 941 /** 942 * Return reordering array for a given level array. This reorders a single line. 943 * The reordering is a visual to logical map. For example, 944 * the leftmost char is string.charAt(order[0]). 945 * Rule L2. 946 */ 947 private static int[] computeReordering(byte[] levels) { 948 int lineLength = levels.length; 949 950 int[] result = new int[lineLength]; 951 952 // initialize order 953 for (int i = 0; i < lineLength; ++i) { 954 result[i] = i; 955 } 956 957 // locate highest level found on line. 958 // Note the rules say text, but no reordering across line bounds is performed, 959 // so this is sufficient. 960 byte highestLevel = 0; 961 byte lowestOddLevel = 63; 962 for (int i = 0; i < lineLength; ++i) { 963 byte level = levels[i]; 964 if (level > highestLevel) { 965 highestLevel = level; 966 } 967 if (((level & 1) != 0) && level < lowestOddLevel) { 968 lowestOddLevel = level; 969 } 970 } 971 972 for (int level = highestLevel; level >= lowestOddLevel; --level) { 973 for (int i = 0; i < lineLength; ++i) { 974 if (levels[i] >= level) { 975 // find range of text at or above this level 976 int start = i; 977 int limit = i + 1; 978 while (limit < lineLength && levels[limit] >= level) { 979 ++limit; 980 } 981 982 // reverse run 983 for (int j = start, k = limit - 1; j < k; ++j, --k) { 984 int temp = result[j]; 985 result[j] = result[k]; 986 result[k] = temp; 987 } 988 989 // skip to end of level run 990 i = limit; 991 } 992 } 993 } 994 995 return result; 996 } 997 998 /** 999 * Return the base level of the paragraph. 1000 */ 1001 public byte getBaseLevel() { 1002 return paragraphEmbeddingLevel; 1003 } 1004 1005 // --- internal utilities ------------------------------------------------- 1006 1007 /** 1008 * Return true if the type is considered a whitespace type for the line break rules. 1009 */ 1010 private static boolean isWhitespace(byte biditype) { 1011 switch (biditype) { 1012 case LRE: 1013 case RLE: 1014 case LRO: 1015 case RLO: 1016 case PDF: 1017 case BN: 1018 case WS: 1019 return true; 1020 default: 1021 return false; 1022 } 1023 } 1024 1025 /** 1026 * Return the strong type (L or R) corresponding to the level. 1027 */ 1028 private static byte typeForLevel(int level) { 1029 return ((level & 0x1) == 0) ? L : R; 1030 } 1031 1032 /** 1033 * Return the limit of the run starting at index that includes only resultTypes in validSet. 1034 * This checks the value at index, and will return index if that value is not in validSet. 1035 */ 1036 private int findRunLimit(int index, int limit, byte[] validSet) { 1037 --index; 1038 loop: 1039 while (++index < limit) { 1040 byte t = resultTypes[index]; 1041 for (int i = 0; i < validSet.length; ++i) { 1042 if (t == validSet[i]) { 1043 continue loop; 1044 } 1045 } 1046 // didn't find a match in validSet 1047 return index; 1048 } 1049 return limit; 1050 } 1051 1052 /** 1053 * Return the start of the run including index that includes only resultTypes in validSet. 1054 * This assumes the value at index is valid, and does not check it. 1055 */ 1056 private int findRunStart(int index, byte[] validSet) { 1057 loop: 1058 while (--index >= 0) { 1059 byte t = resultTypes[index]; 1060 for (int i = 0; i < validSet.length; ++i) { 1061 if (t == validSet[i]) { 1062 continue loop; 1063 } 1064 } 1065 return index + 1; 1066 } 1067 return 0; 1068 } 1069 1070 /** 1071 * Set resultTypes from start up to (but not including) limit to newType. 1072 */ 1073 private void setTypes(int start, int limit, byte newType) { 1074 for (int i = start; i < limit; ++i) { 1075 resultTypes[i] = newType; 1076 } 1077 } 1078 1079 /** 1080 * Set resultLevels from start up to (but not including) limit to newLevel. 1081 */ 1082 private void setLevels(int start, int limit, byte newLevel) { 1083 for (int i = start; i < limit; ++i) { 1084 resultLevels[i] = newLevel; 1085 } 1086 } 1087 1088 // --- input validation --------------------------------------------------- 1089 1090 /** 1091 * Throw exception if type array is invalid. 1092 */ 1093 private static void validateTypes(byte[] types) { 1094 if (types == null) { 1095 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("types.is.null")); 1096 } 1097 for (int i = 0; i < types.length; ++i) { 1098 if (types[i] < TYPE_MIN || types[i] > TYPE_MAX) { 1099 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("illegal.type.value.at.1.2", String.valueOf(i), String.valueOf(types[i]))); 1100 } 1101 } 1102 for (int i = 0; i < types.length - 1; ++i) { 1103 if (types[i] == B) { 1104 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("b.type.before.end.of.paragraph.at.index.1", i)); 1105 } 1106 } 1107 } 1108 1109 /** 1110 * Throw exception if paragraph embedding level is invalid. Special allowance for -1 so that 1111 * default processing can still be performed when using this API. 1112 */ 1113 private static void validateParagraphEmbeddingLevel(byte paragraphEmbeddingLevel) { 1114 if (paragraphEmbeddingLevel != -1 && 1115 paragraphEmbeddingLevel != 0 && 1116 paragraphEmbeddingLevel != 1) { 1117 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("illegal.paragraph.embedding.level.1", paragraphEmbeddingLevel)); 1118 } 1119 } 1120 1121 /** 1122 * Throw exception if line breaks array is invalid. 1123 */ 1124 private static void validateLineBreaks(int[] linebreaks, int textLength) { 1125 int prev = 0; 1126 for (int i = 0; i < linebreaks.length; ++i) { 1127 int next = linebreaks[i]; 1128 if (next <= prev) { 1129 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("bad.linebreak.1.at.index.2", String.valueOf(next), String.valueOf(i))); 1130 } 1131 prev = next; 1132 } 1133 if (prev != textLength) { 1134 throw new IllegalArgumentException(MessageLocalization.getComposedMessage("last.linebreak.must.be.at.1", textLength)); 1135 } 1136 } 1137 1138 private static final byte rtypes[] = new byte[0x10000]; 1139 1140 private static char baseTypes[] = { 1141 0, 8, (char)BN, 9, 9, (char)S, 10, 10, (char)B, 11, 11, (char)S, 12, 12, (char)WS, 13, 13, (char)B, 1142 14, 27, (char)BN, 28, 30, (char)B, 31, 31, (char)S, 32, 32, (char)WS, 33, 34, (char)ON, 35, 37, (char)ET, 1143 38, 42, (char)ON, 43, 43, (char)ET, 44, 44, (char)CS, 45, 45, (char)ET, 46, 46, (char)CS, 47, 47, (char)ES, 1144 48, 57, (char)EN, 58, 58, (char)CS, 59, 64, (char)ON, 65, 90, (char)L, 91, 96, (char)ON, 97, 122, (char)L, 1145 123, 126, (char)ON, 127, 132, (char)BN, 133, 133, (char)B, 134, 159, (char)BN, 160, 160, (char)CS, 1146 161, 161, (char)ON, 162, 165, (char)ET, 166, 169, (char)ON, 170, 170, (char)L, 171, 175, (char)ON, 1147 176, 177, (char)ET, 178, 179, (char)EN, 180, 180, (char)ON, 181, 181, (char)L, 182, 184, (char)ON, 1148 185, 185, (char)EN, 186, 186, (char)L, 187, 191, (char)ON, 192, 214, (char)L, 215, 215, (char)ON, 1149 216, 246, (char)L, 247, 247, (char)ON, 248, 696, (char)L, 697, 698, (char)ON, 699, 705, (char)L, 1150 706, 719, (char)ON, 720, 721, (char)L, 722, 735, (char)ON, 736, 740, (char)L, 741, 749, (char)ON, 1151 750, 750, (char)L, 751, 767, (char)ON, 768, 855, (char)NSM, 856, 860, (char)L, 861, 879, (char)NSM, 1152 880, 883, (char)L, 884, 885, (char)ON, 886, 893, (char)L, 894, 894, (char)ON, 895, 899, (char)L, 1153 900, 901, (char)ON, 902, 902, (char)L, 903, 903, (char)ON, 904, 1013, (char)L, 1014, 1014, (char)ON, 1154 1015, 1154, (char)L, 1155, 1158, (char)NSM, 1159, 1159, (char)L, 1160, 1161, (char)NSM, 1155 1162, 1417, (char)L, 1418, 1418, (char)ON, 1419, 1424, (char)L, 1425, 1441, (char)NSM, 1156 1442, 1442, (char)L, 1443, 1465, (char)NSM, 1466, 1466, (char)L, 1467, 1469, (char)NSM, 1157 1470, 1470, (char)R, 1471, 1471, (char)NSM, 1472, 1472, (char)R, 1473, 1474, (char)NSM, 1158 1475, 1475, (char)R, 1476, 1476, (char)NSM, 1477, 1487, (char)L, 1488, 1514, (char)R, 1159 1515, 1519, (char)L, 1520, 1524, (char)R, 1525, 1535, (char)L, 1536, 1539, (char)AL, 1160 1540, 1547, (char)L, 1548, 1548, (char)CS, 1549, 1549, (char)AL, 1550, 1551, (char)ON, 1161 1552, 1557, (char)NSM, 1558, 1562, (char)L, 1563, 1563, (char)AL, 1564, 1566, (char)L, 1162 1567, 1567, (char)AL, 1568, 1568, (char)L, 1569, 1594, (char)AL, 1595, 1599, (char)L, 1163 1600, 1610, (char)AL, 1611, 1624, (char)NSM, 1625, 1631, (char)L, 1632, 1641, (char)AN, 1164 1642, 1642, (char)ET, 1643, 1644, (char)AN, 1645, 1647, (char)AL, 1648, 1648, (char)NSM, 1165 1649, 1749, (char)AL, 1750, 1756, (char)NSM, 1757, 1757, (char)AL, 1758, 1764, (char)NSM, 1166 1765, 1766, (char)AL, 1767, 1768, (char)NSM, 1769, 1769, (char)ON, 1770, 1773, (char)NSM, 1167 1774, 1775, (char)AL, 1776, 1785, (char)EN, 1786, 1805, (char)AL, 1806, 1806, (char)L, 1168 1807, 1807, (char)BN, 1808, 1808, (char)AL, 1809, 1809, (char)NSM, 1810, 1839, (char)AL, 1169 1840, 1866, (char)NSM, 1867, 1868, (char)L, 1869, 1871, (char)AL, 1872, 1919, (char)L, 1170 1920, 1957, (char)AL, 1958, 1968, (char)NSM, 1969, 1969, (char)AL, 1970, 2304, (char)L, 1171 2305, 2306, (char)NSM, 2307, 2363, (char)L, 2364, 2364, (char)NSM, 2365, 2368, (char)L, 1172 2369, 2376, (char)NSM, 2377, 2380, (char)L, 2381, 2381, (char)NSM, 2382, 2384, (char)L, 1173 2385, 2388, (char)NSM, 2389, 2401, (char)L, 2402, 2403, (char)NSM, 2404, 2432, (char)L, 1174 2433, 2433, (char)NSM, 2434, 2491, (char)L, 2492, 2492, (char)NSM, 2493, 2496, (char)L, 1175 2497, 2500, (char)NSM, 2501, 2508, (char)L, 2509, 2509, (char)NSM, 2510, 2529, (char)L, 1176 2530, 2531, (char)NSM, 2532, 2545, (char)L, 2546, 2547, (char)ET, 2548, 2560, (char)L, 1177 2561, 2562, (char)NSM, 2563, 2619, (char)L, 2620, 2620, (char)NSM, 2621, 2624, (char)L, 1178 2625, 2626, (char)NSM, 2627, 2630, (char)L, 2631, 2632, (char)NSM, 2633, 2634, (char)L, 1179 2635, 2637, (char)NSM, 2638, 2671, (char)L, 2672, 2673, (char)NSM, 2674, 2688, (char)L, 1180 2689, 2690, (char)NSM, 2691, 2747, (char)L, 2748, 2748, (char)NSM, 2749, 2752, (char)L, 1181 2753, 2757, (char)NSM, 2758, 2758, (char)L, 2759, 2760, (char)NSM, 2761, 2764, (char)L, 1182 2765, 2765, (char)NSM, 2766, 2785, (char)L, 2786, 2787, (char)NSM, 2788, 2800, (char)L, 1183 2801, 2801, (char)ET, 2802, 2816, (char)L, 2817, 2817, (char)NSM, 2818, 2875, (char)L, 1184 2876, 2876, (char)NSM, 2877, 2878, (char)L, 2879, 2879, (char)NSM, 2880, 2880, (char)L, 1185 2881, 2883, (char)NSM, 2884, 2892, (char)L, 2893, 2893, (char)NSM, 2894, 2901, (char)L, 1186 2902, 2902, (char)NSM, 2903, 2945, (char)L, 2946, 2946, (char)NSM, 2947, 3007, (char)L, 1187 3008, 3008, (char)NSM, 3009, 3020, (char)L, 3021, 3021, (char)NSM, 3022, 3058, (char)L, 1188 3059, 3064, (char)ON, 3065, 3065, (char)ET, 3066, 3066, (char)ON, 3067, 3133, (char)L, 1189 3134, 3136, (char)NSM, 3137, 3141, (char)L, 3142, 3144, (char)NSM, 3145, 3145, (char)L, 1190 3146, 3149, (char)NSM, 3150, 3156, (char)L, 3157, 3158, (char)NSM, 3159, 3259, (char)L, 1191 3260, 3260, (char)NSM, 3261, 3275, (char)L, 3276, 3277, (char)NSM, 3278, 3392, (char)L, 1192 3393, 3395, (char)NSM, 3396, 3404, (char)L, 3405, 3405, (char)NSM, 3406, 3529, (char)L, 1193 3530, 3530, (char)NSM, 3531, 3537, (char)L, 3538, 3540, (char)NSM, 3541, 3541, (char)L, 1194 3542, 3542, (char)NSM, 3543, 3632, (char)L, 3633, 3633, (char)NSM, 3634, 3635, (char)L, 1195 3636, 3642, (char)NSM, 3643, 3646, (char)L, 3647, 3647, (char)ET, 3648, 3654, (char)L, 1196 3655, 3662, (char)NSM, 3663, 3760, (char)L, 3761, 3761, (char)NSM, 3762, 3763, (char)L, 1197 3764, 3769, (char)NSM, 3770, 3770, (char)L, 3771, 3772, (char)NSM, 3773, 3783, (char)L, 1198 3784, 3789, (char)NSM, 3790, 3863, (char)L, 3864, 3865, (char)NSM, 3866, 3892, (char)L, 1199 3893, 3893, (char)NSM, 3894, 3894, (char)L, 3895, 3895, (char)NSM, 3896, 3896, (char)L, 1200 3897, 3897, (char)NSM, 3898, 3901, (char)ON, 3902, 3952, (char)L, 3953, 3966, (char)NSM, 1201 3967, 3967, (char)L, 3968, 3972, (char)NSM, 3973, 3973, (char)L, 3974, 3975, (char)NSM, 1202 3976, 3983, (char)L, 3984, 3991, (char)NSM, 3992, 3992, (char)L, 3993, 4028, (char)NSM, 1203 4029, 4037, (char)L, 4038, 4038, (char)NSM, 4039, 4140, (char)L, 4141, 4144, (char)NSM, 1204 4145, 4145, (char)L, 4146, 4146, (char)NSM, 4147, 4149, (char)L, 4150, 4151, (char)NSM, 1205 4152, 4152, (char)L, 4153, 4153, (char)NSM, 4154, 4183, (char)L, 4184, 4185, (char)NSM, 1206 4186, 5759, (char)L, 5760, 5760, (char)WS, 5761, 5786, (char)L, 5787, 5788, (char)ON, 1207 5789, 5905, (char)L, 5906, 5908, (char)NSM, 5909, 5937, (char)L, 5938, 5940, (char)NSM, 1208 5941, 5969, (char)L, 5970, 5971, (char)NSM, 5972, 6001, (char)L, 6002, 6003, (char)NSM, 1209 6004, 6070, (char)L, 6071, 6077, (char)NSM, 6078, 6085, (char)L, 6086, 6086, (char)NSM, 1210 6087, 6088, (char)L, 6089, 6099, (char)NSM, 6100, 6106, (char)L, 6107, 6107, (char)ET, 1211 6108, 6108, (char)L, 6109, 6109, (char)NSM, 6110, 6127, (char)L, 6128, 6137, (char)ON, 1212 6138, 6143, (char)L, 6144, 6154, (char)ON, 6155, 6157, (char)NSM, 6158, 6158, (char)WS, 1213 6159, 6312, (char)L, 6313, 6313, (char)NSM, 6314, 6431, (char)L, 6432, 6434, (char)NSM, 1214 6435, 6438, (char)L, 6439, 6443, (char)NSM, 6444, 6449, (char)L, 6450, 6450, (char)NSM, 1215 6451, 6456, (char)L, 6457, 6459, (char)NSM, 6460, 6463, (char)L, 6464, 6464, (char)ON, 1216 6465, 6467, (char)L, 6468, 6469, (char)ON, 6470, 6623, (char)L, 6624, 6655, (char)ON, 1217 6656, 8124, (char)L, 8125, 8125, (char)ON, 8126, 8126, (char)L, 8127, 8129, (char)ON, 1218 8130, 8140, (char)L, 8141, 8143, (char)ON, 8144, 8156, (char)L, 8157, 8159, (char)ON, 1219 8160, 8172, (char)L, 8173, 8175, (char)ON, 8176, 8188, (char)L, 8189, 8190, (char)ON, 1220 8191, 8191, (char)L, 8192, 8202, (char)WS, 8203, 8205, (char)BN, 8206, 8206, (char)L, 1221 8207, 8207, (char)R, 8208, 8231, (char)ON, 8232, 8232, (char)WS, 8233, 8233, (char)B, 1222 8234, 8234, (char)LRE, 8235, 8235, (char)RLE, 8236, 8236, (char)PDF, 8237, 8237, (char)LRO, 1223 8238, 8238, (char)RLO, 8239, 8239, (char)WS, 8240, 8244, (char)ET, 8245, 8276, (char)ON, 1224 8277, 8278, (char)L, 8279, 8279, (char)ON, 8280, 8286, (char)L, 8287, 8287, (char)WS, 1225 8288, 8291, (char)BN, 8292, 8297, (char)L, 8298, 8303, (char)BN, 8304, 8304, (char)EN, 1226 8305, 8307, (char)L, 8308, 8313, (char)EN, 8314, 8315, (char)ET, 8316, 8318, (char)ON, 1227 8319, 8319, (char)L, 8320, 8329, (char)EN, 8330, 8331, (char)ET, 8332, 8334, (char)ON, 1228 8335, 8351, (char)L, 8352, 8369, (char)ET, 8370, 8399, (char)L, 8400, 8426, (char)NSM, 1229 8427, 8447, (char)L, 8448, 8449, (char)ON, 8450, 8450, (char)L, 8451, 8454, (char)ON, 1230 8455, 8455, (char)L, 8456, 8457, (char)ON, 8458, 8467, (char)L, 8468, 8468, (char)ON, 1231 8469, 8469, (char)L, 8470, 8472, (char)ON, 8473, 8477, (char)L, 8478, 8483, (char)ON, 1232 8484, 8484, (char)L, 8485, 8485, (char)ON, 8486, 8486, (char)L, 8487, 8487, (char)ON, 1233 8488, 8488, (char)L, 8489, 8489, (char)ON, 8490, 8493, (char)L, 8494, 8494, (char)ET, 1234 8495, 8497, (char)L, 8498, 8498, (char)ON, 8499, 8505, (char)L, 8506, 8507, (char)ON, 1235 8508, 8511, (char)L, 8512, 8516, (char)ON, 8517, 8521, (char)L, 8522, 8523, (char)ON, 1236 8524, 8530, (char)L, 8531, 8543, (char)ON, 8544, 8591, (char)L, 8592, 8721, (char)ON, 1237 8722, 8723, (char)ET, 8724, 9013, (char)ON, 9014, 9082, (char)L, 9083, 9108, (char)ON, 1238 9109, 9109, (char)L, 9110, 9168, (char)ON, 9169, 9215, (char)L, 9216, 9254, (char)ON, 1239 9255, 9279, (char)L, 9280, 9290, (char)ON, 9291, 9311, (char)L, 9312, 9371, (char)EN, 1240 9372, 9449, (char)L, 9450, 9450, (char)EN, 9451, 9751, (char)ON, 9752, 9752, (char)L, 1241 9753, 9853, (char)ON, 9854, 9855, (char)L, 9856, 9873, (char)ON, 9874, 9887, (char)L, 1242 9888, 9889, (char)ON, 9890, 9984, (char)L, 9985, 9988, (char)ON, 9989, 9989, (char)L, 1243 9990, 9993, (char)ON, 9994, 9995, (char)L, 9996, 10023, (char)ON, 10024, 10024, (char)L, 1244 10025, 10059, (char)ON, 10060, 10060, (char)L, 10061, 10061, (char)ON, 10062, 10062, (char)L, 1245 10063, 10066, (char)ON, 10067, 10069, (char)L, 10070, 10070, (char)ON, 10071, 10071, (char)L, 1246 10072, 10078, (char)ON, 10079, 10080, (char)L, 10081, 10132, (char)ON, 10133, 10135, (char)L, 1247 10136, 10159, (char)ON, 10160, 10160, (char)L, 10161, 10174, (char)ON, 10175, 10191, (char)L, 1248 10192, 10219, (char)ON, 10220, 10223, (char)L, 10224, 11021, (char)ON, 11022, 11903, (char)L, 1249 11904, 11929, (char)ON, 11930, 11930, (char)L, 11931, 12019, (char)ON, 12020, 12031, (char)L, 1250 12032, 12245, (char)ON, 12246, 12271, (char)L, 12272, 12283, (char)ON, 12284, 12287, (char)L, 1251 12288, 12288, (char)WS, 12289, 12292, (char)ON, 12293, 12295, (char)L, 12296, 12320, (char)ON, 1252 12321, 12329, (char)L, 12330, 12335, (char)NSM, 12336, 12336, (char)ON, 12337, 12341, (char)L, 1253 12342, 12343, (char)ON, 12344, 12348, (char)L, 12349, 12351, (char)ON, 12352, 12440, (char)L, 1254 12441, 12442, (char)NSM, 12443, 12444, (char)ON, 12445, 12447, (char)L, 12448, 12448, (char)ON, 1255 12449, 12538, (char)L, 12539, 12539, (char)ON, 12540, 12828, (char)L, 12829, 12830, (char)ON, 1256 12831, 12879, (char)L, 12880, 12895, (char)ON, 12896, 12923, (char)L, 12924, 12925, (char)ON, 1257 12926, 12976, (char)L, 12977, 12991, (char)ON, 12992, 13003, (char)L, 13004, 13007, (char)ON, 1258 13008, 13174, (char)L, 13175, 13178, (char)ON, 13179, 13277, (char)L, 13278, 13279, (char)ON, 1259 13280, 13310, (char)L, 13311, 13311, (char)ON, 13312, 19903, (char)L, 19904, 19967, (char)ON, 1260 19968, 42127, (char)L, 42128, 42182, (char)ON, 42183, 64284, (char)L, 64285, 64285, (char)R, 1261 64286, 64286, (char)NSM, 64287, 64296, (char)R, 64297, 64297, (char)ET, 64298, 64310, (char)R, 1262 64311, 64311, (char)L, 64312, 64316, (char)R, 64317, 64317, (char)L, 64318, 64318, (char)R, 1263 64319, 64319, (char)L, 64320, 64321, (char)R, 64322, 64322, (char)L, 64323, 64324, (char)R, 1264 64325, 64325, (char)L, 64326, 64335, (char)R, 64336, 64433, (char)AL, 64434, 64466, (char)L, 1265 64467, 64829, (char)AL, 64830, 64831, (char)ON, 64832, 64847, (char)L, 64848, 64911, (char)AL, 1266 64912, 64913, (char)L, 64914, 64967, (char)AL, 64968, 65007, (char)L, 65008, 65020, (char)AL, 1267 65021, 65021, (char)ON, 65022, 65023, (char)L, 65024, 65039, (char)NSM, 65040, 65055, (char)L, 1268 65056, 65059, (char)NSM, 65060, 65071, (char)L, 65072, 65103, (char)ON, 65104, 65104, (char)CS, 1269 65105, 65105, (char)ON, 65106, 65106, (char)CS, 65107, 65107, (char)L, 65108, 65108, (char)ON, 1270 65109, 65109, (char)CS, 65110, 65118, (char)ON, 65119, 65119, (char)ET, 65120, 65121, (char)ON, 1271 65122, 65123, (char)ET, 65124, 65126, (char)ON, 65127, 65127, (char)L, 65128, 65128, (char)ON, 1272 65129, 65130, (char)ET, 65131, 65131, (char)ON, 65132, 65135, (char)L, 65136, 65140, (char)AL, 1273 65141, 65141, (char)L, 65142, 65276, (char)AL, 65277, 65278, (char)L, 65279, 65279, (char)BN, 1274 65280, 65280, (char)L, 65281, 65282, (char)ON, 65283, 65285, (char)ET, 65286, 65290, (char)ON, 1275 65291, 65291, (char)ET, 65292, 65292, (char)CS, 65293, 65293, (char)ET, 65294, 65294, (char)CS, 1276 65295, 65295, (char)ES, 65296, 65305, (char)EN, 65306, 65306, (char)CS, 65307, 65312, (char)ON, 1277 65313, 65338, (char)L, 65339, 65344, (char)ON, 65345, 65370, (char)L, 65371, 65381, (char)ON, 1278 65382, 65503, (char)L, 65504, 65505, (char)ET, 65506, 65508, (char)ON, 65509, 65510, (char)ET, 1279 65511, 65511, (char)L, 65512, 65518, (char)ON, 65519, 65528, (char)L, 65529, 65531, (char)BN, 1280 65532, 65533, (char)ON, 65534, 65535, (char)L}; 1281 1282 static { 1283 for (int k = 0; k < baseTypes.length; ++k) { 1284 int start = baseTypes[k]; 1285 int end = baseTypes[++k]; 1286 byte b = (byte)baseTypes[++k]; 1287 while (start <= end) 1288 rtypes[start++] = b; 1289 } 1290 } 1291}