/*
 * Copyright 2008 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.itextpdf.text.pdf.qrcode;

import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.ArrayList;

/**
 * Static helpers that turn a string into a fully populated {@link QRCode}: mode selection,
 * bit-stream construction, error-correction interleaving and mask selection.
 *
 * @author satorux@google.com (Satoru Takabayashi) - creator
 * @author dswitkin@google.com (Daniel Switkin) - ported from C++
 * @since 5.0.2
 */
public final class Encoder {

  // The original table is defined in the table 5 of JISX0510:2004 (p.19).
  private static final int[] ALPHANUMERIC_TABLE = {
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00-0x0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10-0x1f
      36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,  // 0x20-0x2f
      0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 44, -1, -1, -1, -1, -1,  // 0x30-0x3f
      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  // 0x40-0x4f
      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
  };

  static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";

  // Width of the mode indicator written by appendModeInfo() and appendECI().
  private static final int MODE_INDICATOR_BITS = 4;

  // Size of what appendECI() writes: a mode indicator followed by an 8-bit designator.
  private static final int ECI_HEADER_BITS = MODE_INDICATOR_BITS + 8;

  private Encoder() {
  }

  // The mask penalty calculation is complicated.  See Table 21 of JISX0510:2004 (p.45) for details.
  // Basically it applies four rules and sums all penalties.
  private static int calculateMaskPenalty(ByteMatrix matrix) {
    int penalty = 0;
    penalty += MaskUtil.applyMaskPenaltyRule1(matrix);
    penalty += MaskUtil.applyMaskPenaltyRule2(matrix);
    penalty += MaskUtil.applyMaskPenaltyRule3(matrix);
    penalty += MaskUtil.applyMaskPenaltyRule4(matrix);
    return penalty;
  }

  /**
   * Encodes "content" with the error correction level "ecLevel" using the default byte-mode
   * character set. The encoding mode is chosen internally by chooseMode(). On success, the
   * result is stored in "qrCode".
   *
   * We recommend the lowest error correction level for "ecLevel" since our primary use is to
   * show QR codes on desktop screens. We don't need very strong error correction for this
   * purpose.
   *
   * Note that clients cannot force a particular encoding mode. We might want to add
   * EncodeWithMode() with which clients can specify the mode. For now, we don't need the
   * functionality.
   *
   * @param content the text to encode
   * @param ecLevel the error correction level to use
   * @param qrCode  receives the version, mask pattern, matrix and related fields
   * @throws WriterException if the content cannot be encoded or does not fit in any version
   */
  public static void encode(String content, ErrorCorrectionLevel ecLevel, QRCode qrCode)
      throws WriterException {
    encode(content, ecLevel, null, qrCode);
  }

  /**
   * Encodes "content" into "qrCode", honouring the {@link EncodeHintType#CHARACTER_SET} hint
   * when present. When byte mode is selected with a character set other than
   * {@link #DEFAULT_BYTE_MODE_ENCODING} and that character set has a known ECI, an ECI header
   * is written in front of the data segment.
   *
   * @param content the text to encode
   * @param ecLevel the error correction level to use
   * @param hints   optional hints; may be null
   * @param qrCode  receives the version, mask pattern, matrix and related fields
   * @throws WriterException if the content cannot be encoded, does not fit in any version, or
   *                         the resulting QR code is reported invalid
   */
  public static void encode(String content, ErrorCorrectionLevel ecLevel, Map<EncodeHintType,Object> hints,
      QRCode qrCode) throws WriterException {

    String encoding = hints == null ? null : (String) hints.get(EncodeHintType.CHARACTER_SET);
    if (encoding == null) {
      encoding = DEFAULT_BYTE_MODE_ENCODING;
    }

    // Step 1: Choose the mode (encoding).
    Mode mode = chooseMode(content, encoding);

    // Step 2: Append "content" into "dataBits" in appropriate encoding.
    BitVector dataBits = new BitVector();
    appendBytes(content, mode, dataBits, encoding);

    // Step 2.5: Decide up front whether an ECI header will be emitted, so that its size can be
    // taken into account when the version is chosen.
    CharacterSetECI eci = null;
    if (mode == Mode.BYTE && !DEFAULT_BYTE_MODE_ENCODING.equals(encoding)) {
      eci = CharacterSetECI.getCharacterSetECIByName(encoding);
    }
    int numEciBits = eci == null ? 0 : ECI_HEADER_BITS;

    // Step 3: Initialize QR code that can contain "dataBits".
    int numInputBytes = dataBits.sizeInBytes();
    initQRCode(numInputBytes, numEciBits, ecLevel, mode, qrCode);

    // Step 4: Build another bit vector that contains header and data.
    BitVector headerAndDataBits = new BitVector();

    // Step 4.5: Append ECI message if applicable
    if (eci != null) {
      appendECI(eci, headerAndDataBits);
    }

    appendModeInfo(mode, headerAndDataBits);

    int numLetters = mode.equals(Mode.BYTE) ? dataBits.sizeInBytes() : content.length();
    appendLengthInfo(numLetters, qrCode.getVersion(), mode, headerAndDataBits);
    headerAndDataBits.appendBitVector(dataBits);

    // Step 5: Terminate the bits properly.
    terminateBits(qrCode.getNumDataBytes(), headerAndDataBits);

    // Step 6: Interleave data bits with error correction code.
    BitVector finalBits = new BitVector();
    interleaveWithECBytes(headerAndDataBits, qrCode.getNumTotalBytes(), qrCode.getNumDataBytes(),
        qrCode.getNumRSBlocks(), finalBits);

    // Step 7: Choose the mask pattern and set to "qrCode".
    ByteMatrix matrix = new ByteMatrix(qrCode.getMatrixWidth(), qrCode.getMatrixWidth());
    qrCode.setMaskPattern(chooseMaskPattern(finalBits, qrCode.getECLevel(), qrCode.getVersion(),
        matrix));

    // Step 8.  Build the matrix and set it to "qrCode".
    MatrixUtil.buildMatrix(finalBits, qrCode.getECLevel(), qrCode.getVersion(),
        qrCode.getMaskPattern(), matrix);
    qrCode.setMatrix(matrix);
    // Step 9.  Make sure we have a valid QR Code.
    if (!qrCode.isValid()) {
      throw new WriterException("Invalid QR code: " + qrCode.toString());
    }
  }

  /**
   * @return the code point of the table used in alphanumeric mode or
   *  -1 if there is no corresponding code in the table (including any
   *  value outside the table's index range).
   */
  static int getAlphanumericCode(int code) {
    // Guard both ends of the range so that a negative argument yields -1 instead of an
    // ArrayIndexOutOfBoundsException.
    if (code >= 0 && code < ALPHANUMERIC_TABLE.length) {
      return ALPHANUMERIC_TABLE[code];
    }
    return -1;
  }

  /**
   * Chooses the best mode for "content" without any character-set hint.
   *
   * @param content the text to examine
   * @return the selected mode
   */
  public static Mode chooseMode(String content) {
    return chooseMode(content, null);
  }

  /**
   * Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
   * if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
   *
   * @param content  the text to examine
   * @param encoding the character-set hint; may be null
   * @return NUMERIC if every character is a digit, ALPHANUMERIC if every character is in the
   *         alphanumeric table, KANJI as described above, otherwise BYTE (also for empty input)
   */
  public static Mode chooseMode(String content, String encoding) {
    if ("Shift_JIS".equals(encoding)) {
      // Choose Kanji mode if all input are double-byte characters
      return isOnlyDoubleByteKanji(content) ? Mode.KANJI : Mode.BYTE;
    }
    boolean hasNumeric = false;
    boolean hasAlphanumeric = false;
    for (int i = 0; i < content.length(); ++i) {
      char c = content.charAt(i);
      if (c >= '0' && c <= '9') {
        hasNumeric = true;
      } else if (getAlphanumericCode(c) != -1) {
        hasAlphanumeric = true;
      } else {
        return Mode.BYTE;
      }
    }
    if (hasAlphanumeric) {
      return Mode.ALPHANUMERIC;
    } else if (hasNumeric) {
      return Mode.NUMERIC;
    }
    return Mode.BYTE;
  }

  // Returns true when the Shift_JIS form of "content" has an even number of bytes and every
  // lead byte lies in 0x81-0x9F or 0xE0-0xEB. Returns false if Shift_JIS is unsupported.
  private static boolean isOnlyDoubleByteKanji(String content) {
    byte[] bytes;
    try {
      bytes = content.getBytes("Shift_JIS");
    } catch (UnsupportedEncodingException uee) {
      return false;
    }
    int length = bytes.length;
    if (length % 2 != 0) {
      return false;
    }
    for (int i = 0; i < length; i += 2) {
      int byte1 = bytes[i] & 0xFF;
      if ((byte1 < 0x81 || byte1 > 0x9F) && (byte1 < 0xE0 || byte1 > 0xEB)) {
        return false;
      }
    }
    return true;
  }

  // Builds the matrix once per mask pattern and returns the pattern with the lowest penalty
  // (the first one wins on ties). "matrix" is used as scratch space.
  private static int chooseMaskPattern(BitVector bits, ErrorCorrectionLevel ecLevel, int version,
      ByteMatrix matrix) throws WriterException {

    int minPenalty = Integer.MAX_VALUE;  // Lower penalty is better.
    int bestMaskPattern = -1;
    // We try all mask patterns to choose the best one.
    for (int maskPattern = 0; maskPattern < QRCode.NUM_MASK_PATTERNS; maskPattern++) {
      MatrixUtil.buildMatrix(bits, ecLevel, version, maskPattern, matrix);
      int penalty = calculateMaskPenalty(matrix);
      if (penalty < minPenalty) {
        minPenalty = penalty;
        bestMaskPattern = maskPattern;
      }
    }
    return bestMaskPattern;
  }

  /**
   * Initialize "qrCode" according to "numInputBytes", "numEciBits", "ecLevel", and "mode".
   * On success, modify "qrCode".
   *
   * @param numInputBytes size of the encoded data segment in bytes
   * @param numEciBits    number of bits of ECI header that precede the mode indicator, or 0
   *                      when no ECI header is written
   * @throws WriterException if no version from 1 to 40 can hold the data
   */
  private static void initQRCode(int numInputBytes, int numEciBits, ErrorCorrectionLevel ecLevel,
      Mode mode, QRCode qrCode) throws WriterException {
    qrCode.setECLevel(ecLevel);
    qrCode.setMode(mode);

    // In the following comments, we use numbers of Version 7-H.
    for (int versionNum = 1; versionNum <= 40; versionNum++) {
      Version version = Version.getVersionForNumber(versionNum);
      // numBytes = 196
      int numBytes = version.getTotalCodewords();
      // getNumECBytes = 130
      Version.ECBlocks ecBlocks = version.getECBlocksForLevel(ecLevel);
      int numEcBytes = ecBlocks.getTotalECCodewords();
      // getNumRSBlocks = 5
      int numRSBlocks = ecBlocks.getNumBlocks();
      // getNumDataBytes = 196 - 130 = 66
      int numDataBytes = numBytes - numEcBytes;
      // We want to choose the smallest version which can contain data of "numInputBytes" + some
      // extra bits for the header (mode info and length info). The header can be three bytes
      // (precisely 4 + 16 bits) at most. Hence we do +3 here.
      if (numDataBytes < numInputBytes + 3) {
        continue;
      }
      // The +3 allowance above does not include an ECI header. When one is written, also
      // require room for ECI + mode indicator + this version's length field, treating the data
      // as "numInputBytes" whole bytes. Without this check terminateBits() rejects inputs that
      // exactly fill the chosen version.
      if (numEciBits > 0) {
        int headerBits = numEciBits + MODE_INDICATOR_BITS + mode.getCharacterCountBits(version);
        if ((numDataBytes << 3) < (numInputBytes << 3) + headerBits) {
          continue;
        }
      }
      // Yay, we found the proper rs block info!
      qrCode.setVersion(versionNum);
      qrCode.setNumTotalBytes(numBytes);
      qrCode.setNumDataBytes(numDataBytes);
      qrCode.setNumRSBlocks(numRSBlocks);
      // getNumECBytes = 196 - 66 = 130
      qrCode.setNumECBytes(numEcBytes);
      // matrix width = 21 + 6 * 4 = 45
      qrCode.setMatrixWidth(version.getDimensionForVersion());
      return;
    }
    throw new WriterException("Cannot find proper rs block info (input data too big?)");
  }

  /**
   * Terminate bits as described in 8.4.8 and 8.4.9 of JISX0510:2004 (p.24).
   *
   * @param numDataBytes the number of data bytes the code can hold
   * @param bits         the header and data bits; padded in place to exactly
   *                     "numDataBytes" bytes
   * @throws WriterException if "bits" is already larger than the capacity
   */
  static void terminateBits(int numDataBytes, BitVector bits) throws WriterException {
    int capacity = numDataBytes << 3;
    if (bits.size() > capacity) {
      throw new WriterException("data bits cannot fit in the QR Code " + bits.size() + " > " +
          capacity);
    }
    // Append termination bits. See 8.4.8 of JISX0510:2004 (p.24) for details.
    // TODO: srowen says we can remove this for loop, since the 4 terminator bits are optional if
    // the last byte has less than 4 bits left. So it amounts to padding the last byte with zeroes
    // either way.
    for (int i = 0; i < 4 && bits.size() < capacity; ++i) {
      bits.appendBit(0);
    }
    int numBitsInLastByte = bits.size() % 8;
    // If the last byte isn't 8-bit aligned, we'll add padding bits.
    if (numBitsInLastByte > 0) {
      int numPaddingBits = 8 - numBitsInLastByte;
      for (int i = 0; i < numPaddingBits; ++i) {
        bits.appendBit(0);
      }
    }
    // Should be 8-bit aligned here.
    if (bits.size() % 8 != 0) {
      throw new WriterException("Number of bits is not a multiple of 8");
    }
    // If we have more space, we'll fill the space with padding patterns defined in 8.4.9 (p.24).
    int numPaddingBytes = numDataBytes - bits.sizeInBytes();
    for (int i = 0; i < numPaddingBytes; ++i) {
      if (i % 2 == 0) {
        bits.appendBits(0xec, 8);
      } else {
        bits.appendBits(0x11, 8);
      }
    }
    if (bits.size() != capacity) {
      throw new WriterException("Bits size does not equal capacity");
    }
  }

  /**
   * Get number of data bytes and number of error correction bytes for block id "blockID". Store
   * the result in "numDataBytesInBlock", and "numECBytesInBlock". See table 12 in 8.5.1 of
   * JISX0510:2004 (p.30)
   *
   * @param numDataBytesInBlock single-element output array
   * @param numECBytesInBlock   single-element output array
   * @throws WriterException if "blockID" is out of range or the sanity checks fail
   */
  static void getNumDataBytesAndNumECBytesForBlockID(int numTotalBytes, int numDataBytes,
      int numRSBlocks, int blockID, int[] numDataBytesInBlock,
      int[] numECBytesInBlock) throws WriterException {
    if (blockID >= numRSBlocks) {
      throw new WriterException("Block ID too large");
    }
    // numRsBlocksInGroup2 = 196 % 5 = 1
    int numRsBlocksInGroup2 = numTotalBytes % numRSBlocks;
    // numRsBlocksInGroup1 = 5 - 1 = 4
    int numRsBlocksInGroup1 = numRSBlocks - numRsBlocksInGroup2;
    // numTotalBytesInGroup1 = 196 / 5 = 39
    int numTotalBytesInGroup1 = numTotalBytes / numRSBlocks;
    // numTotalBytesInGroup2 = 39 + 1 = 40
    int numTotalBytesInGroup2 = numTotalBytesInGroup1 + 1;
    // numDataBytesInGroup1 = 66 / 5 = 13
    int numDataBytesInGroup1 = numDataBytes / numRSBlocks;
    // numDataBytesInGroup2 = 13 + 1 = 14
    int numDataBytesInGroup2 = numDataBytesInGroup1 + 1;
    // numEcBytesInGroup1 = 39 - 13 = 26
    int numEcBytesInGroup1 = numTotalBytesInGroup1 - numDataBytesInGroup1;
    // numEcBytesInGroup2 = 40 - 14 = 26
    int numEcBytesInGroup2 = numTotalBytesInGroup2 - numDataBytesInGroup2;
    // Sanity checks.
    // 26 = 26
    if (numEcBytesInGroup1 != numEcBytesInGroup2) {
      throw new WriterException("EC bytes mismatch");
    }
    // 5 = 4 + 1.
    if (numRSBlocks != numRsBlocksInGroup1 + numRsBlocksInGroup2) {
      throw new WriterException("RS blocks mismatch");
    }
    // 196 = (13 + 26) * 4 + (14 + 26) * 1
    if (numTotalBytes !=
        ((numDataBytesInGroup1 + numEcBytesInGroup1) *
            numRsBlocksInGroup1) +
            ((numDataBytesInGroup2 + numEcBytesInGroup2) *
                numRsBlocksInGroup2)) {
      throw new WriterException("Total bytes mismatch");
    }

    if (blockID < numRsBlocksInGroup1) {
      numDataBytesInBlock[0] = numDataBytesInGroup1;
      numECBytesInBlock[0] = numEcBytesInGroup1;
    } else {
      numDataBytesInBlock[0] = numDataBytesInGroup2;
      numECBytesInBlock[0] = numEcBytesInGroup2;
    }
  }

  /**
   * Interleave "bits" with corresponding error correction bytes. On success, store the result in
   * "result". The interleave rule is complicated. See 8.6 of JISX0510:2004 (p.37) for details.
   *
   * @throws WriterException if "bits" does not hold exactly "numDataBytes" bytes or the
   *                         interleaved output does not total "numTotalBytes" bytes
   */
  static void interleaveWithECBytes(BitVector bits, int numTotalBytes,
      int numDataBytes, int numRSBlocks, BitVector result) throws WriterException {

    // "bits" must have "getNumDataBytes" bytes of data.
    if (bits.sizeInBytes() != numDataBytes) {
      throw new WriterException("Number of bits and data bytes does not match");
    }

    // Step 1.  Divide data bytes into blocks and generate error correction bytes for them. We'll
    // store the divided data bytes blocks and error correction bytes blocks into "blocks".
    int dataBytesOffset = 0;
    int maxNumDataBytes = 0;
    int maxNumEcBytes = 0;

    // Since we know the number of Reed-Solomon blocks, we can presize the list.
    ArrayList<BlockPair> blocks = new ArrayList<BlockPair>(numRSBlocks);

    for (int i = 0; i < numRSBlocks; ++i) {
      int[] numDataBytesInBlock = new int[1];
      int[] numEcBytesInBlock = new int[1];
      getNumDataBytesAndNumECBytesForBlockID(
          numTotalBytes, numDataBytes, numRSBlocks, i,
          numDataBytesInBlock, numEcBytesInBlock);

      ByteArray dataBytes = new ByteArray();
      dataBytes.set(bits.getArray(), dataBytesOffset, numDataBytesInBlock[0]);
      ByteArray ecBytes = generateECBytes(dataBytes, numEcBytesInBlock[0]);
      blocks.add(new BlockPair(dataBytes, ecBytes));

      maxNumDataBytes = Math.max(maxNumDataBytes, dataBytes.size());
      maxNumEcBytes = Math.max(maxNumEcBytes, ecBytes.size());
      dataBytesOffset += numDataBytesInBlock[0];
    }
    if (numDataBytes != dataBytesOffset) {
      throw new WriterException("Data bytes does not match offset");
    }

    // First, place data blocks.
    for (int i = 0; i < maxNumDataBytes; ++i) {
      for (int j = 0; j < blocks.size(); ++j) {
        ByteArray dataBytes = blocks.get(j).getDataBytes();
        if (i < dataBytes.size()) {
          result.appendBits(dataBytes.at(i), 8);
        }
      }
    }
    // Then, place error correction blocks.
    for (int i = 0; i < maxNumEcBytes; ++i) {
      for (int j = 0; j < blocks.size(); ++j) {
        ByteArray ecBytes = blocks.get(j).getErrorCorrectionBytes();
        if (i < ecBytes.size()) {
          result.appendBits(ecBytes.at(i), 8);
        }
      }
    }
    if (numTotalBytes != result.sizeInBytes()) {  // Should be same.
      throw new WriterException("Interleaving error: " + numTotalBytes + " and " +
          result.sizeInBytes() + " differ.");
    }
  }

  /**
   * Runs the Reed-Solomon encoder over "dataBytes" and returns the "numEcBytesInBlock" error
   * correction bytes it produces.
   */
  static ByteArray generateECBytes(ByteArray dataBytes, int numEcBytesInBlock) {
    int numDataBytes = dataBytes.size();
    int[] toEncode = new int[numDataBytes + numEcBytesInBlock];
    for (int i = 0; i < numDataBytes; i++) {
      toEncode[i] = dataBytes.at(i);
    }
    new ReedSolomonEncoder(GF256.QR_CODE_FIELD).encode(toEncode, numEcBytesInBlock);

    // The encoder leaves the EC bytes in the tail of "toEncode".
    ByteArray ecBytes = new ByteArray(numEcBytesInBlock);
    for (int i = 0; i < numEcBytesInBlock; i++) {
      ecBytes.set(i, toEncode[numDataBytes + i]);
    }
    return ecBytes;
  }

  /**
   * Append mode info. On success, store the result in "bits".
   */
  static void appendModeInfo(Mode mode, BitVector bits) {
    bits.appendBits(mode.getBits(), MODE_INDICATOR_BITS);
  }


  /**
   * Append length info. On success, store the result in "bits".
   *
   * @throws WriterException if "numLetters" does not fit in the length field that "mode"
   *                         uses for "version"
   */
  static void appendLengthInfo(int numLetters, int version, Mode mode, BitVector bits)
      throws WriterException {
    int numBits = mode.getCharacterCountBits(Version.getVersionForNumber(version));
    if (numLetters > ((1 << numBits) - 1)) {
      throw new WriterException(numLetters + " is bigger than " + ((1 << numBits) - 1));
    }
    bits.appendBits(numLetters, numBits);
  }

  /**
   * Append "content" in "mode" mode (encoding) into "bits". On success, store the result in
   * "bits".
   *
   * @throws WriterException if "mode" is not one of NUMERIC, ALPHANUMERIC, BYTE or KANJI, or
   *                         if the mode-specific encoder rejects the content
   */
  static void appendBytes(String content, Mode mode, BitVector bits, String encoding)
      throws WriterException {
    if (mode.equals(Mode.NUMERIC)) {
      appendNumericBytes(content, bits);
    } else if (mode.equals(Mode.ALPHANUMERIC)) {
      appendAlphanumericBytes(content, bits);
    } else if (mode.equals(Mode.BYTE)) {
      append8BitBytes(content, bits, encoding);
    } else if (mode.equals(Mode.KANJI)) {
      appendKanjiBytes(content, bits);
    } else {
      throw new WriterException("Invalid mode: " + mode);
    }
  }

  /**
   * Appends "content" as numeric-mode data: groups of three digits in ten bits, with a trailing
   * pair in seven bits or a trailing single digit in four bits. The characters are not checked
   * for being digits.
   */
  static void appendNumericBytes(String content, BitVector bits) {
    int length = content.length();
    int i = 0;
    while (i < length) {
      int num1 = content.charAt(i) - '0';
      if (i + 2 < length) {
        // Encode three numeric letters in ten bits.
        int num2 = content.charAt(i + 1) - '0';
        int num3 = content.charAt(i + 2) - '0';
        bits.appendBits(num1 * 100 + num2 * 10 + num3, 10);
        i += 3;
      } else if (i + 1 < length) {
        // Encode two numeric letters in seven bits.
        int num2 = content.charAt(i + 1) - '0';
        bits.appendBits(num1 * 10 + num2, 7);
        i += 2;
      } else {
        // Encode one numeric letter in four bits.
        bits.appendBits(num1, 4);
        i++;
      }
    }
  }

  /**
   * Appends "content" as alphanumeric-mode data: pairs of characters in eleven bits, with a
   * trailing single character in six bits.
   *
   * @throws WriterException if a character has no entry in the alphanumeric table
   */
  static void appendAlphanumericBytes(String content, BitVector bits) throws WriterException {
    int length = content.length();
    int i = 0;
    while (i < length) {
      int code1 = getAlphanumericCode(content.charAt(i));
      if (code1 == -1) {
        throw new WriterException();
      }
      if (i + 1 < length) {
        int code2 = getAlphanumericCode(content.charAt(i + 1));
        if (code2 == -1) {
          throw new WriterException();
        }
        // Encode two alphanumeric letters in 11 bits.
        bits.appendBits(code1 * 45 + code2, 11);
        i += 2;
      } else {
        // Encode one alphanumeric letter in six bits.
        bits.appendBits(code1, 6);
        i++;
      }
    }
  }

  /**
   * Appends the bytes of "content" in the given character set, eight bits per byte.
   *
   * @throws WriterException if "encoding" is not a supported character set
   */
  static void append8BitBytes(String content, BitVector bits, String encoding)
      throws WriterException {
    byte[] bytes;
    try {
      bytes = content.getBytes(encoding);
    } catch (UnsupportedEncodingException uee) {
      throw new WriterException(uee.toString());
    }
    for (int i = 0; i < bytes.length; ++i) {
      bits.appendBits(bytes[i], 8);
    }
  }

  /**
   * Appends "content" as Kanji-mode data: each Shift_JIS byte pair is compacted into 13 bits.
   *
   * @throws WriterException if Shift_JIS is unsupported, the Shift_JIS form has an odd number
   *                         of bytes, or a byte pair lies outside 0x8140-0x9ffc and
   *                         0xe040-0xebbf
   */
  static void appendKanjiBytes(String content, BitVector bits) throws WriterException {
    byte[] bytes;
    try {
      bytes = content.getBytes("Shift_JIS");
    } catch (UnsupportedEncodingException uee) {
      throw new WriterException(uee.toString());
    }
    int length = bytes.length;
    // The loop below consumes bytes in pairs; reject an odd count up front instead of running
    // off the end of the array.
    if (length % 2 != 0) {
      throw new WriterException("Kanji byte size not even");
    }
    for (int i = 0; i < length; i += 2) {
      int byte1 = bytes[i] & 0xFF;
      int byte2 = bytes[i + 1] & 0xFF;
      int code = (byte1 << 8) | byte2;
      int subtracted = -1;
      if (code >= 0x8140 && code <= 0x9ffc) {
        subtracted = code - 0x8140;
      } else if (code >= 0xe040 && code <= 0xebbf) {
        subtracted = code - 0xc140;
      }
      if (subtracted == -1) {
        throw new WriterException("Invalid byte sequence");
      }
      int encoded = ((subtracted >> 8) * 0xc0) + (subtracted & 0xff);
      bits.appendBits(encoded, 13);
    }
  }

  // Writes the ECI mode indicator followed by the ECI value (ECI_HEADER_BITS bits in total).
  private static void appendECI(CharacterSetECI eci, BitVector bits) {
    bits.appendBits(Mode.ECI.getBits(), MODE_INDICATOR_BITS);
    // This is correct for values up to 127, which is all we need now.
    bits.appendBits(eci.getValue(), 8);
  }

}