001/*
002 * Copyright 2008 ZXing authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.itextpdf.text.pdf.qrcode;
018
019import java.io.UnsupportedEncodingException;
020import java.util.Map;
021import java.util.ArrayList;
022
023/**
024 * @author satorux@google.com (Satoru Takabayashi) - creator
025 * @author dswitkin@google.com (Daniel Switkin) - ported from C++
026 * @since 5.0.2
027 */
028public final class Encoder {
029
030  // The original table is defined in the table 5 of JISX0510:2004 (p.19).
031  private static final int[] ALPHANUMERIC_TABLE = {
032      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00-0x0f
033      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10-0x1f
034      36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,  // 0x20-0x2f
035      0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 44, -1, -1, -1, -1, -1,  // 0x30-0x3f
036      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  // 0x40-0x4f
037      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
038  };
039
040  static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";
041
042  private Encoder() {
043  }
044
045  // The mask penalty calculation is complicated.  See Table 21 of JISX0510:2004 (p.45) for details.
046  // Basically it applies four rules and summate all penalties.
047  private static int calculateMaskPenalty(ByteMatrix matrix) {
048    int penalty = 0;
049    penalty += MaskUtil.applyMaskPenaltyRule1(matrix);
050    penalty += MaskUtil.applyMaskPenaltyRule2(matrix);
051    penalty += MaskUtil.applyMaskPenaltyRule3(matrix);
052    penalty += MaskUtil.applyMaskPenaltyRule4(matrix);
053    return penalty;
054  }
055
056  /**
057   *  Encode "bytes" with the error correction level "ecLevel". The encoding mode will be chosen
058   * internally by chooseMode(). On success, store the result in "qrCode".
059   *
060   * We recommend you to use QRCode.EC_LEVEL_L (the lowest level) for
061   * "getECLevel" since our primary use is to show QR code on desktop screens. We don't need very
062   * strong error correction for this purpose.
063   *
064   * Note that there is no way to encode bytes in MODE_KANJI. We might want to add EncodeWithMode()
065   * with which clients can specify the encoding mode. For now, we don't need the functionality.
066   */
067  public static void encode(String content, ErrorCorrectionLevel ecLevel, QRCode qrCode)
068      throws WriterException {
069    encode(content, ecLevel, null, qrCode);
070  }
071
072  public static void encode(String content, ErrorCorrectionLevel ecLevel, Map<EncodeHintType,Object> hints,
073      QRCode qrCode) throws WriterException {
074
075    String encoding = hints == null ? null : (String) hints.get(EncodeHintType.CHARACTER_SET);
076    if (encoding == null) {
077      encoding = DEFAULT_BYTE_MODE_ENCODING;
078    }
079
080    // Step 1: Choose the mode (encoding).
081    Mode mode = chooseMode(content, encoding);
082
083    // Step 2: Append "bytes" into "dataBits" in appropriate encoding.
084    BitVector dataBits = new BitVector();
085    appendBytes(content, mode, dataBits, encoding);
086    // Step 3: Initialize QR code that can contain "dataBits".
087    int numInputBytes = dataBits.sizeInBytes();
088    initQRCode(numInputBytes, ecLevel, mode, qrCode);
089
090    // Step 4: Build another bit vector that contains header and data.
091    BitVector headerAndDataBits = new BitVector();
092
093    // Step 4.5: Append ECI message if applicable
094    if (mode == Mode.BYTE && !DEFAULT_BYTE_MODE_ENCODING.equals(encoding)) {
095      CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding);
096      if (eci != null) {
097        appendECI(eci, headerAndDataBits);
098      }
099    }
100
101    appendModeInfo(mode, headerAndDataBits);
102
103    int numLetters = mode.equals(Mode.BYTE) ? dataBits.sizeInBytes() : content.length();
104    appendLengthInfo(numLetters, qrCode.getVersion(), mode, headerAndDataBits);
105    headerAndDataBits.appendBitVector(dataBits);
106
107    // Step 5: Terminate the bits properly.
108    terminateBits(qrCode.getNumDataBytes(), headerAndDataBits);
109
110    // Step 6: Interleave data bits with error correction code.
111    BitVector finalBits = new BitVector();
112    interleaveWithECBytes(headerAndDataBits, qrCode.getNumTotalBytes(), qrCode.getNumDataBytes(),
113        qrCode.getNumRSBlocks(), finalBits);
114
115    // Step 7: Choose the mask pattern and set to "qrCode".
116    ByteMatrix matrix = new ByteMatrix(qrCode.getMatrixWidth(), qrCode.getMatrixWidth());
117    qrCode.setMaskPattern(chooseMaskPattern(finalBits, qrCode.getECLevel(), qrCode.getVersion(),
118        matrix));
119
120    // Step 8.  Build the matrix and set it to "qrCode".
121    MatrixUtil.buildMatrix(finalBits, qrCode.getECLevel(), qrCode.getVersion(),
122        qrCode.getMaskPattern(), matrix);
123    qrCode.setMatrix(matrix);
124    // Step 9.  Make sure we have a valid QR Code.
125    if (!qrCode.isValid()) {
126      throw new WriterException("Invalid QR code: " + qrCode.toString());
127    }
128  }
129
130  /**
131   * @return the code point of the table used in alphanumeric mode or
132   *  -1 if there is no corresponding code in the table.
133   */
134  static int getAlphanumericCode(int code) {
135    if (code < ALPHANUMERIC_TABLE.length) {
136      return ALPHANUMERIC_TABLE[code];
137    }
138    return -1;
139  }
140
141  public static Mode chooseMode(String content) {
142    return chooseMode(content, null);
143  }
144
145  /**
146   * Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
147   * if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
148   */
149  public static Mode chooseMode(String content, String encoding) {
150    if ("Shift_JIS".equals(encoding)) {
151      // Choose Kanji mode if all input are double-byte characters
152      return isOnlyDoubleByteKanji(content) ? Mode.KANJI : Mode.BYTE;
153    }
154    boolean hasNumeric = false;
155    boolean hasAlphanumeric = false;
156    for (int i = 0; i < content.length(); ++i) {
157      char c = content.charAt(i);
158      if (c >= '0' && c <= '9') {
159        hasNumeric = true;
160      } else if (getAlphanumericCode(c) != -1) {
161        hasAlphanumeric = true;
162      } else {
163        return Mode.BYTE;
164      }
165    }
166    if (hasAlphanumeric) {
167      return Mode.ALPHANUMERIC;
168    } else if (hasNumeric) {
169      return Mode.NUMERIC;
170    }
171    return Mode.BYTE;
172  }
173
174  private static boolean isOnlyDoubleByteKanji(String content) {
175    byte[] bytes;
176    try {
177      bytes = content.getBytes("Shift_JIS");
178    } catch (UnsupportedEncodingException uee) {
179      return false;
180    }
181    int length = bytes.length;
182    if (length % 2 != 0) {
183      return false;
184    }
185    for (int i = 0; i < length; i += 2) {
186      int byte1 = bytes[i] & 0xFF;
187      if ((byte1 < 0x81 || byte1 > 0x9F) && (byte1 < 0xE0 || byte1 > 0xEB)) {
188        return false;
189      }
190    }
191    return true;
192  }
193
194  private static int chooseMaskPattern(BitVector bits, ErrorCorrectionLevel ecLevel, int version,
195      ByteMatrix matrix) throws WriterException {
196
197    int minPenalty = Integer.MAX_VALUE;  // Lower penalty is better.
198    int bestMaskPattern = -1;
199    // We try all mask patterns to choose the best one.
200    for (int maskPattern = 0; maskPattern < QRCode.NUM_MASK_PATTERNS; maskPattern++) {
201      MatrixUtil.buildMatrix(bits, ecLevel, version, maskPattern, matrix);
202      int penalty = calculateMaskPenalty(matrix);
203      if (penalty < minPenalty) {
204        minPenalty = penalty;
205        bestMaskPattern = maskPattern;
206      }
207    }
208    return bestMaskPattern;
209  }
210
211  /**
212   * Initialize "qrCode" according to "numInputBytes", "ecLevel", and "mode". On success,
213   * modify "qrCode".
214   */
215  private static void initQRCode(int numInputBytes, ErrorCorrectionLevel ecLevel, Mode mode,
216      QRCode qrCode) throws WriterException {
217    qrCode.setECLevel(ecLevel);
218    qrCode.setMode(mode);
219
220    // In the following comments, we use numbers of Version 7-H.
221    for (int versionNum = 1; versionNum <= 40; versionNum++) {
222      Version version = Version.getVersionForNumber(versionNum);
223      // numBytes = 196
224      int numBytes = version.getTotalCodewords();
225      // getNumECBytes = 130
226      Version.ECBlocks ecBlocks = version.getECBlocksForLevel(ecLevel);
227      int numEcBytes = ecBlocks.getTotalECCodewords();
228      // getNumRSBlocks = 5
229      int numRSBlocks = ecBlocks.getNumBlocks();
230      // getNumDataBytes = 196 - 130 = 66
231      int numDataBytes = numBytes - numEcBytes;
232      // We want to choose the smallest version which can contain data of "numInputBytes" + some
233      // extra bits for the header (mode info and length info). The header can be three bytes
234      // (precisely 4 + 16 bits) at most. Hence we do +3 here.
235      if (numDataBytes >= numInputBytes + 3) {
236        // Yay, we found the proper rs block info!
237        qrCode.setVersion(versionNum);
238        qrCode.setNumTotalBytes(numBytes);
239        qrCode.setNumDataBytes(numDataBytes);
240        qrCode.setNumRSBlocks(numRSBlocks);
241        // getNumECBytes = 196 - 66 = 130
242        qrCode.setNumECBytes(numEcBytes);
243        // matrix width = 21 + 6 * 4 = 45
244        qrCode.setMatrixWidth(version.getDimensionForVersion());
245        return;
246      }
247    }
248    throw new WriterException("Cannot find proper rs block info (input data too big?)");
249  }
250
251  /**
252   * Terminate bits as described in 8.4.8 and 8.4.9 of JISX0510:2004 (p.24).
253   */
254  static void terminateBits(int numDataBytes, BitVector bits) throws WriterException {
255    int capacity = numDataBytes << 3;
256    if (bits.size() > capacity) {
257      throw new WriterException("data bits cannot fit in the QR Code" + bits.size() + " > " +
258          capacity);
259    }
260    // Append termination bits. See 8.4.8 of JISX0510:2004 (p.24) for details.
261    // TODO: srowen says we can remove this for loop, since the 4 terminator bits are optional if
262    // the last byte has less than 4 bits left. So it amounts to padding the last byte with zeroes
263    // either way.
264    for (int i = 0; i < 4 && bits.size() < capacity; ++i) {
265      bits.appendBit(0);
266    }
267    int numBitsInLastByte = bits.size() % 8;
268    // If the last byte isn't 8-bit aligned, we'll add padding bits.
269    if (numBitsInLastByte > 0) {
270      int numPaddingBits = 8 - numBitsInLastByte;
271      for (int i = 0; i < numPaddingBits; ++i) {
272        bits.appendBit(0);
273      }
274    }
275    // Should be 8-bit aligned here.
276    if (bits.size() % 8 != 0) {
277      throw new WriterException("Number of bits is not a multiple of 8");
278    }
279    // If we have more space, we'll fill the space with padding patterns defined in 8.4.9 (p.24).
280    int numPaddingBytes = numDataBytes - bits.sizeInBytes();
281    for (int i = 0; i < numPaddingBytes; ++i) {
282      if (i % 2 == 0) {
283        bits.appendBits(0xec, 8);
284      } else {
285        bits.appendBits(0x11, 8);
286      }
287    }
288    if (bits.size() != capacity) {
289      throw new WriterException("Bits size does not equal capacity");
290    }
291  }
292
293  /**
294   * Get number of data bytes and number of error correction bytes for block id "blockID". Store
295   * the result in "numDataBytesInBlock", and "numECBytesInBlock". See table 12 in 8.5.1 of
296   * JISX0510:2004 (p.30)
297   */
298  static void getNumDataBytesAndNumECBytesForBlockID(int numTotalBytes, int numDataBytes,
299      int numRSBlocks, int blockID, int[] numDataBytesInBlock,
300      int[] numECBytesInBlock) throws WriterException {
301    if (blockID >= numRSBlocks) {
302      throw new WriterException("Block ID too large");
303    }
304    // numRsBlocksInGroup2 = 196 % 5 = 1
305    int numRsBlocksInGroup2 = numTotalBytes % numRSBlocks;
306    // numRsBlocksInGroup1 = 5 - 1 = 4
307    int numRsBlocksInGroup1 = numRSBlocks - numRsBlocksInGroup2;
308    // numTotalBytesInGroup1 = 196 / 5 = 39
309    int numTotalBytesInGroup1 = numTotalBytes / numRSBlocks;
310    // numTotalBytesInGroup2 = 39 + 1 = 40
311    int numTotalBytesInGroup2 = numTotalBytesInGroup1 + 1;
312    // numDataBytesInGroup1 = 66 / 5 = 13
313    int numDataBytesInGroup1 = numDataBytes / numRSBlocks;
314    // numDataBytesInGroup2 = 13 + 1 = 14
315    int numDataBytesInGroup2 = numDataBytesInGroup1 + 1;
316    // numEcBytesInGroup1 = 39 - 13 = 26
317    int numEcBytesInGroup1 = numTotalBytesInGroup1 - numDataBytesInGroup1;
318    // numEcBytesInGroup2 = 40 - 14 = 26
319    int numEcBytesInGroup2 = numTotalBytesInGroup2 - numDataBytesInGroup2;
320    // Sanity checks.
321    // 26 = 26
322    if (numEcBytesInGroup1 != numEcBytesInGroup2) {
323      throw new WriterException("EC bytes mismatch");
324    }
325    // 5 = 4 + 1.
326    if (numRSBlocks != numRsBlocksInGroup1 + numRsBlocksInGroup2) {
327      throw new WriterException("RS blocks mismatch");
328    }
329    // 196 = (13 + 26) * 4 + (14 + 26) * 1
330    if (numTotalBytes !=
331        ((numDataBytesInGroup1 + numEcBytesInGroup1) *
332            numRsBlocksInGroup1) +
333            ((numDataBytesInGroup2 + numEcBytesInGroup2) *
334                numRsBlocksInGroup2)) {
335      throw new WriterException("Total bytes mismatch");
336    }
337
338    if (blockID < numRsBlocksInGroup1) {
339      numDataBytesInBlock[0] = numDataBytesInGroup1;
340      numECBytesInBlock[0] = numEcBytesInGroup1;
341    } else {
342      numDataBytesInBlock[0] = numDataBytesInGroup2;
343      numECBytesInBlock[0] = numEcBytesInGroup2;
344    }
345  }
346
347  /**
348   * Interleave "bits" with corresponding error correction bytes. On success, store the result in
349   * "result". The interleave rule is complicated. See 8.6 of JISX0510:2004 (p.37) for details.
350   */
351  static void interleaveWithECBytes(BitVector bits, int numTotalBytes,
352      int numDataBytes, int numRSBlocks, BitVector result) throws WriterException {
353
354    // "bits" must have "getNumDataBytes" bytes of data.
355    if (bits.sizeInBytes() != numDataBytes) {
356      throw new WriterException("Number of bits and data bytes does not match");
357    }
358
359    // Step 1.  Divide data bytes into blocks and generate error correction bytes for them. We'll
360    // store the divided data bytes blocks and error correction bytes blocks into "blocks".
361    int dataBytesOffset = 0;
362    int maxNumDataBytes = 0;
363    int maxNumEcBytes = 0;
364
365    // Since, we know the number of reedsolmon blocks, we can initialize the vector with the number.
366    ArrayList<BlockPair> blocks = new ArrayList<BlockPair>(numRSBlocks);
367
368    for (int i = 0; i < numRSBlocks; ++i) {
369      int[] numDataBytesInBlock = new int[1];
370      int[] numEcBytesInBlock = new int[1];
371      getNumDataBytesAndNumECBytesForBlockID(
372          numTotalBytes, numDataBytes, numRSBlocks, i,
373          numDataBytesInBlock, numEcBytesInBlock);
374
375      ByteArray dataBytes = new ByteArray();
376      dataBytes.set(bits.getArray(), dataBytesOffset, numDataBytesInBlock[0]);
377      ByteArray ecBytes = generateECBytes(dataBytes, numEcBytesInBlock[0]);
378      blocks.add(new BlockPair(dataBytes, ecBytes));
379
380      maxNumDataBytes = Math.max(maxNumDataBytes, dataBytes.size());
381      maxNumEcBytes = Math.max(maxNumEcBytes, ecBytes.size());
382      dataBytesOffset += numDataBytesInBlock[0];
383    }
384    if (numDataBytes != dataBytesOffset) {
385      throw new WriterException("Data bytes does not match offset");
386    }
387
388    // First, place data blocks.
389    for (int i = 0; i < maxNumDataBytes; ++i) {
390      for (int j = 0; j < blocks.size(); ++j) {
391        ByteArray dataBytes = blocks.get(j).getDataBytes();
392        if (i < dataBytes.size()) {
393          result.appendBits(dataBytes.at(i), 8);
394        }
395      }
396    }
397    // Then, place error correction blocks.
398    for (int i = 0; i < maxNumEcBytes; ++i) {
399      for (int j = 0; j < blocks.size(); ++j) {
400        ByteArray ecBytes = blocks.get(j).getErrorCorrectionBytes();
401        if (i < ecBytes.size()) {
402          result.appendBits(ecBytes.at(i), 8);
403        }
404      }
405    }
406    if (numTotalBytes != result.sizeInBytes()) {  // Should be same.
407      throw new WriterException("Interleaving error: " + numTotalBytes + " and " +
408          result.sizeInBytes() + " differ.");
409    }
410  }
411
412  static ByteArray generateECBytes(ByteArray dataBytes, int numEcBytesInBlock) {
413    int numDataBytes = dataBytes.size();
414    int[] toEncode = new int[numDataBytes + numEcBytesInBlock];
415    for (int i = 0; i < numDataBytes; i++) {
416      toEncode[i] = dataBytes.at(i);
417    }
418    new ReedSolomonEncoder(GF256.QR_CODE_FIELD).encode(toEncode, numEcBytesInBlock);
419
420    ByteArray ecBytes = new ByteArray(numEcBytesInBlock);
421    for (int i = 0; i < numEcBytesInBlock; i++) {
422      ecBytes.set(i, toEncode[numDataBytes + i]);
423    }
424    return ecBytes;
425  }
426
427  /**
428   * Append mode info. On success, store the result in "bits".
429   */
430  static void appendModeInfo(Mode mode, BitVector bits) {
431    bits.appendBits(mode.getBits(), 4);
432  }
433
434
435  /**
436   * Append length info. On success, store the result in "bits".
437   */
438  static void appendLengthInfo(int numLetters, int version, Mode mode, BitVector bits)
439      throws WriterException {
440    int numBits = mode.getCharacterCountBits(Version.getVersionForNumber(version));
441    if (numLetters > ((1 << numBits) - 1)) {
442      throw new WriterException(numLetters + "is bigger than" + ((1 << numBits) - 1));
443    }
444    bits.appendBits(numLetters, numBits);
445  }
446
447  /**
448   * Append "bytes" in "mode" mode (encoding) into "bits". On success, store the result in "bits".
449   */
450  static void appendBytes(String content, Mode mode, BitVector bits, String encoding)
451      throws WriterException {
452    if (mode.equals(Mode.NUMERIC)) {
453      appendNumericBytes(content, bits);
454    } else if (mode.equals(Mode.ALPHANUMERIC)) {
455      appendAlphanumericBytes(content, bits);
456    } else if (mode.equals(Mode.BYTE)) {
457      append8BitBytes(content, bits, encoding);
458    } else if (mode.equals(Mode.KANJI)) {
459      appendKanjiBytes(content, bits);
460    } else {
461      throw new WriterException("Invalid mode: " + mode);
462    }
463  }
464
465  static void appendNumericBytes(String content, BitVector bits) {
466    int length = content.length();
467    int i = 0;
468    while (i < length) {
469      int num1 = content.charAt(i) - '0';
470      if (i + 2 < length) {
471        // Encode three numeric letters in ten bits.
472        int num2 = content.charAt(i + 1) - '0';
473        int num3 = content.charAt(i + 2) - '0';
474        bits.appendBits(num1 * 100 + num2 * 10 + num3, 10);
475        i += 3;
476      } else if (i + 1 < length) {
477        // Encode two numeric letters in seven bits.
478        int num2 = content.charAt(i + 1) - '0';
479        bits.appendBits(num1 * 10 + num2, 7);
480        i += 2;
481      } else {
482        // Encode one numeric letter in four bits.
483        bits.appendBits(num1, 4);
484        i++;
485      }
486    }
487  }
488
489  static void appendAlphanumericBytes(String content, BitVector bits) throws WriterException {
490    int length = content.length();
491    int i = 0;
492    while (i < length) {
493      int code1 = getAlphanumericCode(content.charAt(i));
494      if (code1 == -1) {
495        throw new WriterException();
496      }
497      if (i + 1 < length) {
498        int code2 = getAlphanumericCode(content.charAt(i + 1));
499        if (code2 == -1) {
500          throw new WriterException();
501        }
502        // Encode two alphanumeric letters in 11 bits.
503        bits.appendBits(code1 * 45 + code2, 11);
504        i += 2;
505      } else {
506        // Encode one alphanumeric letter in six bits.
507        bits.appendBits(code1, 6);
508        i++;
509      }
510    }
511  }
512
513  static void append8BitBytes(String content, BitVector bits, String encoding)
514      throws WriterException {
515    byte[] bytes;
516    try {
517      bytes = content.getBytes(encoding);
518    } catch (UnsupportedEncodingException uee) {
519      throw new WriterException(uee.toString());
520    }
521    for (int i = 0; i < bytes.length; ++i) {
522      bits.appendBits(bytes[i], 8);
523    }
524  }
525
526  static void appendKanjiBytes(String content, BitVector bits) throws WriterException {
527    byte[] bytes;
528    try {
529      bytes = content.getBytes("Shift_JIS");
530    } catch (UnsupportedEncodingException uee) {
531      throw new WriterException(uee.toString());
532    }
533    int length = bytes.length;
534    for (int i = 0; i < length; i += 2) {
535      int byte1 = bytes[i] & 0xFF;
536      int byte2 = bytes[i + 1] & 0xFF;
537      int code = (byte1 << 8) | byte2;
538      int subtracted = -1;
539      if (code >= 0x8140 && code <= 0x9ffc) {
540        subtracted = code - 0x8140;
541      } else if (code >= 0xe040 && code <= 0xebbf) {
542        subtracted = code - 0xc140;
543      }
544      if (subtracted == -1) {
545        throw new WriterException("Invalid byte sequence");
546      }
547      int encoded = ((subtracted >> 8) * 0xc0) + (subtracted & 0xff);
548      bits.appendBits(encoded, 13);
549    }
550  }
551
552  private static void appendECI(CharacterSetECI eci, BitVector bits) {
553    bits.appendBits(Mode.ECI.getBits(), 4);
554    // This is correct for values up to 127, which is all we need now.
555    bits.appendBits(eci.getValue(), 8);
556  }
557
558}