/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.StringUtils;

/**
 * Implements methods common to all codecs defined in RFC 1522.
 * <p>
 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
 * encoding of non-ASCII text in various portions of an RFC 822 [2] message header, in a manner which
 * is unlikely to confuse existing message handling software.
 * <p>
 * This class is immutable and thread-safe.
 *
 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
 *          Message Header Extensions for Non-ASCII Text</a>
 *
 * @since 1.3
 * @version $Id: RFC1522Codec.java 1429868 2013-01-07 16:08:05Z ggregory $
 */
abstract class RFC1522Codec {

    /** Separator between the charset, encoding and encoded-text tokens of an "encoded-word". */
    protected static final char SEP = '?';

    /** Postfix that terminates an "encoded-word". */
    protected static final String POSTFIX = "?=";

    /** Prefix that starts an "encoded-word". */
    protected static final String PREFIX = "=?";

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
     * <p>
     * The result has the form {@code =?charset?encoding?encoded-text?=}, where {@code encoding} is the
     * value returned by {@link #getEncoding()}.
     *
     * @param text
     *            a string to encode
     * @param charset
     *            a charset to be used
     * @return RFC 1522 compliant "encoded-word", or {@code null} if {@code text} is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    protected String encodeText(final String text, final Charset charset) throws EncoderException {
        if (text == null) {
            return null;
        }
        final StringBuilder buffer = new StringBuilder();
        buffer.append(PREFIX);
        buffer.append(charset);
        buffer.append(SEP);
        buffer.append(this.getEncoding());
        buffer.append(SEP);
        final byte[] rawData = this.doEncoding(text.getBytes(charset));
        buffer.append(StringUtils.newStringUsAscii(rawData));
        buffer.append(POSTFIX);
        return buffer.toString();
    }

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method resolves the charset name with {@link Charset#forName(String)} and delegates to
     * {@link #encodeText(String, Charset)}.
     *
     * @param text
     *            a string to encode
     * @param charsetName
     *            the charset to use
     * @return RFC 1522 compliant "encoded-word", or {@code null} if {@code text} is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @throws UnsupportedEncodingException
     *             declared for compatibility with existing callers. Note that the charset lookup is
     *             performed by {@link Charset#forName(String)}, which reports an illegal or unavailable
     *             charset name with an unchecked exception instead.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    protected String encodeText(final String text, final String charsetName)
            throws EncoderException, UnsupportedEncodingException {
        // Checked before the charset lookup so that a null text never triggers a charset failure.
        if (text == null) {
            return null;
        }
        return this.encodeText(text, Charset.forName(charsetName));
    }

    /**
     * Applies an RFC 1522 compliant decoding scheme to the given string of text.
     * <p>
     * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doDecoding(byte [])} method of a concrete class to perform the specific decoding.
     *
     * @param text
     *            a string to decode
     * @return A new decoded String or {@code null} if the input is {@code null}.
     * @throws DecoderException
     *             thrown if there is an error condition during the decoding process, including input
     *             that is not a well-formed "encoded-word" or that uses an encoding other than
     *             {@link #getEncoding()}.
     * @throws UnsupportedEncodingException
     *             thrown if charset specified in the "encoded-word" header is not supported
     */
    protected String decodeText(final String text)
            throws DecoderException, UnsupportedEncodingException {
        if (text == null) {
            return null;
        }
        // The length check rejects "=?=", where prefix and postfix overlap: it satisfies both
        // startsWith and endsWith but has no separator after the prefix, which would otherwise
        // surface as a StringIndexOutOfBoundsException from substring below.
        if (text.length() < PREFIX.length() + POSTFIX.length()
                || !text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
            throw new DecoderException("RFC 1522 violation: malformed encoded content");
        }
        // Index of the '?' that belongs to the postfix; a token search that lands here found no
        // separator of its own.
        final int terminator = text.length() - POSTFIX.length();
        int from = PREFIX.length();
        int to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: charset token not found");
        }
        final String charset = text.substring(from, to);
        if (charset.equals("")) {
            throw new DecoderException("RFC 1522 violation: charset not specified");
        }
        from = to + 1;
        to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: encoding token not found");
        }
        final String encoding = text.substring(from, to);
        if (!getEncoding().equalsIgnoreCase(encoding)) {
            throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
        }
        from = to + 1;
        // The encoded text runs up to the next separator.
        to = text.indexOf(SEP, from);
        byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
        data = doDecoding(data);
        return new String(data, charset);
    }

    /**
     * Returns the codec name (referred to as encoding in the RFC 1522).
     *
     * @return name of the codec
     */
    protected abstract String getEncoding();

    /**
     * Encodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be encoded
     * @return A byte array containing the encoded data
     * @throws EncoderException
     *             thrown if the Encoder encounters a failure condition during the encoding process.
     */
    protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;

    /**
     * Decodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be decoded
     * @return a byte array that contains decoded data
     * @throws DecoderException
     *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
     */
    protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
}