001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022
023import org.apache.commons.codec.DecoderException;
024import org.apache.commons.codec.EncoderException;
025import org.apache.commons.codec.binary.StringUtils;
026
027/**
028 * Implements methods common to all codecs defined in RFC 1522.
029 * <p>
030 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
031 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
032 * is unlikely to confuse existing message handling software.
033 * <p>
034 * This class is immutable and thread-safe.
035 *
036 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
037 *          Message Header Extensions for Non-ASCII Text</a>
038 *
039 * @since 1.3
040 * @version $Id: RFC1522Codec.java 1429868 2013-01-07 16:08:05Z ggregory $
041 */
042abstract class RFC1522Codec {
043
044    /** Separator. */
045    protected static final char SEP = '?';
046
047    /** Prefix. */
048    protected static final String POSTFIX = "?=";
049
050    /** Postfix. */
051    protected static final String PREFIX = "=?";
052
053    /**
054     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
055     * <p>
056     * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
057     * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
058     *
059     * @param text
060     *            a string to encode
061     * @param charset
062     *            a charset to be used
063     * @return RFC 1522 compliant "encoded-word"
064     * @throws EncoderException
065     *             thrown if there is an error condition during the Encoding process.
066     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
067     */
068    protected String encodeText(final String text, final Charset charset) throws EncoderException {
069        if (text == null) {
070            return null;
071        }
072        final StringBuilder buffer = new StringBuilder();
073        buffer.append(PREFIX);
074        buffer.append(charset);
075        buffer.append(SEP);
076        buffer.append(this.getEncoding());
077        buffer.append(SEP);
078        final byte [] rawData = this.doEncoding(text.getBytes(charset));
079        buffer.append(StringUtils.newStringUsAscii(rawData));
080        buffer.append(POSTFIX);
081        return buffer.toString();
082    }
083
084    /**
085     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
086     * <p>
087     * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
088     * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
089     *
090     * @param text
091     *            a string to encode
092     * @param charsetName
093     *            the charset to use
094     * @return RFC 1522 compliant "encoded-word"
095     * @throws EncoderException
096     *             thrown if there is an error condition during the Encoding process.
097     * @throws UnsupportedEncodingException
098     *             if charset is not available
099     *
100     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
101     */
102    protected String encodeText(final String text, final String charsetName)
103            throws EncoderException, UnsupportedEncodingException {
104        if (text == null) {
105            return null;
106        }
107        return this.encodeText(text, Charset.forName(charsetName));
108    }
109
110    /**
111     * Applies an RFC 1522 compliant decoding scheme to the given string of text.
112     * <p>
113     * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
114     * {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
115     *
116     * @param text
117     *            a string to decode
118     * @return A new decoded String or {@code null} if the input is {@code null}.
119     * @throws DecoderException
120     *             thrown if there is an error condition during the decoding process.
121     * @throws UnsupportedEncodingException
122     *             thrown if charset specified in the "encoded-word" header is not supported
123     */
124    protected String decodeText(final String text)
125            throws DecoderException, UnsupportedEncodingException {
126        if (text == null) {
127            return null;
128        }
129        if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
130            throw new DecoderException("RFC 1522 violation: malformed encoded content");
131        }
132        final int terminator = text.length() - 2;
133        int from = 2;
134        int to = text.indexOf(SEP, from);
135        if (to == terminator) {
136            throw new DecoderException("RFC 1522 violation: charset token not found");
137        }
138        final String charset = text.substring(from, to);
139        if (charset.equals("")) {
140            throw new DecoderException("RFC 1522 violation: charset not specified");
141        }
142        from = to + 1;
143        to = text.indexOf(SEP, from);
144        if (to == terminator) {
145            throw new DecoderException("RFC 1522 violation: encoding token not found");
146        }
147        final String encoding = text.substring(from, to);
148        if (!getEncoding().equalsIgnoreCase(encoding)) {
149            throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
150        }
151        from = to + 1;
152        to = text.indexOf(SEP, from);
153        byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
154        data = doDecoding(data);
155        return new String(data, charset);
156    }
157
158    /**
159     * Returns the codec name (referred to as encoding in the RFC 1522).
160     *
161     * @return name of the codec
162     */
163    protected abstract String getEncoding();
164
165    /**
166     * Encodes an array of bytes using the defined encoding scheme.
167     *
168     * @param bytes
169     *            Data to be encoded
170     * @return A byte array containing the encoded data
171     * @throws EncoderException
172     *             thrown if the Encoder encounters a failure condition during the encoding process.
173     */
174    protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
175
176    /**
177     * Decodes an array of bytes using the defined encoding scheme.
178     *
179     * @param bytes
180     *            Data to be decoded
181     * @return a byte array that contains decoded data
182     * @throws DecoderException
183     *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
184     */
185    protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
186}