001/*
002 * ====================================================================
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *   http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing,
014 * software distributed under the License is distributed on an
015 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
016 * KIND, either express or implied.  See the License for the
017 * specific language governing permissions and limitations
018 * under the License.
019 * ====================================================================
020 *
021 * This software consists of voluntary contributions made by many
022 * individuals on behalf of the Apache Software Foundation.  For more
023 * information on the Apache Software Foundation, please see
024 * <http://www.apache.org/>.
025 *
026 */
027
028package org.apache.http.message;
029
030import java.util.BitSet;
031
032import org.apache.http.annotation.ThreadingBehavior;
033import org.apache.http.annotation.Contract;
034import org.apache.http.util.CharArrayBuffer;
035
036/**
037 * Low level parser for header field elements. The parsing routines of this class are designed
038 * to produce near zero intermediate garbage and make no intermediate copies of input data.
039 * <p>
040 * This class is immutable and thread safe.
041 *
042 * @since 4.4
043 */
044@Contract(threading = ThreadingBehavior.IMMUTABLE)
045public class TokenParser {
046
047    public static BitSet INIT_BITSET(final int ... b) {
048        final BitSet bitset = new BitSet();
049        for (final int aB : b) {
050            bitset.set(aB);
051        }
052        return bitset;
053    }
054
055    /** US-ASCII CR, carriage return (13) */
056    public static final char CR = '\r';
057
058    /** US-ASCII LF, line feed (10) */
059    public static final char LF = '\n';
060
061    /** US-ASCII SP, space (32) */
062    public static final char SP = ' ';
063
064    /** US-ASCII HT, horizontal-tab (9) */
065    public static final char HT = '\t';
066
067    /** Double quote */
068    public static final char DQUOTE = '\"';
069
070    /** Backward slash / escape character */
071    public static final char ESCAPE = '\\';
072
073    public static boolean isWhitespace(final char ch) {
074        return ch == SP || ch == HT || ch == CR || ch == LF;
075    }
076
077    public static final TokenParser INSTANCE = new TokenParser();
078
079    /**
080     * Extracts from the sequence of chars a token terminated with any of the given delimiters
081     * discarding semantically insignificant whitespace characters.
082     *
083     * @param buf buffer with the sequence of chars to be parsed
084     * @param cursor defines the bounds and current position of the buffer
085     * @param delimiters set of delimiting characters. Can be {@code null} if the token
086     *  is not delimited by any character.
087     */
088    public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
089        final StringBuilder dst = new StringBuilder();
090        boolean whitespace = false;
091        while (!cursor.atEnd()) {
092            final char current = buf.charAt(cursor.getPos());
093            if (delimiters != null && delimiters.get(current)) {
094                break;
095            } else if (isWhitespace(current)) {
096                skipWhiteSpace(buf, cursor);
097                whitespace = true;
098            } else {
099                if (whitespace && dst.length() > 0) {
100                    dst.append(' ');
101                }
102                copyContent(buf, cursor, delimiters, dst);
103                whitespace = false;
104            }
105        }
106        return dst.toString();
107    }
108
109    /**
110     * Extracts from the sequence of chars a value which can be enclosed in quote marks and
111     * terminated with any of the given delimiters discarding semantically insignificant
112     * whitespace characters.
113     *
114     * @param buf buffer with the sequence of chars to be parsed
115     * @param cursor defines the bounds and current position of the buffer
116     * @param delimiters set of delimiting characters. Can be {@code null} if the value
117     *  is not delimited by any character.
118     */
119    public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
120        final StringBuilder dst = new StringBuilder();
121        boolean whitespace = false;
122        while (!cursor.atEnd()) {
123            final char current = buf.charAt(cursor.getPos());
124            if (delimiters != null && delimiters.get(current)) {
125                break;
126            } else if (isWhitespace(current)) {
127                skipWhiteSpace(buf, cursor);
128                whitespace = true;
129            } else if (current == DQUOTE) {
130                if (whitespace && dst.length() > 0) {
131                    dst.append(' ');
132                }
133                copyQuotedContent(buf, cursor, dst);
134                whitespace = false;
135            } else {
136                if (whitespace && dst.length() > 0) {
137                    dst.append(' ');
138                }
139                copyUnquotedContent(buf, cursor, delimiters, dst);
140                whitespace = false;
141            }
142        }
143        return dst.toString();
144    }
145
146    /**
147     * Skips semantically insignificant whitespace characters and moves the cursor to the closest
148     * non-whitespace character.
149     *
150     * @param buf buffer with the sequence of chars to be parsed
151     * @param cursor defines the bounds and current position of the buffer
152     */
153    public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
154        int pos = cursor.getPos();
155        final int indexFrom = cursor.getPos();
156        final int indexTo = cursor.getUpperBound();
157        for (int i = indexFrom; i < indexTo; i++) {
158            final char current = buf.charAt(i);
159            if (!isWhitespace(current)) {
160                break;
161            } else {
162                pos++;
163            }
164        }
165        cursor.updatePos(pos);
166    }
167
168    /**
169     * Transfers content into the destination buffer until a whitespace character or any of
170     * the given delimiters is encountered.
171     *
172     * @param buf buffer with the sequence of chars to be parsed
173     * @param cursor defines the bounds and current position of the buffer
174     * @param delimiters set of delimiting characters. Can be {@code null} if the value
175     *  is delimited by a whitespace only.
176     * @param dst destination buffer
177     */
178    public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
179            final StringBuilder dst) {
180        int pos = cursor.getPos();
181        final int indexFrom = cursor.getPos();
182        final int indexTo = cursor.getUpperBound();
183        for (int i = indexFrom; i < indexTo; i++) {
184            final char current = buf.charAt(i);
185            if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
186                break;
187            } else {
188                pos++;
189                dst.append(current);
190            }
191        }
192        cursor.updatePos(pos);
193    }
194
195    /**
196     * Transfers content into the destination buffer until a whitespace character,  a quote,
197     * or any of the given delimiters is encountered.
198     *
199     * @param buf buffer with the sequence of chars to be parsed
200     * @param cursor defines the bounds and current position of the buffer
201     * @param delimiters set of delimiting characters. Can be {@code null} if the value
202     *  is delimited by a whitespace or a quote only.
203     * @param dst destination buffer
204     */
205    public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
206            final BitSet delimiters, final StringBuilder dst) {
207        int pos = cursor.getPos();
208        final int indexFrom = cursor.getPos();
209        final int indexTo = cursor.getUpperBound();
210        for (int i = indexFrom; i < indexTo; i++) {
211            final char current = buf.charAt(i);
212            if ((delimiters != null && delimiters.get(current))
213                    || isWhitespace(current) || current == DQUOTE) {
214                break;
215            } else {
216                pos++;
217                dst.append(current);
218            }
219        }
220        cursor.updatePos(pos);
221    }
222
223    /**
224     * Transfers content enclosed with quote marks into the destination buffer.
225     *
226     * @param buf buffer with the sequence of chars to be parsed
227     * @param cursor defines the bounds and current position of the buffer
228     * @param dst destination buffer
229     */
230    public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
231            final StringBuilder dst) {
232        if (cursor.atEnd()) {
233            return;
234        }
235        int pos = cursor.getPos();
236        int indexFrom = cursor.getPos();
237        final int indexTo = cursor.getUpperBound();
238        char current = buf.charAt(pos);
239        if (current != DQUOTE) {
240            return;
241        }
242        pos++;
243        indexFrom++;
244        boolean escaped = false;
245        for (int i = indexFrom; i < indexTo; i++, pos++) {
246            current = buf.charAt(i);
247            if (escaped) {
248                if (current != DQUOTE && current != ESCAPE) {
249                    dst.append(ESCAPE);
250                }
251                dst.append(current);
252                escaped = false;
253            } else {
254                if (current == DQUOTE) {
255                    pos++;
256                    break;
257                }
258                if (current == ESCAPE) {
259                    escaped = true;
260                } else if (current != CR && current != LF) {
261                    dst.append(current);
262                }
263            }
264        }
265        cursor.updatePos(pos);
266    }
267
268}